]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
manager: when reexecuting try to connect to bus only when dbus.service is around...
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
5b6319dc
LP
40#ifdef HAVE_PAM
41#include <security/pam_appl.h>
42#endif
43
7b52a628
MS
44#ifdef HAVE_SELINUX
45#include <selinux/selinux.h>
46#endif
47
17df7223
LP
48#ifdef HAVE_SECCOMP
49#include <seccomp.h>
50#endif
51
eef65bf3
MS
52#ifdef HAVE_APPARMOR
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
3ffd4af2
LP
60#ifdef HAVE_APPARMOR
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
f6a6225e 67#include "def.h"
4d1a6904 68#include "env-util.h"
17df7223 69#include "errno-list.h"
3ffd4af2 70#include "execute.h"
8dd4c05b 71#include "exit-status.h"
3ffd4af2 72#include "fd-util.h"
8dd4c05b 73#include "fileio.h"
f97b34a6 74#include "format-util.h"
f4f15635 75#include "fs-util.h"
7d50b32a 76#include "glob-util.h"
c004493c 77#include "io-util.h"
8dd4c05b
LP
78#include "ioprio.h"
79#include "log.h"
80#include "macro.h"
81#include "missing.h"
82#include "mkdir.h"
83#include "namespace.h"
6bedfcbb 84#include "parse-util.h"
8dd4c05b 85#include "path-util.h"
0b452006 86#include "process-util.h"
78f22b97 87#include "rlimit-util.h"
8dd4c05b 88#include "rm-rf.h"
3ffd4af2
LP
89#ifdef HAVE_SECCOMP
90#include "seccomp-util.h"
91#endif
8dd4c05b 92#include "securebits.h"
07d46372 93#include "securebits-util.h"
8dd4c05b 94#include "selinux-util.h"
24882e06 95#include "signal-util.h"
8dd4c05b 96#include "smack-util.h"
fd63e712 97#include "special.h"
8b43440b 98#include "string-table.h"
07630cea 99#include "string-util.h"
8dd4c05b 100#include "strv.h"
7ccbd1ae 101#include "syslog-util.h"
8dd4c05b
LP
102#include "terminal-util.h"
103#include "unit.h"
b1d4f8e1 104#include "user-util.h"
8dd4c05b
LP
105#include "util.h"
106#include "utmp-wtmp.h"
5cb5a6ff 107
e056b01d 108#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 109#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 110
02a51aba
LP
111/* This assumes there is a 'tty' group */
112#define TTY_MODE 0620
113
531dca78
LP
114#define SNDBUF_SIZE (8*1024*1024)
115
034c6ed7
LP
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        /* Relocates the passed fds so that fds[i] ends up being fd number i+3.
         * Modifies the fds array in place! Returns 0 on success, -errno if
         * duplicating an fd fails. */

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep making passes until a pass completes without any fd landing
         * on a number other than the one we asked for. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, copy;

                        /* Nothing to do if the fd already sits at its slot */
                        if (fds[idx] == wanted)
                                continue;

                        copy = fcntl(fds[idx], F_DUPFD, wanted);
                        if (copy < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = copy;

                        /* F_DUPFD returns the lowest free fd >= wanted. If the
                         * slot was occupied, remember the first such index so
                         * that the next pass starts there. */
                        if (copy != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
160
4c47affc
FB
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total = n_storage_fds + n_socket_fds, k;

        /* Adjusts O_NONBLOCK and FD_CLOEXEC on the passed fds. O_NONBLOCK is
         * only touched for the first n_socket_fds entries of the array.
         * Returns 0 on success or the negative error of the failing helper. */

        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int q;

                if (k < n_socket_fds) {
                        q = fd_nonblock(fds[k], nonblock);
                        if (q < 0)
                                return q;
                }

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * passed on to our children. */
                q = fd_cloexec(fds[k], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
193
1e22b5cd 194static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
195 assert(context);
196
1e22b5cd
LP
197 if (context->stdio_as_fds)
198 return NULL;
199
80876c20
LP
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204}
205
1e22b5cd
LP
206static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
6ea832a2
LP
209 assert(context);
210
1e22b5cd 211 path = exec_context_tty_path(context);
6ea832a2 212
1e22b5cd
LP
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
6ea832a2 219
1e22b5cd
LP
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
6ea832a2
LP
229}
230
6af760f3
LP
231static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236}
237
3a1286b6 238static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244}
245
aac8c0c3
LP
246static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250}
251
252static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256}
257
6af760f3
LP
258static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
3a1286b6
MS
273}
274
80876c20
LP
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        /* Opens /dev/null with the given open() flags and installs it as fd
         * number nfd. Returns nfd on success, -errno on failure. */

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already handed us the fd number we wanted */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
292
524daa8c 293static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 294 static const union sockaddr_union sa = {
b92bea5d
ZJS
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
524daa8c
ZJS
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
cad93f29 302 if (gid_is_valid(gid)) {
524daa8c
ZJS
303 oldgid = getgid();
304
92a17af9 305 if (setegid(gid) < 0)
524daa8c
ZJS
306 return -errno;
307 }
308
cad93f29 309 if (uid_is_valid(uid)) {
524daa8c
ZJS
310 olduid = getuid();
311
92a17af9 312 if (seteuid(uid) < 0) {
524daa8c
ZJS
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
92a17af9 318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
cad93f29 323 if (uid_is_valid(uid))
524daa8c
ZJS
324 (void) seteuid(olduid);
325
326 restore_gid:
cad93f29 327 if (gid_is_valid(gid))
524daa8c
ZJS
328 (void) setegid(oldgid);
329
330 return r;
331}
332
fd1f9c89 333static int connect_logger_as(
7a1ab780 334 Unit *unit,
fd1f9c89 335 const ExecContext *context,
af635cf3 336 const ExecParameters *params,
fd1f9c89
LP
337 ExecOutput output,
338 const char *ident,
fd1f9c89
LP
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
524daa8c 343 int fd, r;
071830ff
LP
344
345 assert(context);
af635cf3 346 assert(params);
80876c20
LP
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
071830ff 350
54fe0cdb
LP
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
80876c20 353 return -errno;
071830ff 354
524daa8c
ZJS
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
071830ff 358
80876c20 359 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 360 safe_close(fd);
80876c20
LP
361 return -errno;
362 }
071830ff 363
fd1f9c89 364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 365
80876c20 366 dprintf(fd,
62bca2c6 367 "%s\n"
80876c20
LP
368 "%s\n"
369 "%i\n"
54fe0cdb
LP
370 "%i\n"
371 "%i\n"
372 "%i\n"
4f4a1dbf 373 "%i\n",
c867611e 374 context->syslog_identifier ?: ident,
af635cf3 375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
aac8c0c3
LP
378 is_syslog_output(output),
379 is_kmsg_output(output),
3a1286b6 380 is_terminal_output(output));
80876c20 381
fd1f9c89
LP
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
071830ff 387
80876c20
LP
388 return r;
389}
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        /* Opens the terminal at path (without making it our controlling TTY)
         * and installs it as fd number nfd. Returns nfd on success, a negative
         * error value on failure. */

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested number? Then there is nothing to move. */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
071830ff 408
1e3ad081
LP
409static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
071830ff 413
03fd9c49 414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
415 return EXEC_INPUT_NULL;
416
03fd9c49 417 return std_input;
4f2d528d
LP
418}
419
03fd9c49 420static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 421
03fd9c49 422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
423 return EXEC_OUTPUT_INHERIT;
424
03fd9c49 425 return std_output;
4f2d528d
LP
426}
427
a34ceba6
LP
428static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
52c239d7
LB
431 int socket_fd,
432 int named_iofds[3]) {
a34ceba6 433
4f2d528d
LP
434 ExecInput i;
435
436 assert(context);
a34ceba6
LP
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
4f2d528d 449
c39f1ce2 450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
451
452 switch (i) {
071830ff 453
80876c20
LP
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
071830ff 461
1e22b5cd 462 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
3a43da28 466 USEC_INFINITY);
970edce6 467 if (fd < 0)
80876c20
LP
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 472 safe_close(fd);
80876c20
LP
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
4f2d528d
LP
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
52c239d7
LB
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
80876c20
LP
486 default:
487 assert_not_reached("Unknown input type");
488 }
489}
490
a34ceba6
LP
491static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
52c239d7 497 int named_iofds[3],
a34ceba6 498 const char *ident,
7bce046b
LP
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
a34ceba6 503
4f2d528d
LP
504 ExecOutput o;
505 ExecInput i;
47c1d80d 506 int r;
4f2d528d 507
f2341e0a 508 assert(unit);
80876c20 509 assert(context);
a34ceba6 510 assert(params);
80876c20 511 assert(ident);
7bce046b
LP
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
80876c20 514
a34ceba6
LP
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
c39f1ce2 530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 531 o = fixup_output(context->std_output, socket_fd);
4f2d528d 532
eb17e935
MS
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
80876c20 536
eb17e935
MS
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
52c239d7 549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 551
eb17e935 552 o = e;
80876c20 553
eb17e935 554 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 558
acb591e4 559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 560 if (i != EXEC_INPUT_NULL)
eb17e935 561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 562
acb591e4
LP
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
eb17e935 565 return fileno;
94f04347 566
eb17e935
MS
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
071830ff 569 }
94f04347 570
eb17e935 571 switch (o) {
80876c20
LP
572
573 case EXEC_OUTPUT_NULL:
eb17e935 574 return open_null_as(O_WRONLY, fileno);
80876c20
LP
575
576 case EXEC_OUTPUT_TTY:
4f2d528d 577 if (is_terminal_input(i))
eb17e935 578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
579
580 /* We don't reset the terminal if this is just about output */
1e22b5cd 581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
582
583 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 585 case EXEC_OUTPUT_KMSG:
28dbc1e8 586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 590 if (r < 0) {
f2341e0a 591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 592 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
598 * services to detect whether they are connected to the journal or not. */
599
600 if (fstat(fileno, &st) >= 0) {
601 *journal_stream_dev = st.st_dev;
602 *journal_stream_ino = st.st_ino;
603 }
47c1d80d
MS
604 }
605 return r;
4f2d528d
LP
606
607 case EXEC_OUTPUT_SOCKET:
608 assert(socket_fd >= 0);
eb17e935 609 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 610
52c239d7
LB
611 case EXEC_OUTPUT_NAMED_FD:
612 (void) fd_nonblock(named_iofds[fileno], false);
613 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
614
94f04347 615 default:
80876c20 616 assert_not_reached("Unknown error type");
94f04347 617 }
071830ff
LP
618}
619
02a51aba
LP
620static int chown_terminal(int fd, uid_t uid) {
621 struct stat st;
622
623 assert(fd >= 0);
02a51aba 624
1ff74fb6
LP
625 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
626 if (isatty(fd) < 1)
627 return 0;
628
02a51aba 629 /* This might fail. What matters are the results. */
bab45044
LP
630 (void) fchown(fd, uid, -1);
631 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
632
633 if (fstat(fd, &st) < 0)
634 return -errno;
635
d8b4e2e9 636 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
637 return -EPERM;
638
639 return 0;
640}
641
7d5ceb64 642static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
643 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
644 int r;
80876c20 645
80876c20
LP
646 assert(_saved_stdin);
647 assert(_saved_stdout);
648
af6da548
LP
649 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
650 if (saved_stdin < 0)
651 return -errno;
80876c20 652
af6da548 653 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
654 if (saved_stdout < 0)
655 return -errno;
80876c20 656
7d5ceb64 657 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
658 if (fd < 0)
659 return fd;
80876c20 660
af6da548
LP
661 r = chown_terminal(fd, getuid());
662 if (r < 0)
3d18b167 663 return r;
02a51aba 664
3d18b167
LP
665 r = reset_terminal_fd(fd, true);
666 if (r < 0)
667 return r;
80876c20 668
3d18b167
LP
669 if (dup2(fd, STDIN_FILENO) < 0)
670 return -errno;
671
672 if (dup2(fd, STDOUT_FILENO) < 0)
673 return -errno;
80876c20
LP
674
675 if (fd >= 2)
03e334a1 676 safe_close(fd);
3d18b167 677 fd = -1;
80876c20
LP
678
679 *_saved_stdin = saved_stdin;
680 *_saved_stdout = saved_stdout;
681
3d18b167 682 saved_stdin = saved_stdout = -1;
80876c20 683
3d18b167 684 return 0;
80876c20
LP
685}
686
63d77c92 687static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
688 assert(err < 0);
689
690 if (err == -ETIMEDOUT)
63d77c92 691 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
692 else {
693 errno = -err;
63d77c92 694 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
695 }
696}
697
63d77c92 698static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 699 _cleanup_close_ int fd = -1;
80876c20 700
3b20f877 701 assert(vc);
80876c20 702
7d5ceb64 703 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 704 if (fd < 0)
3b20f877 705 return;
80876c20 706
63d77c92 707 write_confirm_error_fd(err, fd, u);
af6da548 708}
80876c20 709
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        /* Releases the terminal and puts the saved stdin/stdout fds back in
         * place. Both saved fds are closed and the variables updated with the
         * return value of safe_close(). Returns 0 on success, or -errno of
         * the last dup2() that failed. */

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
731
3b20f877
FB
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,   /* execute the command */
};
737
eedf223a 738static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 739 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 740 _cleanup_free_ char *e = NULL;
3b20f877 741 char c;
af6da548 742
3b20f877 743 /* For any internal errors, assume a positive response. */
7d5ceb64 744 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 745 if (r < 0) {
63d77c92 746 write_confirm_error(r, vc, u);
3b20f877
FB
747 return CONFIRM_EXECUTE;
748 }
af6da548 749
b0eb2944
FB
750 /* confirm_spawn might have been disabled while we were sleeping. */
751 if (manager_is_confirm_spawn_disabled(u->manager)) {
752 r = 1;
753 goto restore_stdio;
754 }
af6da548 755
2bcd3c26
FB
756 e = ellipsize(cmdline, 60, 100);
757 if (!e) {
758 log_oom();
759 r = CONFIRM_EXECUTE;
760 goto restore_stdio;
761 }
af6da548 762
d172b175 763 for (;;) {
539622bd 764 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 765 if (r < 0) {
63d77c92 766 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
767 r = CONFIRM_EXECUTE;
768 goto restore_stdio;
769 }
af6da548 770
d172b175 771 switch (c) {
b0eb2944
FB
772 case 'c':
773 printf("Resuming normal execution.\n");
774 manager_disable_confirm_spawn();
775 r = 1;
776 break;
dd6f9ac0
FB
777 case 'D':
778 unit_dump(u, stdout, " ");
779 continue; /* ask again */
d172b175
FB
780 case 'f':
781 printf("Failing execution.\n");
782 r = CONFIRM_PRETEND_FAILURE;
783 break;
784 case 'h':
b0eb2944
FB
785 printf(" c - continue, proceed without asking anymore\n"
786 " D - dump, show the state of the unit\n"
dd6f9ac0 787 " f - fail, don't execute the command and pretend it failed\n"
d172b175 788 " h - help\n"
eedf223a 789 " i - info, show a short summary of the unit\n"
56fde33a 790 " j - jobs, show jobs that are in progress\n"
d172b175
FB
791 " s - skip, don't execute the command and pretend it succeeded\n"
792 " y - yes, execute the command\n");
dd6f9ac0 793 continue; /* ask again */
eedf223a
FB
794 case 'i':
795 printf(" Description: %s\n"
796 " Unit: %s\n"
797 " Command: %s\n",
798 u->id, u->description, cmdline);
799 continue; /* ask again */
56fde33a
FB
800 case 'j':
801 manager_dump_jobs(u->manager, stdout, " ");
802 continue; /* ask again */
539622bd
FB
803 case 'n':
804 /* 'n' was removed in favor of 'f'. */
805 printf("Didn't understand 'n', did you mean 'f'?\n");
806 continue; /* ask again */
d172b175
FB
807 case 's':
808 printf("Skipping execution.\n");
809 r = CONFIRM_PRETEND_SUCCESS;
810 break;
811 case 'y':
812 r = CONFIRM_EXECUTE;
813 break;
814 default:
815 assert_not_reached("Unhandled choice");
816 }
3b20f877 817 break;
3b20f877 818 }
af6da548 819
3b20f877 820restore_stdio:
af6da548 821 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 822 return r;
80876c20
LP
823}
824
4d885bd3
DH
825static int get_fixed_user(const ExecContext *c, const char **user,
826 uid_t *uid, gid_t *gid,
827 const char **home, const char **shell) {
81a2b7ce 828 int r;
4d885bd3 829 const char *name;
81a2b7ce 830
4d885bd3 831 assert(c);
81a2b7ce 832
23deef88
LP
833 if (!c->user)
834 return 0;
835
4d885bd3
DH
836 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
837 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 838
23deef88 839 name = c->user;
4d885bd3
DH
840 r = get_user_creds_clean(&name, uid, gid, home, shell);
841 if (r < 0)
842 return r;
81a2b7ce 843
4d885bd3
DH
844 *user = name;
845 return 0;
846}
847
848static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
849 int r;
850 const char *name;
851
852 assert(c);
853
854 if (!c->group)
855 return 0;
856
857 name = c->group;
858 r = get_group_creds(&name, gid);
859 if (r < 0)
860 return r;
861
862 *group = name;
863 return 0;
864}
865
cdc5d5c5
DH
866static int get_supplementary_groups(const ExecContext *c, const char *user,
867 const char *group, gid_t gid,
868 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
869 char **i;
870 int r, k = 0;
871 int ngroups_max;
872 bool keep_groups = false;
873 gid_t *groups = NULL;
874 _cleanup_free_ gid_t *l_gids = NULL;
875
876 assert(c);
877
bbeea271
DH
878 /*
879 * If user is given, then lookup GID and supplementary groups list.
880 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
881 * here and as early as possible so we keep the list of supplementary
882 * groups of the caller.
bbeea271
DH
883 */
884 if (user && gid_is_valid(gid) && gid != 0) {
885 /* First step, initialize groups from /etc/groups */
886 if (initgroups(user, gid) < 0)
887 return -errno;
888
889 keep_groups = true;
890 }
891
4d885bd3
DH
892 if (!c->supplementary_groups)
893 return 0;
894
366ddd25
DH
895 /*
896 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
897 * be positive, otherwise fail.
898 */
899 errno = 0;
900 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
901 if (ngroups_max <= 0) {
902 if (errno > 0)
903 return -errno;
904 else
905 return -EOPNOTSUPP; /* For all other values */
906 }
907
4d885bd3
DH
908 l_gids = new(gid_t, ngroups_max);
909 if (!l_gids)
910 return -ENOMEM;
81a2b7ce 911
4d885bd3
DH
912 if (keep_groups) {
913 /*
914 * Lookup the list of groups that the user belongs to, we
915 * avoid NSS lookups here too for gid=0.
916 */
917 k = ngroups_max;
918 if (getgrouplist(user, gid, l_gids, &k) < 0)
919 return -EINVAL;
920 } else
921 k = 0;
81a2b7ce 922
4d885bd3
DH
923 STRV_FOREACH(i, c->supplementary_groups) {
924 const char *g;
81a2b7ce 925
4d885bd3
DH
926 if (k >= ngroups_max)
927 return -E2BIG;
81a2b7ce 928
4d885bd3
DH
929 g = *i;
930 r = get_group_creds(&g, l_gids+k);
931 if (r < 0)
932 return r;
81a2b7ce 933
4d885bd3
DH
934 k++;
935 }
81a2b7ce 936
4d885bd3
DH
937 /*
938 * Sets ngids to zero to drop all supplementary groups, happens
939 * when we are under root and SupplementaryGroups= is empty.
940 */
941 if (k == 0) {
942 *ngids = 0;
943 return 0;
944 }
81a2b7ce 945
4d885bd3
DH
946 /* Otherwise get the final list of supplementary groups */
947 groups = memdup(l_gids, sizeof(gid_t) * k);
948 if (!groups)
949 return -ENOMEM;
950
951 *supplementary_gids = groups;
952 *ngids = k;
953
954 groups = NULL;
955
956 return 0;
957}
958
959static int enforce_groups(const ExecContext *context, gid_t gid,
960 gid_t *supplementary_gids, int ngids) {
961 int r;
962
963 assert(context);
964
965 /* Handle SupplementaryGroups= even if it is empty */
966 if (context->supplementary_groups) {
967 r = maybe_setgroups(ngids, supplementary_gids);
968 if (r < 0)
97f0e76f 969 return r;
4d885bd3 970 }
81a2b7ce 971
4d885bd3
DH
972 if (gid_is_valid(gid)) {
973 /* Then set our gids */
974 if (setresgid(gid, gid, gid) < 0)
975 return -errno;
81a2b7ce
LP
976 }
977
978 return 0;
979}
980
981static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
982 assert(context);
983
4d885bd3
DH
984 if (!uid_is_valid(uid))
985 return 0;
986
479050b3 987 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
988 * capabilities while doing so. */
989
479050b3 990 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
991
992 /* First step: If we need to keep capabilities but
993 * drop privileges we need to make sure we keep our
cbb21cca 994 * caps, while we drop privileges. */
693ced48 995 if (uid != 0) {
cbb21cca 996 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
997
998 if (prctl(PR_GET_SECUREBITS) != sb)
999 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1000 return -errno;
1001 }
81a2b7ce
LP
1002 }
1003
479050b3 1004 /* Second step: actually set the uids */
81a2b7ce
LP
1005 if (setresuid(uid, uid, uid) < 0)
1006 return -errno;
1007
1008 /* At this point we should have all necessary capabilities but
1009 are otherwise a normal user. However, the caps might got
1010 corrupted due to the setresuid() so we need clean them up
1011 later. This is done outside of this call. */
1012
1013 return 0;
1014}
1015
5b6319dc
LP
1016#ifdef HAVE_PAM
1017
1018static int null_conv(
1019 int num_msg,
1020 const struct pam_message **msg,
1021 struct pam_response **resp,
1022 void *appdata_ptr) {
1023
1024 /* We don't support conversations */
1025
1026 return PAM_CONV_ERR;
1027}
1028
cefc33ae
LP
1029#endif
1030
5b6319dc
LP
1031static int setup_pam(
1032 const char *name,
1033 const char *user,
940c5210 1034 uid_t uid,
2d6fce8d 1035 gid_t gid,
5b6319dc 1036 const char *tty,
2065ca69 1037 char ***env,
5b6319dc
LP
1038 int fds[], unsigned n_fds) {
1039
cefc33ae
LP
1040#ifdef HAVE_PAM
1041
5b6319dc
LP
1042 static const struct pam_conv conv = {
1043 .conv = null_conv,
1044 .appdata_ptr = NULL
1045 };
1046
2d7c6aa2 1047 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
5b6319dc 1048 pam_handle_t *handle = NULL;
d6e5f3ad 1049 sigset_t old_ss;
7bb70b6e 1050 int pam_code = PAM_SUCCESS, r;
84eada2f 1051 char **nv, **e = NULL;
5b6319dc
LP
1052 bool close_session = false;
1053 pid_t pam_pid = 0, parent_pid;
970edce6 1054 int flags = 0;
5b6319dc
LP
1055
1056 assert(name);
1057 assert(user);
2065ca69 1058 assert(env);
5b6319dc
LP
1059
1060 /* We set up PAM in the parent process, then fork. The child
35b8ca3a 1061 * will then stay around until killed via PR_GET_PDEATHSIG or
5b6319dc
LP
1062 * systemd via the cgroup logic. It will then remove the PAM
1063 * session again. The parent process will exec() the actual
1064 * daemon. We do things this way to ensure that the main PID
1065 * of the daemon is the one we initially fork()ed. */
1066
7bb70b6e
LP
1067 r = barrier_create(&barrier);
1068 if (r < 0)
2d7c6aa2
DH
1069 goto fail;
1070
553d2243 1071 if (log_get_max_level() < LOG_DEBUG)
970edce6
ZJS
1072 flags |= PAM_SILENT;
1073
f546241b
ZJS
1074 pam_code = pam_start(name, user, &conv, &handle);
1075 if (pam_code != PAM_SUCCESS) {
5b6319dc
LP
1076 handle = NULL;
1077 goto fail;
1078 }
1079
f546241b
ZJS
1080 if (tty) {
1081 pam_code = pam_set_item(handle, PAM_TTY, tty);
1082 if (pam_code != PAM_SUCCESS)
5b6319dc 1083 goto fail;
f546241b 1084 }
5b6319dc 1085
84eada2f
JW
1086 STRV_FOREACH(nv, *env) {
1087 pam_code = pam_putenv(handle, *nv);
2065ca69
JW
1088 if (pam_code != PAM_SUCCESS)
1089 goto fail;
1090 }
1091
970edce6 1092 pam_code = pam_acct_mgmt(handle, flags);
f546241b 1093 if (pam_code != PAM_SUCCESS)
5b6319dc
LP
1094 goto fail;
1095
970edce6 1096 pam_code = pam_open_session(handle, flags);
f546241b 1097 if (pam_code != PAM_SUCCESS)
5b6319dc
LP
1098 goto fail;
1099
1100 close_session = true;
1101
f546241b
ZJS
1102 e = pam_getenvlist(handle);
1103 if (!e) {
5b6319dc
LP
1104 pam_code = PAM_BUF_ERR;
1105 goto fail;
1106 }
1107
1108 /* Block SIGTERM, so that we know that it won't get lost in
1109 * the child */
ce30c8dc 1110
72c0a2c2 1111 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
5b6319dc 1112
df0ff127 1113 parent_pid = getpid_cached();
5b6319dc 1114
f546241b 1115 pam_pid = fork();
7bb70b6e
LP
1116 if (pam_pid < 0) {
1117 r = -errno;
5b6319dc 1118 goto fail;
7bb70b6e 1119 }
5b6319dc
LP
1120
1121 if (pam_pid == 0) {
7bb70b6e 1122 int sig, ret = EXIT_PAM;
5b6319dc
LP
1123
1124 /* The child's job is to reset the PAM session on
1125 * termination */
2d7c6aa2 1126 barrier_set_role(&barrier, BARRIER_CHILD);
5b6319dc
LP
1127
1128 /* This string must fit in 10 chars (i.e. the length
5d6b1584
LP
1129 * of "/sbin/init"), to look pretty in /bin/ps */
1130 rename_process("(sd-pam)");
5b6319dc
LP
1131
1132 /* Make sure we don't keep open the passed fds in this
1133 child. We assume that otherwise only those fds are
1134 open here that have been opened by PAM. */
1135 close_many(fds, n_fds);
1136
940c5210
AK
1137 /* Drop privileges - we don't need any to pam_close_session
1138 * and this will make PR_SET_PDEATHSIG work in most cases.
1139 * If this fails, ignore the error - but expect sd-pam threads
1140 * to fail to exit normally */
2d6fce8d 1141
97f0e76f
LP
1142 r = maybe_setgroups(0, NULL);
1143 if (r < 0)
1144 log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
2d6fce8d
LP
1145 if (setresgid(gid, gid, gid) < 0)
1146 log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
940c5210 1147 if (setresuid(uid, uid, uid) < 0)
2d6fce8d 1148 log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
940c5210 1149
ce30c8dc
LP
1150 (void) ignore_signals(SIGPIPE, -1);
1151
940c5210
AK
1152 /* Wait until our parent died. This will only work if
1153 * the above setresuid() succeeds, otherwise the kernel
1154 * will not allow unprivileged parents kill their privileged
1155 * children this way. We rely on the control groups kill logic
5b6319dc
LP
1156 * to do the rest for us. */
1157 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
1158 goto child_finish;
1159
2d7c6aa2
DH
1160 /* Tell the parent that our setup is done. This is especially
1161 * important regarding dropping privileges. Otherwise, unit
643f4706
ZJS
1162 * setup might race against our setresuid(2) call.
1163 *
1164 * If the parent aborted, we'll detect this below, hence ignore
1165 * return failure here. */
1166 (void) barrier_place(&barrier);
2d7c6aa2 1167
643f4706 1168 /* Check if our parent process might already have died? */
5b6319dc 1169 if (getppid() == parent_pid) {
d6e5f3ad
DM
1170 sigset_t ss;
1171
1172 assert_se(sigemptyset(&ss) >= 0);
1173 assert_se(sigaddset(&ss, SIGTERM) >= 0);
1174
3dead8d9
LP
1175 for (;;) {
1176 if (sigwait(&ss, &sig) < 0) {
1177 if (errno == EINTR)
1178 continue;
1179
1180 goto child_finish;
1181 }
5b6319dc 1182
3dead8d9
LP
1183 assert(sig == SIGTERM);
1184 break;
1185 }
5b6319dc
LP
1186 }
1187
3dead8d9 1188 /* If our parent died we'll end the session */
f546241b 1189 if (getppid() != parent_pid) {
970edce6 1190 pam_code = pam_close_session(handle, flags);
f546241b 1191 if (pam_code != PAM_SUCCESS)
5b6319dc 1192 goto child_finish;
f546241b 1193 }
5b6319dc 1194
7bb70b6e 1195 ret = 0;
5b6319dc
LP
1196
1197 child_finish:
970edce6 1198 pam_end(handle, pam_code | flags);
7bb70b6e 1199 _exit(ret);
5b6319dc
LP
1200 }
1201
2d7c6aa2
DH
1202 barrier_set_role(&barrier, BARRIER_PARENT);
1203
5b6319dc
LP
1204 /* If the child was forked off successfully it will do all the
1205 * cleanups, so forget about the handle here. */
1206 handle = NULL;
1207
3b8bddde 1208 /* Unblock SIGTERM again in the parent */
72c0a2c2 1209 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
5b6319dc
LP
1210
1211 /* We close the log explicitly here, since the PAM modules
1212 * might have opened it, but we don't want this fd around. */
1213 closelog();
1214
2d7c6aa2
DH
1215 /* Synchronously wait for the child to initialize. We don't care for
1216 * errors as we cannot recover. However, warn loudly if it happens. */
1217 if (!barrier_place_and_sync(&barrier))
1218 log_error("PAM initialization failed");
1219
2065ca69
JW
1220 strv_free(*env);
1221 *env = e;
aa87e624 1222
5b6319dc
LP
1223 return 0;
1224
1225fail:
970edce6
ZJS
1226 if (pam_code != PAM_SUCCESS) {
1227 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
7bb70b6e
LP
1228 r = -EPERM; /* PAM errors do not map to errno */
1229 } else
1230 log_error_errno(r, "PAM failed: %m");
9ba35398 1231
5b6319dc
LP
1232 if (handle) {
1233 if (close_session)
970edce6 1234 pam_code = pam_close_session(handle, flags);
5b6319dc 1235
970edce6 1236 pam_end(handle, pam_code | flags);
5b6319dc
LP
1237 }
1238
1239 strv_free(e);
5b6319dc
LP
1240 closelog();
1241
7bb70b6e 1242 return r;
cefc33ae
LP
1243#else
1244 return 0;
5b6319dc 1245#endif
cefc33ae 1246}
5b6319dc 1247
5d6b1584
LP
/* Derives a short "(name)" process title from the last component of 'path' and applies it with
 * rename_process(). At most the trailing 8 characters of the name are kept, so that the result,
 * parentheses included, fits in 10 characters (i.e. the length of "/sbin/init") and looks pretty
 * in /bin/ps. An empty name results in the title "(...)". */
static void rename_process_from_path(const char *path) {
        char title[11];
        const char *name;
        size_t n;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        /* The tail of the name is usually the more interesting part, since the head might just be
         * "systemd-" */
        n = strlen(name);
        if (n > 8) {
                name += n - 8;
                n = 8;
        }

        title[0] = '(';
        memcpy(title + 1, name, n);
        title[n + 1] = ')';
        title[n + 2] = 0;

        rename_process(title);
}
1278
469830d1
LP
1279static bool context_has_address_families(const ExecContext *c) {
1280 assert(c);
1281
1282 return c->address_families_whitelist ||
1283 !set_isempty(c->address_families);
1284}
1285
1286static bool context_has_syscall_filters(const ExecContext *c) {
1287 assert(c);
1288
1289 return c->syscall_whitelist ||
1290 !set_isempty(c->syscall_filter);
1291}
1292
/* Decides whether the "no new privileges" flag is needed for this context. It is needed if it was
 * requested explicitly, or if we lack CAP_SYS_ADMIN and any of the seccomp-based settings checked
 * below is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: any form of seccomp requires NNP */
        if (context_has_address_families(c) ||
            c->memory_deny_write_execute ||
            c->restrict_realtime ||
            exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c) ||
            !set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1314
c0467cf3 1315#ifdef HAVE_SECCOMP
17df7223 1316
83f12b27 1317static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1318
1319 if (is_seccomp_available())
1320 return false;
1321
1322 log_open();
1323 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1324 log_close();
1325 return true;
83f12b27
FS
1326}
1327
165a31c0 1328static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1329 uint32_t negative_action, default_action, action;
165a31c0 1330 int r;
8351ceae 1331
469830d1 1332 assert(u);
c0467cf3 1333 assert(c);
8351ceae 1334
469830d1 1335 if (!context_has_syscall_filters(c))
83f12b27
FS
1336 return 0;
1337
469830d1
LP
1338 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1339 return 0;
e9642be2 1340
469830d1 1341 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1342
469830d1
LP
1343 if (c->syscall_whitelist) {
1344 default_action = negative_action;
1345 action = SCMP_ACT_ALLOW;
7c66bae2 1346 } else {
469830d1
LP
1347 default_action = SCMP_ACT_ALLOW;
1348 action = negative_action;
57183d11 1349 }
8351ceae 1350
165a31c0
LP
1351 if (needs_ambient_hack) {
1352 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1353 if (r < 0)
1354 return r;
1355 }
1356
469830d1 1357 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1358}
1359
469830d1
LP
1360static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1361 assert(u);
4298d0b5
LP
1362 assert(c);
1363
469830d1 1364 if (set_isempty(c->syscall_archs))
83f12b27
FS
1365 return 0;
1366
469830d1
LP
1367 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1368 return 0;
4298d0b5 1369
469830d1
LP
1370 return seccomp_restrict_archs(c->syscall_archs);
1371}
4298d0b5 1372
469830d1
LP
1373static int apply_address_families(const Unit* u, const ExecContext *c) {
1374 assert(u);
1375 assert(c);
4298d0b5 1376
469830d1
LP
1377 if (!context_has_address_families(c))
1378 return 0;
4298d0b5 1379
469830d1
LP
1380 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1381 return 0;
4298d0b5 1382
469830d1 1383 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1384}
4298d0b5 1385
83f12b27 1386static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1387 assert(u);
f3e43635
TM
1388 assert(c);
1389
469830d1 1390 if (!c->memory_deny_write_execute)
83f12b27
FS
1391 return 0;
1392
469830d1
LP
1393 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1394 return 0;
f3e43635 1395
469830d1 1396 return seccomp_memory_deny_write_execute();
f3e43635
TM
1397}
1398
83f12b27 1399static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1400 assert(u);
f4170c67
LP
1401 assert(c);
1402
469830d1 1403 if (!c->restrict_realtime)
83f12b27
FS
1404 return 0;
1405
469830d1
LP
1406 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1407 return 0;
f4170c67 1408
469830d1 1409 return seccomp_restrict_realtime();
f4170c67
LP
1410}
1411
59e856c7 1412static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1413 assert(u);
59eeb84b
LP
1414 assert(c);
1415
1416 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1417 * let's protect even those systems where this is left on in the kernel. */
1418
469830d1 1419 if (!c->protect_kernel_tunables)
59eeb84b
LP
1420 return 0;
1421
469830d1
LP
1422 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1423 return 0;
59eeb84b 1424
469830d1 1425 return seccomp_protect_sysctl();
59eeb84b
LP
1426}
1427
59e856c7 1428static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1429 assert(u);
502d704e
DH
1430 assert(c);
1431
25a8d8a0 1432 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1433
469830d1
LP
1434 if (!c->protect_kernel_modules)
1435 return 0;
1436
502d704e
DH
1437 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1438 return 0;
1439
469830d1 1440 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1441}
1442
59e856c7 1443static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1444 assert(u);
ba128bb8
LP
1445 assert(c);
1446
8f81a5f6 1447 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1448
469830d1
LP
1449 if (!c->private_devices)
1450 return 0;
1451
ba128bb8
LP
1452 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1453 return 0;
1454
469830d1 1455 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1456}
1457
add00535 1458static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1459 assert(u);
add00535
LP
1460 assert(c);
1461
1462 if (!exec_context_restrict_namespaces_set(c))
1463 return 0;
1464
1465 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1466 return 0;
1467
1468 return seccomp_restrict_namespaces(c->restrict_namespaces);
1469}
1470
78e864e5 1471static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1472 unsigned long personality;
1473 int r;
78e864e5
TM
1474
1475 assert(u);
1476 assert(c);
1477
1478 if (!c->lock_personality)
1479 return 0;
1480
1481 if (skip_seccomp_unavailable(u, "LockPersonality="))
1482 return 0;
1483
e8132d63
LP
1484 personality = c->personality;
1485
1486 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1487 if (personality == PERSONALITY_INVALID) {
1488
1489 r = opinionated_personality(&personality);
1490 if (r < 0)
1491 return r;
1492 }
78e864e5
TM
1493
1494 return seccomp_lock_personality(personality);
1495}
1496
c0467cf3 1497#endif
8351ceae 1498
31a7eb86
ZJS
1499static void do_idle_pipe_dance(int idle_pipe[4]) {
1500 assert(idle_pipe);
1501
54eb2300
LP
1502 idle_pipe[1] = safe_close(idle_pipe[1]);
1503 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1504
1505 if (idle_pipe[0] >= 0) {
1506 int r;
1507
1508 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1509
1510 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1511 ssize_t n;
1512
31a7eb86 1513 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1514 n = write(idle_pipe[3], "x", 1);
1515 if (n > 0)
cd972d69
ZJS
1516 /* Wait for systemd to react to the signal above. */
1517 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1518 }
1519
54eb2300 1520 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1521
1522 }
1523
54eb2300 1524 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1525}
1526
7cae38c4 1527static int build_environment(
fd63e712 1528 Unit *u,
9fa95f85 1529 const ExecContext *c,
1e22b5cd 1530 const ExecParameters *p,
7cae38c4
LP
1531 unsigned n_fds,
1532 const char *home,
1533 const char *username,
1534 const char *shell,
7bce046b
LP
1535 dev_t journal_stream_dev,
1536 ino_t journal_stream_ino,
7cae38c4
LP
1537 char ***ret) {
1538
1539 _cleanup_strv_free_ char **our_env = NULL;
1540 unsigned n_env = 0;
1541 char *x;
1542
4b58153d 1543 assert(u);
7cae38c4
LP
1544 assert(c);
1545 assert(ret);
1546
4b58153d 1547 our_env = new0(char*, 14);
7cae38c4
LP
1548 if (!our_env)
1549 return -ENOMEM;
1550
1551 if (n_fds > 0) {
8dd4c05b
LP
1552 _cleanup_free_ char *joined = NULL;
1553
df0ff127 1554 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1555 return -ENOMEM;
1556 our_env[n_env++] = x;
1557
1558 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
8dd4c05b 1561
1e22b5cd 1562 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1563 if (!joined)
1564 return -ENOMEM;
1565
605405c6 1566 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1567 if (!x)
1568 return -ENOMEM;
1569 our_env[n_env++] = x;
7cae38c4
LP
1570 }
1571
b08af3b1 1572 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1573 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1574 return -ENOMEM;
1575 our_env[n_env++] = x;
1576
1e22b5cd 1577 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580 }
1581
fd63e712
LP
1582 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1583 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1584 * check the database directly. */
ac647978 1585 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1586 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1587 if (!x)
1588 return -ENOMEM;
1589 our_env[n_env++] = x;
1590 }
1591
7cae38c4
LP
1592 if (home) {
1593 x = strappend("HOME=", home);
1594 if (!x)
1595 return -ENOMEM;
1596 our_env[n_env++] = x;
1597 }
1598
1599 if (username) {
1600 x = strappend("LOGNAME=", username);
1601 if (!x)
1602 return -ENOMEM;
1603 our_env[n_env++] = x;
1604
1605 x = strappend("USER=", username);
1606 if (!x)
1607 return -ENOMEM;
1608 our_env[n_env++] = x;
1609 }
1610
1611 if (shell) {
1612 x = strappend("SHELL=", shell);
1613 if (!x)
1614 return -ENOMEM;
1615 our_env[n_env++] = x;
1616 }
1617
4b58153d
LP
1618 if (!sd_id128_is_null(u->invocation_id)) {
1619 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1620 return -ENOMEM;
1621
1622 our_env[n_env++] = x;
1623 }
1624
6af760f3
LP
1625 if (exec_context_needs_term(c)) {
1626 const char *tty_path, *term = NULL;
1627
1628 tty_path = exec_context_tty_path(c);
1629
1630 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1631 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1632 * passes to PID 1 ends up all the way in the console login shown. */
1633
1634 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1635 term = getenv("TERM");
1636 if (!term)
1637 term = default_term_for_tty(tty_path);
7cae38c4 1638
6af760f3 1639 x = strappend("TERM=", term);
7cae38c4
LP
1640 if (!x)
1641 return -ENOMEM;
1642 our_env[n_env++] = x;
1643 }
1644
7bce046b
LP
1645 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1646 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1647 return -ENOMEM;
1648
1649 our_env[n_env++] = x;
1650 }
1651
7cae38c4 1652 our_env[n_env++] = NULL;
7bce046b 1653 assert(n_env <= 12);
7cae38c4
LP
1654
1655 *ret = our_env;
1656 our_env = NULL;
1657
1658 return 0;
1659}
1660
b4c14404
FB
1661static int build_pass_environment(const ExecContext *c, char ***ret) {
1662 _cleanup_strv_free_ char **pass_env = NULL;
1663 size_t n_env = 0, n_bufsize = 0;
1664 char **i;
1665
1666 STRV_FOREACH(i, c->pass_environment) {
1667 _cleanup_free_ char *x = NULL;
1668 char *v;
1669
1670 v = getenv(*i);
1671 if (!v)
1672 continue;
605405c6 1673 x = strjoin(*i, "=", v);
b4c14404
FB
1674 if (!x)
1675 return -ENOMEM;
1676 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1677 return -ENOMEM;
1678 pass_env[n_env++] = x;
1679 pass_env[n_env] = NULL;
1680 x = NULL;
1681 }
1682
1683 *ret = pass_env;
1684 pass_env = NULL;
1685
1686 return 0;
1687}
1688
8b44a3d2
LP
1689static bool exec_needs_mount_namespace(
1690 const ExecContext *context,
1691 const ExecParameters *params,
1692 ExecRuntime *runtime) {
1693
1694 assert(context);
1695 assert(params);
1696
915e6d16
LP
1697 if (context->root_image)
1698 return true;
1699
2a624c36
AP
1700 if (!strv_isempty(context->read_write_paths) ||
1701 !strv_isempty(context->read_only_paths) ||
1702 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1703 return true;
1704
d2d6c096
LP
1705 if (context->n_bind_mounts > 0)
1706 return true;
1707
8b44a3d2
LP
1708 if (context->mount_flags != 0)
1709 return true;
1710
1711 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1712 return true;
1713
8b44a3d2
LP
1714 if (context->private_devices ||
1715 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1716 context->protect_home != PROTECT_HOME_NO ||
1717 context->protect_kernel_tunables ||
c575770b 1718 context->protect_kernel_modules ||
59eeb84b 1719 context->protect_control_groups)
8b44a3d2
LP
1720 return true;
1721
9c988f93 1722 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1723 return true;
1724
8b44a3d2
LP
1725 return false;
1726}
1727
d251207d
LP
1728static int setup_private_users(uid_t uid, gid_t gid) {
1729 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1730 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1731 _cleanup_close_ int unshare_ready_fd = -1;
1732 _cleanup_(sigkill_waitp) pid_t pid = 0;
1733 uint64_t c = 1;
1734 siginfo_t si;
1735 ssize_t n;
1736 int r;
1737
1738 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1739 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1740 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1741 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1742 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1743 * continues execution normally. */
1744
587ab01b
ZJS
1745 if (uid != 0 && uid_is_valid(uid)) {
1746 r = asprintf(&uid_map,
1747 "0 0 1\n" /* Map root → root */
1748 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1749 uid, uid);
1750 if (r < 0)
1751 return -ENOMEM;
1752 } else {
e0f3720e 1753 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1754 if (!uid_map)
1755 return -ENOMEM;
1756 }
d251207d 1757
587ab01b
ZJS
1758 if (gid != 0 && gid_is_valid(gid)) {
1759 r = asprintf(&gid_map,
1760 "0 0 1\n" /* Map root → root */
1761 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1762 gid, gid);
1763 if (r < 0)
1764 return -ENOMEM;
1765 } else {
d251207d 1766 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1767 if (!gid_map)
1768 return -ENOMEM;
1769 }
d251207d
LP
1770
1771 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1772 * namespace. */
1773 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1774 if (unshare_ready_fd < 0)
1775 return -errno;
1776
1777 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1778 * failed. */
1779 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1780 return -errno;
1781
1782 pid = fork();
1783 if (pid < 0)
1784 return -errno;
1785
1786 if (pid == 0) {
1787 _cleanup_close_ int fd = -1;
1788 const char *a;
1789 pid_t ppid;
1790
1791 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1792 * here, after the parent opened its own user namespace. */
1793
1794 ppid = getppid();
1795 errno_pipe[0] = safe_close(errno_pipe[0]);
1796
1797 /* Wait until the parent unshared the user namespace */
1798 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1799 r = -errno;
1800 goto child_fail;
1801 }
1802
1803 /* Disable the setgroups() system call in the child user namespace, for good. */
1804 a = procfs_file_alloca(ppid, "setgroups");
1805 fd = open(a, O_WRONLY|O_CLOEXEC);
1806 if (fd < 0) {
1807 if (errno != ENOENT) {
1808 r = -errno;
1809 goto child_fail;
1810 }
1811
1812 /* If the file is missing the kernel is too old, let's continue anyway. */
1813 } else {
1814 if (write(fd, "deny\n", 5) < 0) {
1815 r = -errno;
1816 goto child_fail;
1817 }
1818
1819 fd = safe_close(fd);
1820 }
1821
1822 /* First write the GID map */
1823 a = procfs_file_alloca(ppid, "gid_map");
1824 fd = open(a, O_WRONLY|O_CLOEXEC);
1825 if (fd < 0) {
1826 r = -errno;
1827 goto child_fail;
1828 }
1829 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833 fd = safe_close(fd);
1834
1835 /* The write the UID map */
1836 a = procfs_file_alloca(ppid, "uid_map");
1837 fd = open(a, O_WRONLY|O_CLOEXEC);
1838 if (fd < 0) {
1839 r = -errno;
1840 goto child_fail;
1841 }
1842 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846
1847 _exit(EXIT_SUCCESS);
1848
1849 child_fail:
1850 (void) write(errno_pipe[1], &r, sizeof(r));
1851 _exit(EXIT_FAILURE);
1852 }
1853
1854 errno_pipe[1] = safe_close(errno_pipe[1]);
1855
1856 if (unshare(CLONE_NEWUSER) < 0)
1857 return -errno;
1858
1859 /* Let the child know that the namespace is ready now */
1860 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1861 return -errno;
1862
1863 /* Try to read an error code from the child */
1864 n = read(errno_pipe[0], &r, sizeof(r));
1865 if (n < 0)
1866 return -errno;
1867 if (n == sizeof(r)) { /* an error code was sent to us */
1868 if (r < 0)
1869 return r;
1870 return -EIO;
1871 }
1872 if (n != 0) /* on success we should have read 0 bytes */
1873 return -EIO;
1874
1875 r = wait_for_terminate(pid, &si);
1876 if (r < 0)
1877 return r;
1878 pid = 0;
1879
1880 /* If something strange happened with the child, let's consider this fatal, too */
1881 if (si.si_code != CLD_EXITED || si.si_status != 0)
1882 return -EIO;
1883
1884 return 0;
1885}
1886
3536f49e 1887static int setup_exec_directory(
07689d5d
LP
1888 const ExecContext *context,
1889 const ExecParameters *params,
1890 uid_t uid,
3536f49e 1891 gid_t gid,
3536f49e
YW
1892 ExecDirectoryType type,
1893 int *exit_status) {
07689d5d 1894
3536f49e
YW
1895 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1896 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1897 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1898 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1899 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1900 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1901 };
07689d5d
LP
1902 char **rt;
1903 int r;
1904
1905 assert(context);
1906 assert(params);
3536f49e
YW
1907 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1908 assert(exit_status);
07689d5d 1909
3536f49e
YW
1910 if (!params->prefix[type])
1911 return 0;
1912
8679efde 1913 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1914 if (!uid_is_valid(uid))
1915 uid = 0;
1916 if (!gid_is_valid(gid))
1917 gid = 0;
1918 }
1919
1920 STRV_FOREACH(rt, context->directories[type].paths) {
07689d5d
LP
1921 _cleanup_free_ char *p;
1922
3536f49e
YW
1923 p = strjoin(params->prefix[type], "/", *rt);
1924 if (!p) {
1925 r = -ENOMEM;
1926 goto fail;
1927 }
07689d5d 1928
23a7448e
YW
1929 r = mkdir_parents_label(p, 0755);
1930 if (r < 0)
3536f49e 1931 goto fail;
23a7448e 1932
3536f49e 1933 r = mkdir_p_label(p, context->directories[type].mode);
07689d5d 1934 if (r < 0)
3536f49e 1935 goto fail;
07689d5d 1936
c71b2eb7
LP
1937 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1938 * a service, and shall not be writable. */
1939 if (type == EXEC_DIRECTORY_CONFIGURATION)
1940 continue;
1941
3536f49e 1942 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
07689d5d 1943 if (r < 0)
3536f49e 1944 goto fail;
07689d5d
LP
1945 }
1946
1947 return 0;
3536f49e
YW
1948
1949fail:
1950 *exit_status = exit_status_table[type];
1951
1952 return r;
07689d5d
LP
1953}
1954
cefc33ae
LP
1955static int setup_smack(
1956 const ExecContext *context,
1957 const ExecCommand *command) {
1958
cefc33ae
LP
1959 int r;
1960
1961 assert(context);
1962 assert(command);
1963
cefc33ae
LP
1964 if (context->smack_process_label) {
1965 r = mac_smack_apply_pid(0, context->smack_process_label);
1966 if (r < 0)
1967 return r;
1968 }
1969#ifdef SMACK_DEFAULT_PROCESS_LABEL
1970 else {
1971 _cleanup_free_ char *exec_label = NULL;
1972
1973 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1974 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1975 return r;
1976
1977 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1978 if (r < 0)
1979 return r;
1980 }
cefc33ae
LP
1981#endif
1982
1983 return 0;
1984}
1985
3fbe8dbe
LP
1986static int compile_read_write_paths(
1987 const ExecContext *context,
1988 const ExecParameters *params,
1989 char ***ret) {
1990
1991 _cleanup_strv_free_ char **l = NULL;
1992 char **rt;
3536f49e 1993 ExecDirectoryType i;
3fbe8dbe 1994
06ec51d8
ZJS
1995 /* Compile the list of writable paths. This is the combination of
1996 * the explicitly configured paths, plus all runtime directories. */
3fbe8dbe 1997
3536f49e
YW
1998 if (strv_isempty(context->read_write_paths)) {
1999 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2000 if (!strv_isempty(context->directories[i].paths))
2001 break;
2002
2003 if (i == _EXEC_DIRECTORY_MAX) {
2004 *ret = NULL; /* NOP if neither is set */
2005 return 0;
2006 }
3fbe8dbe
LP
2007 }
2008
2009 l = strv_copy(context->read_write_paths);
2010 if (!l)
2011 return -ENOMEM;
2012
3536f49e
YW
2013 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2014 if (!params->prefix[i])
2015 continue;
3fbe8dbe 2016
3536f49e
YW
2017 STRV_FOREACH(rt, context->directories[i].paths) {
2018 char *s;
3fbe8dbe 2019
3536f49e
YW
2020 s = strjoin(params->prefix[i], "/", *rt);
2021 if (!s)
2022 return -ENOMEM;
2023
2024 if (strv_consume(&l, s) < 0)
2025 return -ENOMEM;
2026 }
3fbe8dbe
LP
2027 }
2028
2029 *ret = l;
2030 l = NULL;
2031
2032 return 0;
2033}
2034
6818c54c
LP
2035static int apply_mount_namespace(
2036 Unit *u,
2037 ExecCommand *command,
2038 const ExecContext *context,
2039 const ExecParameters *params,
2040 ExecRuntime *runtime) {
2041
06ec51d8 2042 _cleanup_strv_free_ char **rw = NULL;
93c6bb51 2043 char *tmp = NULL, *var = NULL;
915e6d16 2044 const char *root_dir = NULL, *root_image = NULL;
93c6bb51 2045 NameSpaceInfo ns_info = {
af964954 2046 .ignore_protect_paths = false,
93c6bb51
DH
2047 .private_dev = context->private_devices,
2048 .protect_control_groups = context->protect_control_groups,
2049 .protect_kernel_tunables = context->protect_kernel_tunables,
2050 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2051 .mount_apivfs = context->mount_apivfs,
93c6bb51 2052 };
165a31c0 2053 bool needs_sandboxing;
6818c54c 2054 int r;
93c6bb51 2055
2b3c1b9e
DH
2056 assert(context);
2057
93c6bb51
DH
2058 /* The runtime struct only contains the parent of the private /tmp,
2059 * which is non-accessible to world users. Inside of it there's a /tmp
2060 * that is sticky, and that's the one we want to use here. */
2061
2062 if (context->private_tmp && runtime) {
2063 if (runtime->tmp_dir)
2064 tmp = strjoina(runtime->tmp_dir, "/tmp");
2065 if (runtime->var_tmp_dir)
2066 var = strjoina(runtime->var_tmp_dir, "/tmp");
2067 }
2068
2069 r = compile_read_write_paths(context, params, &rw);
2070 if (r < 0)
2071 return r;
2072
915e6d16
LP
2073 if (params->flags & EXEC_APPLY_CHROOT) {
2074 root_image = context->root_image;
2075
2076 if (!root_image)
2077 root_dir = context->root_directory;
2078 }
93c6bb51 2079
af964954
DH
2080 /*
2081 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2082 * sandbox info, otherwise enforce it, don't ignore protected paths and
2083 * fail if we are enable to apply the sandbox inside the mount namespace.
2084 */
2085 if (!context->dynamic_user && root_dir)
2086 ns_info.ignore_protect_paths = true;
2087
165a31c0 2088 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2089
915e6d16
LP
2090 r = setup_namespace(root_dir, root_image,
2091 &ns_info, rw,
165a31c0
LP
2092 needs_sandboxing ? context->read_only_paths : NULL,
2093 needs_sandboxing ? context->inaccessible_paths : NULL,
d2d6c096
LP
2094 context->bind_mounts,
2095 context->n_bind_mounts,
93c6bb51
DH
2096 tmp,
2097 var,
165a31c0
LP
2098 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2099 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2100 context->mount_flags,
2101 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51
DH
2102
2103 /* If we couldn't set up the namespace this is probably due to a
2104 * missing capability. In this case, silently proceeed. */
2105 if (IN_SET(r, -EPERM, -EACCES)) {
2106 log_open();
2107 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2108 log_close();
2109 r = 0;
2110 }
2111
2112 return r;
2113}
2114
915e6d16
LP
2115static int apply_working_directory(
2116 const ExecContext *context,
2117 const ExecParameters *params,
2118 const char *home,
376fecf6
LP
2119 const bool needs_mount_ns,
2120 int *exit_status) {
915e6d16 2121
6732edab 2122 const char *d, *wd;
2b3c1b9e
DH
2123
2124 assert(context);
376fecf6 2125 assert(exit_status);
2b3c1b9e 2126
6732edab
LP
2127 if (context->working_directory_home) {
2128
376fecf6
LP
2129 if (!home) {
2130 *exit_status = EXIT_CHDIR;
6732edab 2131 return -ENXIO;
376fecf6 2132 }
6732edab 2133
2b3c1b9e 2134 wd = home;
6732edab
LP
2135
2136 } else if (context->working_directory)
2b3c1b9e
DH
2137 wd = context->working_directory;
2138 else
2139 wd = "/";
e7f1e7c6
DH
2140
2141 if (params->flags & EXEC_APPLY_CHROOT) {
2142 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2143 if (chroot(context->root_directory) < 0) {
2144 *exit_status = EXIT_CHROOT;
e7f1e7c6 2145 return -errno;
376fecf6 2146 }
e7f1e7c6 2147
2b3c1b9e
DH
2148 d = wd;
2149 } else
3b0e5bb5 2150 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2151
376fecf6
LP
2152 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2153 *exit_status = EXIT_CHDIR;
2b3c1b9e 2154 return -errno;
376fecf6 2155 }
e7f1e7c6
DH
2156
2157 return 0;
2158}
2159
74dd6b51
LP
2160static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2161 key_serial_t keyring;
2162
2163 assert(u);
2164 assert(p);
2165
2166 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2167 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2168 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2169 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2170 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2171 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2172
2173 if (!(p->flags & EXEC_NEW_KEYRING))
2174 return 0;
2175
2176 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2177 if (keyring == -1) {
2178 if (errno == ENOSYS)
2179 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2180 else if (IN_SET(errno, EACCES, EPERM))
2181 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2182 else if (errno == EDQUOT)
2183 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2184 else
2185 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2186
2187 return 0;
2188 }
2189
b3415f5d
LP
2190 /* Populate they keyring with the invocation ID by default. */
2191 if (!sd_id128_is_null(u->invocation_id)) {
2192 key_serial_t key;
2193
2194 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2195 if (key == -1)
2196 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2197 else {
2198 if (keyctl(KEYCTL_SETPERM, key,
2199 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2200 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2201 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2202 }
2203 }
2204
74dd6b51
LP
2205 /* And now, make the keyring owned by the service's user */
2206 if (uid_is_valid(uid) || gid_is_valid(gid))
2207 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2208 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2209
2210 return 0;
2211}
2212
29206d46
LP
/* Appends each valid (i.e. non-negative) fd of the socket pair to array[], advancing *n for every entry
 * added. A NULL pair is silently ignored. The caller must make sure array[] has room for up to two more
 * entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2225
a34ceba6
LP
2226static int close_remaining_fds(
2227 const ExecParameters *params,
2228 ExecRuntime *runtime,
29206d46 2229 DynamicCreds *dcreds,
00d9ef85 2230 int user_lookup_fd,
a34ceba6
LP
2231 int socket_fd,
2232 int *fds, unsigned n_fds) {
2233
2234 unsigned n_dont_close = 0;
00d9ef85 2235 int dont_close[n_fds + 12];
a34ceba6
LP
2236
2237 assert(params);
2238
2239 if (params->stdin_fd >= 0)
2240 dont_close[n_dont_close++] = params->stdin_fd;
2241 if (params->stdout_fd >= 0)
2242 dont_close[n_dont_close++] = params->stdout_fd;
2243 if (params->stderr_fd >= 0)
2244 dont_close[n_dont_close++] = params->stderr_fd;
2245
2246 if (socket_fd >= 0)
2247 dont_close[n_dont_close++] = socket_fd;
2248 if (n_fds > 0) {
2249 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2250 n_dont_close += n_fds;
2251 }
2252
29206d46
LP
2253 if (runtime)
2254 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2255
2256 if (dcreds) {
2257 if (dcreds->user)
2258 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2259 if (dcreds->group)
2260 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2261 }
2262
00d9ef85
LP
2263 if (user_lookup_fd >= 0)
2264 dont_close[n_dont_close++] = user_lookup_fd;
2265
a34ceba6
LP
2266 return close_all_fds(dont_close, n_dont_close);
2267}
2268
00d9ef85
LP
2269static int send_user_lookup(
2270 Unit *unit,
2271 int user_lookup_fd,
2272 uid_t uid,
2273 gid_t gid) {
2274
2275 assert(unit);
2276
2277 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2278 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2279 * specified. */
2280
2281 if (user_lookup_fd < 0)
2282 return 0;
2283
2284 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2285 return 0;
2286
2287 if (writev(user_lookup_fd,
2288 (struct iovec[]) {
2289 { .iov_base = &uid, .iov_len = sizeof(uid) },
2290 { .iov_base = &gid, .iov_len = sizeof(gid) },
2291 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2292 return -errno;
2293
2294 return 0;
2295}
2296
6732edab
LP
2297static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2298 int r;
2299
2300 assert(c);
2301 assert(home);
2302 assert(buf);
2303
2304 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2305
2306 if (*home)
2307 return 0;
2308
2309 if (!c->working_directory_home)
2310 return 0;
2311
2312 if (uid == 0) {
2313 /* Hardcode /root as home directory for UID 0 */
2314 *home = "/root";
2315 return 1;
2316 }
2317
2318 r = get_home_dir(buf);
2319 if (r < 0)
2320 return r;
2321
2322 *home = *buf;
2323 return 1;
2324}
2325
ff0af2a1 2326static int exec_child(
f2341e0a 2327 Unit *unit,
ff0af2a1
LP
2328 ExecCommand *command,
2329 const ExecContext *context,
2330 const ExecParameters *params,
2331 ExecRuntime *runtime,
29206d46 2332 DynamicCreds *dcreds,
ff0af2a1
LP
2333 char **argv,
2334 int socket_fd,
52c239d7 2335 int named_iofds[3],
4c47affc
FB
2336 int *fds,
2337 unsigned n_storage_fds,
9b141911 2338 unsigned n_socket_fds,
ff0af2a1 2339 char **files_env,
00d9ef85 2340 int user_lookup_fd,
70dd455c
ZJS
2341 int *exit_status,
2342 char **error_message) {
d35fbf6b 2343
2065ca69 2344 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2345 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2346 _cleanup_free_ gid_t *supplementary_gids = NULL;
2347 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2348 const char *home = NULL, *shell = NULL;
7bce046b
LP
2349 dev_t journal_stream_dev = 0;
2350 ino_t journal_stream_ino = 0;
165a31c0
LP
2351 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2352 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2353 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2354 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
ecfbc84f 2355#ifdef HAVE_SELINUX
43b1f709 2356 bool use_selinux = false;
ecfbc84f
YW
2357#endif
2358#ifdef HAVE_SMACK
43b1f709 2359 bool use_smack = false;
ecfbc84f
YW
2360#endif
2361#ifdef HAVE_APPARMOR
43b1f709 2362 bool use_apparmor = false;
ecfbc84f 2363#endif
fed1e721
LP
2364 uid_t uid = UID_INVALID;
2365 gid_t gid = GID_INVALID;
4d885bd3 2366 int i, r, ngids = 0;
4c47affc 2367 unsigned n_fds;
3536f49e 2368 ExecDirectoryType dt;
165a31c0 2369 int secure_bits;
034c6ed7 2370
f2341e0a 2371 assert(unit);
5cb5a6ff
LP
2372 assert(command);
2373 assert(context);
d35fbf6b 2374 assert(params);
ff0af2a1 2375 assert(exit_status);
70dd455c
ZJS
2376 assert(error_message);
2377 /* We don't always set error_message, hence it must be initialized */
2378 assert(*error_message == NULL);
d35fbf6b
DM
2379
2380 rename_process_from_path(command->path);
2381
2382 /* We reset exactly these signals, since they are the
2383 * only ones we set to SIG_IGN in the main daemon. All
2384 * others we leave untouched because we set them to
2385 * SIG_DFL or a valid handler initially, both of which
2386 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2387 (void) default_signals(SIGNALS_CRASH_HANDLER,
2388 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2389
2390 if (context->ignore_sigpipe)
ce30c8dc 2391 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2392
ff0af2a1
LP
2393 r = reset_signal_mask();
2394 if (r < 0) {
2395 *exit_status = EXIT_SIGNAL_MASK;
70dd455c
ZJS
2396 *error_message = strdup("Failed to reset signal mask");
2397 /* If strdup fails, here and below, we will just print the generic error message. */
ff0af2a1 2398 return r;
d35fbf6b 2399 }
034c6ed7 2400
d35fbf6b
DM
2401 if (params->idle_pipe)
2402 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2403
d35fbf6b
DM
2404 /* Close sockets very early to make sure we don't
2405 * block init reexecution because it cannot bind its
2406 * sockets */
ff0af2a1 2407
d35fbf6b 2408 log_forget_fds();
4f2d528d 2409
4c47affc 2410 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2411 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2412 if (r < 0) {
2413 *exit_status = EXIT_FDS;
70dd455c 2414 *error_message = strdup("Failed to close remaining fds");
ff0af2a1 2415 return r;
8c7be95e
LP
2416 }
2417
d35fbf6b
DM
2418 if (!context->same_pgrp)
2419 if (setsid() < 0) {
ff0af2a1 2420 *exit_status = EXIT_SETSID;
d35fbf6b
DM
2421 return -errno;
2422 }
9e2f7c11 2423
1e22b5cd 2424 exec_context_tty_reset(context, params);
d35fbf6b 2425
c891efaf 2426 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2427 const char *vc = params->confirm_spawn;
3b20f877
FB
2428 _cleanup_free_ char *cmdline = NULL;
2429
2430 cmdline = exec_command_line(argv);
2431 if (!cmdline) {
2432 *exit_status = EXIT_CONFIRM;
2433 return -ENOMEM;
2434 }
d35fbf6b 2435
eedf223a 2436 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2437 if (r != CONFIRM_EXECUTE) {
2438 if (r == CONFIRM_PRETEND_SUCCESS) {
2439 *exit_status = EXIT_SUCCESS;
2440 return 0;
2441 }
ff0af2a1 2442 *exit_status = EXIT_CONFIRM;
70dd455c 2443 *error_message = strdup("Execution cancelled");
d35fbf6b 2444 return -ECANCELED;
d35fbf6b
DM
2445 }
2446 }
1a63a750 2447
29206d46
LP
2448 if (context->dynamic_user && dcreds) {
2449
409093fe
LP
2450 /* Make sure we bypass our own NSS module for any NSS checks */
2451 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2452 *exit_status = EXIT_USER;
70dd455c 2453 *error_message = strdup("Failed to update environment");
409093fe
LP
2454 return -errno;
2455 }
2456
29206d46 2457 r = dynamic_creds_realize(dcreds, &uid, &gid);
ff0af2a1
LP
2458 if (r < 0) {
2459 *exit_status = EXIT_USER;
70dd455c 2460 *error_message = strdup("Failed to update dynamic user credentials");
ff0af2a1 2461 return r;
524daa8c 2462 }
524daa8c 2463
70dd455c 2464 if (!uid_is_valid(uid)) {
29206d46 2465 *exit_status = EXIT_USER;
70dd455c
ZJS
2466 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2467 /* If asprintf fails, here and below, we will just print the generic error message. */
2468 return -ESRCH;
2469 }
2470
2471 if (!gid_is_valid(gid)) {
2472 *exit_status = EXIT_USER;
2473 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2474 return -ESRCH;
2475 }
5bc7452b 2476
29206d46
LP
2477 if (dcreds->user)
2478 username = dcreds->user->name;
2479
2480 } else {
4d885bd3
DH
2481 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2482 if (r < 0) {
2483 *exit_status = EXIT_USER;
70dd455c 2484 *error_message = strdup("Failed to determine user credentials");
4d885bd3 2485 return r;
5bc7452b 2486 }
5bc7452b 2487
4d885bd3
DH
2488 r = get_fixed_group(context, &groupname, &gid);
2489 if (r < 0) {
2490 *exit_status = EXIT_GROUP;
70dd455c 2491 *error_message = strdup("Failed to determine group credentials");
4d885bd3
DH
2492 return r;
2493 }
cdc5d5c5 2494 }
29206d46 2495
cdc5d5c5
DH
2496 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2497 r = get_supplementary_groups(context, username, groupname, gid,
2498 &supplementary_gids, &ngids);
2499 if (r < 0) {
2500 *exit_status = EXIT_GROUP;
70dd455c 2501 *error_message = strdup("Failed to determine supplementary groups");
cdc5d5c5 2502 return r;
29206d46 2503 }
5bc7452b 2504
00d9ef85
LP
2505 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2506 if (r < 0) {
2507 *exit_status = EXIT_USER;
70dd455c 2508 *error_message = strdup("Failed to send user credentials to PID1");
00d9ef85
LP
2509 return r;
2510 }
2511
2512 user_lookup_fd = safe_close(user_lookup_fd);
2513
6732edab
LP
2514 r = acquire_home(context, uid, &home, &home_buffer);
2515 if (r < 0) {
2516 *exit_status = EXIT_CHDIR;
2517 *error_message = strdup("Failed to determine $HOME for user");
2518 return r;
2519 }
2520
d35fbf6b
DM
2521 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2522 * must sure to drop O_NONBLOCK */
2523 if (socket_fd >= 0)
a34ceba6 2524 (void) fd_nonblock(socket_fd, false);
acbb0225 2525
52c239d7 2526 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2527 if (r < 0) {
2528 *exit_status = EXIT_STDIN;
70dd455c 2529 *error_message = strdup("Failed to set up stdin");
ff0af2a1 2530 return r;
d35fbf6b 2531 }
034c6ed7 2532
52c239d7 2533 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2534 if (r < 0) {
2535 *exit_status = EXIT_STDOUT;
70dd455c 2536 *error_message = strdup("Failed to set up stdout");
ff0af2a1 2537 return r;
d35fbf6b
DM
2538 }
2539
52c239d7 2540 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2541 if (r < 0) {
2542 *exit_status = EXIT_STDERR;
70dd455c 2543 *error_message = strdup("Failed to set up stderr");
ff0af2a1 2544 return r;
d35fbf6b
DM
2545 }
2546
2547 if (params->cgroup_path) {
ff0af2a1
LP
2548 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2549 if (r < 0) {
2550 *exit_status = EXIT_CGROUP;
70dd455c 2551 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
ff0af2a1 2552 return r;
309bff19 2553 }
d35fbf6b 2554 }
309bff19 2555
d35fbf6b 2556 if (context->oom_score_adjust_set) {
d5243d62 2557 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2558
d5243d62
LP
2559 /* When we can't make this change due to EPERM, then
2560 * let's silently skip over it. User namespaces
2561 * prohibit write access to this file, and we
2562 * shouldn't trip up over that. */
613b411c 2563
d5243d62 2564 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2565 r = write_string_file("/proc/self/oom_score_adj", t, 0);
6cb7fa17 2566 if (r == -EPERM || r == -EACCES) {
ff0af2a1 2567 log_open();
f2341e0a 2568 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
ff0af2a1
LP
2569 log_close();
2570 } else if (r < 0) {
2571 *exit_status = EXIT_OOM_ADJUST;
70dd455c 2572 *error_message = strdup("Failed to write /proc/self/oom_score_adj");
d35fbf6b 2573 return -errno;
613b411c 2574 }
d35fbf6b
DM
2575 }
2576
2577 if (context->nice_set)
2578 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2579 *exit_status = EXIT_NICE;
d35fbf6b 2580 return -errno;
613b411c
LP
2581 }
2582
d35fbf6b
DM
2583 if (context->cpu_sched_set) {
2584 struct sched_param param = {
2585 .sched_priority = context->cpu_sched_priority,
2586 };
2587
ff0af2a1
LP
2588 r = sched_setscheduler(0,
2589 context->cpu_sched_policy |
2590 (context->cpu_sched_reset_on_fork ?
2591 SCHED_RESET_ON_FORK : 0),
2592 &param);
2593 if (r < 0) {
2594 *exit_status = EXIT_SETSCHEDULER;
d35fbf6b 2595 return -errno;
fc9b2a84 2596 }
d35fbf6b 2597 }
fc9b2a84 2598
d35fbf6b
DM
2599 if (context->cpuset)
2600 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2601 *exit_status = EXIT_CPUAFFINITY;
d35fbf6b 2602 return -errno;
034c6ed7
LP
2603 }
2604
d35fbf6b
DM
2605 if (context->ioprio_set)
2606 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2607 *exit_status = EXIT_IOPRIO;
d35fbf6b
DM
2608 return -errno;
2609 }
da726a4d 2610
d35fbf6b
DM
2611 if (context->timer_slack_nsec != NSEC_INFINITY)
2612 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2613 *exit_status = EXIT_TIMERSLACK;
d35fbf6b 2614 return -errno;
4c2630eb 2615 }
9eba9da4 2616
050f7277 2617 if (context->personality != PERSONALITY_INVALID)
d35fbf6b 2618 if (personality(context->personality) < 0) {
ff0af2a1 2619 *exit_status = EXIT_PERSONALITY;
d35fbf6b 2620 return -errno;
4c2630eb 2621 }
94f04347 2622
d35fbf6b 2623 if (context->utmp_id)
df0ff127 2624 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2625 context->tty_path,
023a4f67
LP
2626 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2627 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2628 USER_PROCESS,
6a93917d 2629 username);
d35fbf6b 2630
e0d2adfd 2631 if (context->user) {
ff0af2a1
LP
2632 r = chown_terminal(STDIN_FILENO, uid);
2633 if (r < 0) {
2634 *exit_status = EXIT_STDIN;
2635 return r;
071830ff 2636 }
d35fbf6b 2637 }
8e274523 2638
a931ad47
LP
2639 /* If delegation is enabled we'll pass ownership of the cgroup
2640 * (but only in systemd's own controller hierarchy!) to the
2641 * user of the new process. */
584b8688 2642 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2643 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2644 if (r < 0) {
2645 *exit_status = EXIT_CGROUP;
2646 return r;
d35fbf6b 2647 }
034c6ed7 2648
034c6ed7 2649
ff0af2a1
LP
2650 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2651 if (r < 0) {
2652 *exit_status = EXIT_CGROUP;
2653 return r;
034c6ed7 2654 }
d35fbf6b 2655 }
034c6ed7 2656
3536f49e 2657 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
8679efde 2658 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3536f49e 2659 if (r < 0)
07689d5d 2660 return r;
d35fbf6b 2661 }
94f04347 2662
7bce046b 2663 r = build_environment(
fd63e712 2664 unit,
7bce046b
LP
2665 context,
2666 params,
2667 n_fds,
2668 home,
2669 username,
2670 shell,
2671 journal_stream_dev,
2672 journal_stream_ino,
2673 &our_env);
2065ca69
JW
2674 if (r < 0) {
2675 *exit_status = EXIT_MEMORY;
2676 return r;
2677 }
2678
2679 r = build_pass_environment(context, &pass_env);
2680 if (r < 0) {
2681 *exit_status = EXIT_MEMORY;
2682 return r;
2683 }
2684
2685 accum_env = strv_env_merge(5,
2686 params->environment,
2687 our_env,
2688 pass_env,
2689 context->environment,
2690 files_env,
2691 NULL);
2692 if (!accum_env) {
2693 *exit_status = EXIT_MEMORY;
2694 return -ENOMEM;
2695 }
1280503b 2696 accum_env = strv_env_clean(accum_env);
2065ca69 2697
096424d1 2698 (void) umask(context->umask);
b213e1c1 2699
74dd6b51
LP
2700 r = setup_keyring(unit, params, uid, gid);
2701 if (r < 0) {
2702 *exit_status = EXIT_KEYRING;
2703 return r;
2704 }
2705
165a31c0 2706 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 2707 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 2708
165a31c0
LP
2709 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2710 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 2711
165a31c0
LP
2712 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2713 if (needs_ambient_hack)
2714 needs_setuid = false;
2715 else
2716 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2717
2718 if (needs_sandboxing) {
7f18ef0a
FK
2719 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2720 * present. The actual MAC context application will happen later, as late as possible, to avoid
2721 * impacting our own code paths. */
2722
2723#ifdef HAVE_SELINUX
43b1f709 2724 use_selinux = mac_selinux_use();
7f18ef0a 2725#endif
7f18ef0a 2726#ifdef HAVE_SMACK
43b1f709 2727 use_smack = mac_smack_use();
7f18ef0a 2728#endif
7f18ef0a 2729#ifdef HAVE_APPARMOR
43b1f709 2730 use_apparmor = mac_apparmor_use();
7f18ef0a 2731#endif
165a31c0 2732 }
7f18ef0a 2733
165a31c0
LP
2734 if (needs_setuid) {
2735 if (context->pam_name && username) {
2736 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2737 if (r < 0) {
2738 *exit_status = EXIT_PAM;
2739 return r;
2740 }
2741 }
b213e1c1 2742 }
ac45f971 2743
d35fbf6b 2744 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
ff0af2a1
LP
2745 r = setup_netns(runtime->netns_storage_socket);
2746 if (r < 0) {
2747 *exit_status = EXIT_NETWORK;
2748 return r;
d35fbf6b
DM
2749 }
2750 }
169c1bda 2751
ee818b89 2752 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 2753 if (needs_mount_namespace) {
6818c54c 2754 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
2755 if (r < 0) {
2756 *exit_status = EXIT_NAMESPACE;
2757 return r;
2758 }
d35fbf6b 2759 }
81a2b7ce 2760
50b3dfb9 2761 /* Apply just after mount namespace setup */
376fecf6
LP
2762 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2763 if (r < 0)
50b3dfb9 2764 return r;
50b3dfb9 2765
bbeea271 2766 /* Drop groups as early as possbile */
165a31c0 2767 if (needs_setuid) {
4d885bd3 2768 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1
LP
2769 if (r < 0) {
2770 *exit_status = EXIT_GROUP;
2771 return r;
2772 }
165a31c0 2773 }
096424d1 2774
165a31c0 2775 if (needs_sandboxing) {
9008e1ac 2776#ifdef HAVE_SELINUX
43b1f709 2777 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
2778 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2779 if (r < 0) {
2780 *exit_status = EXIT_SELINUX_CONTEXT;
2781 return r;
2782 }
9008e1ac 2783 }
9008e1ac
MS
2784#endif
2785
937ccce9
LP
2786 if (context->private_users) {
2787 r = setup_private_users(uid, gid);
2788 if (r < 0) {
2789 *exit_status = EXIT_USER;
2790 return r;
2791 }
d251207d
LP
2792 }
2793 }
2794
165a31c0
LP
2795 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2796 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2797 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
2798 r = close_all_fds(fds, n_fds);
2799 if (r >= 0)
2800 r = shift_fds(fds, n_fds);
2801 if (r >= 0)
4c47affc 2802 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1
LP
2803 if (r < 0) {
2804 *exit_status = EXIT_FDS;
2805 return r;
d35fbf6b 2806 }
e66cf1a3 2807
165a31c0 2808 secure_bits = context->secure_bits;
e66cf1a3 2809
165a31c0
LP
2810 if (needs_sandboxing) {
2811 uint64_t bset;
755d4b67 2812
d35fbf6b 2813 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 2814
d35fbf6b
DM
2815 if (!context->rlimit[i])
2816 continue;
2817
03857c43
LP
2818 r = setrlimit_closest(i, context->rlimit[i]);
2819 if (r < 0) {
ff0af2a1 2820 *exit_status = EXIT_LIMITS;
03857c43 2821 return r;
e66cf1a3
LP
2822 }
2823 }
2824
f4170c67
LP
2825 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2826 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2827 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2828 *exit_status = EXIT_LIMITS;
2829 return -errno;
2830 }
2831 }
2832
165a31c0
LP
2833 bset = context->capability_bounding_set;
2834 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2835 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2836 * instead of us doing that */
2837 if (needs_ambient_hack)
2838 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2839 (UINT64_C(1) << CAP_SETUID) |
2840 (UINT64_C(1) << CAP_SETGID);
2841
2842 if (!cap_test_all(bset)) {
2843 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
2844 if (r < 0) {
2845 *exit_status = EXIT_CAPABILITIES;
70dd455c 2846 *error_message = strdup("Failed to drop capabilities");
ff0af2a1 2847 return r;
3b8bddde 2848 }
4c2630eb 2849 }
3b8bddde 2850
755d4b67
IP
2851 /* This is done before enforce_user, but ambient set
2852 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
2853 if (!needs_ambient_hack &&
2854 context->capability_ambient_set != 0) {
755d4b67
IP
2855 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2856 if (r < 0) {
2857 *exit_status = EXIT_CAPABILITIES;
70dd455c 2858 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
755d4b67
IP
2859 return r;
2860 }
755d4b67 2861 }
165a31c0 2862 }
755d4b67 2863
165a31c0 2864 if (needs_setuid) {
d35fbf6b 2865 if (context->user) {
ff0af2a1
LP
2866 r = enforce_user(context, uid);
2867 if (r < 0) {
2868 *exit_status = EXIT_USER;
70dd455c 2869 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
ff0af2a1 2870 return r;
5b6319dc 2871 }
165a31c0
LP
2872
2873 if (!needs_ambient_hack &&
2874 context->capability_ambient_set != 0) {
755d4b67
IP
2875
2876 /* Fix the ambient capabilities after user change. */
2877 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2878 if (r < 0) {
2879 *exit_status = EXIT_CAPABILITIES;
70dd455c 2880 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
755d4b67
IP
2881 return r;
2882 }
2883
2884 /* If we were asked to change user and ambient capabilities
2885 * were requested, we had to add keep-caps to the securebits
2886 * so that we would maintain the inherited capability set
2887 * through the setresuid(). Make sure that the bit is added
2888 * also to the context secure_bits so that we don't try to
2889 * drop the bit away next. */
2890
7f508f2c 2891 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 2892 }
5b6319dc 2893 }
165a31c0 2894 }
d35fbf6b 2895
165a31c0 2896 if (needs_sandboxing) {
5cd9cd35
LP
2897 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2898 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2899 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2900 * are restricted. */
2901
2902#ifdef HAVE_SELINUX
43b1f709 2903 if (use_selinux) {
5cd9cd35
LP
2904 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2905
2906 if (exec_context) {
2907 r = setexeccon(exec_context);
2908 if (r < 0) {
2909 *exit_status = EXIT_SELINUX_CONTEXT;
70dd455c 2910 (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
5cd9cd35
LP
2911 return r;
2912 }
2913 }
2914 }
2915#endif
2916
7f18ef0a 2917#ifdef HAVE_SMACK
43b1f709 2918 if (use_smack) {
7f18ef0a
FK
2919 r = setup_smack(context, command);
2920 if (r < 0) {
2921 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2922 *error_message = strdup("Failed to set SMACK process label");
2923 return r;
2924 }
5cd9cd35 2925 }
7f18ef0a 2926#endif
5cd9cd35
LP
2927
2928#ifdef HAVE_APPARMOR
43b1f709 2929 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
2930 r = aa_change_onexec(context->apparmor_profile);
2931 if (r < 0 && !context->apparmor_profile_ignore) {
2932 *exit_status = EXIT_APPARMOR_PROFILE;
70dd455c
ZJS
2933 (void) asprintf(error_message,
2934 "Failed to prepare AppArmor profile change to %s",
2935 context->apparmor_profile);
5cd9cd35
LP
2936 return -errno;
2937 }
2938 }
2939#endif
2940
165a31c0
LP
2941 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2942 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
2943 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2944 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 2945 *exit_status = EXIT_SECUREBITS;
70dd455c 2946 *error_message = strdup("Failed to set secure bits");
d35fbf6b 2947 return -errno;
ff01d048 2948 }
5b6319dc 2949
59eeb84b 2950 if (context_has_no_new_privileges(context))
d35fbf6b 2951 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 2952 *exit_status = EXIT_NO_NEW_PRIVILEGES;
70dd455c 2953 *error_message = strdup("Failed to disable new privileges");
d35fbf6b
DM
2954 return -errno;
2955 }
2956
2957#ifdef HAVE_SECCOMP
469830d1
LP
2958 r = apply_address_families(unit, context);
2959 if (r < 0) {
2960 *exit_status = EXIT_ADDRESS_FAMILIES;
5b3637b4 2961 *error_message = strdup("Failed to restrict address families");
469830d1 2962 return r;
4c2630eb 2963 }
04aa0cb9 2964
469830d1
LP
2965 r = apply_memory_deny_write_execute(unit, context);
2966 if (r < 0) {
2967 *exit_status = EXIT_SECCOMP;
5b3637b4 2968 *error_message = strdup("Failed to disable writing to executable memory");
469830d1 2969 return r;
f3e43635 2970 }
f4170c67 2971
469830d1
LP
2972 r = apply_restrict_realtime(unit, context);
2973 if (r < 0) {
2974 *exit_status = EXIT_SECCOMP;
5b3637b4 2975 *error_message = strdup("Failed to apply realtime restrictions");
469830d1 2976 return r;
f4170c67
LP
2977 }
2978
add00535
LP
2979 r = apply_restrict_namespaces(unit, context);
2980 if (r < 0) {
2981 *exit_status = EXIT_SECCOMP;
70dd455c 2982 *error_message = strdup("Failed to apply namespace restrictions");
add00535
LP
2983 return r;
2984 }
2985
469830d1
LP
2986 r = apply_protect_sysctl(unit, context);
2987 if (r < 0) {
2988 *exit_status = EXIT_SECCOMP;
5b3637b4 2989 *error_message = strdup("Failed to apply sysctl restrictions");
469830d1 2990 return r;
502d704e
DH
2991 }
2992
469830d1
LP
2993 r = apply_protect_kernel_modules(unit, context);
2994 if (r < 0) {
2995 *exit_status = EXIT_SECCOMP;
5b3637b4 2996 *error_message = strdup("Failed to apply module loading restrictions");
469830d1 2997 return r;
59eeb84b
LP
2998 }
2999
469830d1
LP
3000 r = apply_private_devices(unit, context);
3001 if (r < 0) {
3002 *exit_status = EXIT_SECCOMP;
5b3637b4 3003 *error_message = strdup("Failed to set up private devices");
469830d1
LP
3004 return r;
3005 }
3006
3007 r = apply_syscall_archs(unit, context);
3008 if (r < 0) {
3009 *exit_status = EXIT_SECCOMP;
5b3637b4 3010 *error_message = strdup("Failed to apply syscall architecture restrictions");
469830d1 3011 return r;
ba128bb8
LP
3012 }
3013
78e864e5
TM
3014 r = apply_lock_personality(unit, context);
3015 if (r < 0) {
3016 *exit_status = EXIT_SECCOMP;
3017 *error_message = strdup("Failed to lock personalities");
3018 return r;
3019 }
3020
5cd9cd35
LP
3021 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3022 * by the filter as little as possible. */
165a31c0 3023 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3024 if (r < 0) {
3025 *exit_status = EXIT_SECCOMP;
5b3637b4 3026 *error_message = strdup("Failed to apply syscall filters");
469830d1 3027 return r;
d35fbf6b
DM
3028 }
3029#endif
d35fbf6b 3030 }
034c6ed7 3031
2065ca69 3032 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3033 if (!final_argv) {
ff0af2a1 3034 *exit_status = EXIT_MEMORY;
70dd455c 3035 *error_message = strdup("Failed to prepare process arguments");
d35fbf6b
DM
3036 return -ENOMEM;
3037 }
034c6ed7 3038
553d2243 3039 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3040 _cleanup_free_ char *line;
81a2b7ce 3041
d35fbf6b
DM
3042 line = exec_command_line(final_argv);
3043 if (line) {
3044 log_open();
f2341e0a 3045 log_struct(LOG_DEBUG,
f2341e0a
LP
3046 "EXECUTABLE=%s", command->path,
3047 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3048 LOG_UNIT_ID(unit),
f2341e0a 3049 NULL);
d35fbf6b
DM
3050 log_close();
3051 }
3052 }
dd305ec9 3053
2065ca69 3054 execve(command->path, final_argv, accum_env);
ff0af2a1 3055 *exit_status = EXIT_EXEC;
d35fbf6b
DM
3056 return -errno;
3057}
81a2b7ce 3058
f2341e0a
LP
3059int exec_spawn(Unit *unit,
3060 ExecCommand *command,
d35fbf6b
DM
3061 const ExecContext *context,
3062 const ExecParameters *params,
3063 ExecRuntime *runtime,
29206d46 3064 DynamicCreds *dcreds,
d35fbf6b 3065 pid_t *ret) {
8351ceae 3066
d35fbf6b 3067 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3068 int *fds = NULL;
4c47affc 3069 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3070 _cleanup_free_ char *line = NULL;
3071 int socket_fd, r;
52c239d7 3072 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3073 char **argv;
d35fbf6b 3074 pid_t pid;
8351ceae 3075
f2341e0a 3076 assert(unit);
d35fbf6b
DM
3077 assert(command);
3078 assert(context);
3079 assert(ret);
3080 assert(params);
4c47affc 3081 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3082
d35fbf6b
DM
3083 if (context->std_input == EXEC_INPUT_SOCKET ||
3084 context->std_output == EXEC_OUTPUT_SOCKET ||
3085 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3086
4c47affc 3087 if (params->n_socket_fds > 1) {
f2341e0a 3088 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3089 return -EINVAL;
ff0af2a1 3090 }
eef65bf3 3091
4c47affc 3092 if (params->n_socket_fds == 0) {
488ab41c
AA
3093 log_unit_error(unit, "Got no socket.");
3094 return -EINVAL;
3095 }
3096
d35fbf6b
DM
3097 socket_fd = params->fds[0];
3098 } else {
3099 socket_fd = -1;
3100 fds = params->fds;
4c47affc 3101 n_storage_fds = params->n_storage_fds;
9b141911 3102 n_socket_fds = params->n_socket_fds;
d35fbf6b 3103 }
94f04347 3104
52c239d7
LB
3105 r = exec_context_named_iofds(unit, context, params, named_iofds);
3106 if (r < 0)
3107 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3108
f2341e0a 3109 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3110 if (r < 0)
f2341e0a 3111 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3112
d35fbf6b 3113 argv = params->argv ?: command->argv;
d35fbf6b
DM
3114 line = exec_command_line(argv);
3115 if (!line)
3116 return log_oom();
fab56fc5 3117
f2341e0a 3118 log_struct(LOG_DEBUG,
f2341e0a
LP
3119 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3120 "EXECUTABLE=%s", command->path,
ba360bb0 3121 LOG_UNIT_ID(unit),
f2341e0a 3122 NULL);
d35fbf6b
DM
3123 pid = fork();
3124 if (pid < 0)
74129a12 3125 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3126
3127 if (pid == 0) {
ff0af2a1 3128 int exit_status;
70dd455c 3129 _cleanup_free_ char *error_message = NULL;
ff0af2a1 3130
f2341e0a
LP
3131 r = exec_child(unit,
3132 command,
ff0af2a1
LP
3133 context,
3134 params,
3135 runtime,
29206d46 3136 dcreds,
ff0af2a1
LP
3137 argv,
3138 socket_fd,
52c239d7 3139 named_iofds,
4c47affc
FB
3140 fds,
3141 n_storage_fds,
9b141911 3142 n_socket_fds,
ff0af2a1 3143 files_env,
00d9ef85 3144 unit->manager->user_lookup_fds[1],
70dd455c
ZJS
3145 &exit_status,
3146 &error_message);
ff0af2a1 3147 if (r < 0) {
4c2630eb 3148 log_open();
70dd455c
ZJS
3149 if (error_message)
3150 log_struct_errno(LOG_ERR, r,
2b044526 3151 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3152 LOG_UNIT_ID(unit),
3153 LOG_UNIT_MESSAGE(unit, "%s: %m",
3154 error_message),
3155 "EXECUTABLE=%s", command->path,
3156 NULL);
3ed0cd26 3157 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
4d8b0f0f
YW
3158 log_struct_errno(LOG_INFO, r,
3159 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3160 LOG_UNIT_ID(unit),
3161 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3162 command->path),
3163 "EXECUTABLE=%s", command->path,
3164 NULL);
70dd455c
ZJS
3165 else
3166 log_struct_errno(LOG_ERR, r,
2b044526 3167 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c
ZJS
3168 LOG_UNIT_ID(unit),
3169 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3170 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3171 command->path),
3172 "EXECUTABLE=%s", command->path,
3173 NULL);
4c2630eb
MS
3174 }
3175
ff0af2a1 3176 _exit(exit_status);
034c6ed7
LP
3177 }
3178
f2341e0a 3179 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3180
80876c20
LP
3181 /* We add the new process to the cgroup both in the child (so
3182 * that we can be sure that no user code is ever executed
3183 * outside of the cgroup) and in the parent (so that we can be
3184 * sure that when we kill the cgroup the process will be
3185 * killed too). */
d35fbf6b 3186 if (params->cgroup_path)
dd305ec9 3187 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3188
b58b4116 3189 exec_status_start(&command->exec_status, pid);
9fb86720 3190
034c6ed7 3191 *ret = pid;
5cb5a6ff
LP
3192 return 0;
3193}
3194
034c6ed7 3195void exec_context_init(ExecContext *c) {
3536f49e
YW
3196 ExecDirectoryType i;
3197
034c6ed7
LP
3198 assert(c);
3199
4c12626c 3200 c->umask = 0022;
9eba9da4 3201 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3202 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3203 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3204 c->syslog_level_prefix = true;
353e12c2 3205 c->ignore_sigpipe = true;
3a43da28 3206 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3207 c->personality = PERSONALITY_INVALID;
3536f49e
YW
3208 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3209 c->directories[i].mode = 0755;
a103496c 3210 c->capability_bounding_set = CAP_ALL;
add00535 3211 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
3212}
3213
613b411c 3214void exec_context_done(ExecContext *c) {
5cb5a6ff 3215 unsigned l;
3536f49e 3216 ExecDirectoryType i;
5cb5a6ff
LP
3217
3218 assert(c);
3219
6796073e
LP
3220 c->environment = strv_free(c->environment);
3221 c->environment_files = strv_free(c->environment_files);
b4c14404 3222 c->pass_environment = strv_free(c->pass_environment);
8c7be95e 3223
1f6b4113 3224 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3225 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3226
52c239d7
LB
3227 for (l = 0; l < 3; l++)
3228 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3229
a1e58e8e
LP
3230 c->working_directory = mfree(c->working_directory);
3231 c->root_directory = mfree(c->root_directory);
915e6d16 3232 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3233 c->tty_path = mfree(c->tty_path);
3234 c->syslog_identifier = mfree(c->syslog_identifier);
3235 c->user = mfree(c->user);
3236 c->group = mfree(c->group);
034c6ed7 3237
6796073e 3238 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3239
a1e58e8e 3240 c->pam_name = mfree(c->pam_name);
5b6319dc 3241
2a624c36
AP
3242 c->read_only_paths = strv_free(c->read_only_paths);
3243 c->read_write_paths = strv_free(c->read_write_paths);
3244 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3245
d2d6c096
LP
3246 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3247
82c121a4
LP
3248 if (c->cpuset)
3249 CPU_FREE(c->cpuset);
86a3475b 3250
a1e58e8e
LP
3251 c->utmp_id = mfree(c->utmp_id);
3252 c->selinux_context = mfree(c->selinux_context);
3253 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3254 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3255
525d3cc7
LP
3256 c->syscall_filter = set_free(c->syscall_filter);
3257 c->syscall_archs = set_free(c->syscall_archs);
3258 c->address_families = set_free(c->address_families);
e66cf1a3 3259
3536f49e
YW
3260 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3261 c->directories[i].paths = strv_free(c->directories[i].paths);
e66cf1a3
LP
3262}
3263
3264int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3265 char **i;
3266
3267 assert(c);
3268
3269 if (!runtime_prefix)
3270 return 0;
3271
3536f49e 3272 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3273 _cleanup_free_ char *p;
3274
605405c6 3275 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3276 if (!p)
3277 return -ENOMEM;
3278
3279 /* We execute this synchronously, since we need to be
3280 * sure this is gone when we start the service
3281 * next. */
c6878637 3282 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3283 }
3284
3285 return 0;
5cb5a6ff
LP
3286}
3287
43d0fcbd
LP
3288void exec_command_done(ExecCommand *c) {
3289 assert(c);
3290
a1e58e8e 3291 c->path = mfree(c->path);
43d0fcbd 3292
6796073e 3293 c->argv = strv_free(c->argv);
43d0fcbd
LP
3294}
3295
3296void exec_command_done_array(ExecCommand *c, unsigned n) {
3297 unsigned i;
3298
3299 for (i = 0; i < n; i++)
3300 exec_command_done(c+i);
3301}
3302
f1acf85a 3303ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3304 ExecCommand *i;
3305
3306 while ((i = c)) {
71fda00f 3307 LIST_REMOVE(command, c, i);
43d0fcbd 3308 exec_command_done(i);
5cb5a6ff
LP
3309 free(i);
3310 }
f1acf85a
ZJS
3311
3312 return NULL;
5cb5a6ff
LP
3313}
3314
034c6ed7
LP
3315void exec_command_free_array(ExecCommand **c, unsigned n) {
3316 unsigned i;
3317
f1acf85a
ZJS
3318 for (i = 0; i < n; i++)
3319 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3320}
3321
039f0e70 3322typedef struct InvalidEnvInfo {
f2341e0a 3323 Unit *unit;
039f0e70
LP
3324 const char *path;
3325} InvalidEnvInfo;
3326
3327static void invalid_env(const char *p, void *userdata) {
3328 InvalidEnvInfo *info = userdata;
3329
f2341e0a 3330 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3331}
3332
52c239d7
LB
3333const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3334 assert(c);
3335
3336 switch (fd_index) {
3337 case STDIN_FILENO:
3338 if (c->std_input != EXEC_INPUT_NAMED_FD)
3339 return NULL;
3340 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3341 case STDOUT_FILENO:
3342 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3343 return NULL;
3344 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3345 case STDERR_FILENO:
3346 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3347 return NULL;
3348 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3349 default:
3350 return NULL;
3351 }
3352}
3353
3354int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3355 unsigned i, targets;
56fbd561 3356 const char* stdio_fdname[3];
4c47affc 3357 unsigned n_fds;
52c239d7
LB
3358
3359 assert(c);
3360 assert(p);
3361
3362 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3363 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3364 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3365
3366 for (i = 0; i < 3; i++)
3367 stdio_fdname[i] = exec_context_fdname(c, i);
3368
4c47affc
FB
3369 n_fds = p->n_storage_fds + p->n_socket_fds;
3370
3371 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3372 if (named_iofds[STDIN_FILENO] < 0 &&
3373 c->std_input == EXEC_INPUT_NAMED_FD &&
3374 stdio_fdname[STDIN_FILENO] &&
3375 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3376
52c239d7
LB
3377 named_iofds[STDIN_FILENO] = p->fds[i];
3378 targets--;
56fbd561
ZJS
3379
3380 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3381 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3382 stdio_fdname[STDOUT_FILENO] &&
3383 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3384
52c239d7
LB
3385 named_iofds[STDOUT_FILENO] = p->fds[i];
3386 targets--;
56fbd561
ZJS
3387
3388 } else if (named_iofds[STDERR_FILENO] < 0 &&
3389 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3390 stdio_fdname[STDERR_FILENO] &&
3391 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3392
52c239d7
LB
3393 named_iofds[STDERR_FILENO] = p->fds[i];
3394 targets--;
3395 }
3396
56fbd561 3397 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3398}
3399
f2341e0a 3400int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3401 char **i, **r = NULL;
3402
3403 assert(c);
3404 assert(l);
3405
3406 STRV_FOREACH(i, c->environment_files) {
3407 char *fn;
52511fae
ZJS
3408 int k;
3409 unsigned n;
8c7be95e
LP
3410 bool ignore = false;
3411 char **p;
7fd1b19b 3412 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3413
3414 fn = *i;
3415
3416 if (fn[0] == '-') {
3417 ignore = true;
313cefa1 3418 fn++;
8c7be95e
LP
3419 }
3420
3421 if (!path_is_absolute(fn)) {
8c7be95e
LP
3422 if (ignore)
3423 continue;
3424
3425 strv_free(r);
3426 return -EINVAL;
3427 }
3428
2bef10ab 3429 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3430 k = safe_glob(fn, 0, &pglob);
3431 if (k < 0) {
2bef10ab
PL
3432 if (ignore)
3433 continue;
8c7be95e 3434
2bef10ab 3435 strv_free(r);
d8c92e8b 3436 return k;
2bef10ab 3437 }
8c7be95e 3438
d8c92e8b
ZJS
3439 /* When we don't match anything, -ENOENT should be returned */
3440 assert(pglob.gl_pathc > 0);
3441
3442 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3443 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3444 if (k < 0) {
3445 if (ignore)
3446 continue;
8c7be95e 3447
2bef10ab 3448 strv_free(r);
2bef10ab 3449 return k;
e9c1ea9d 3450 }
ebc05a09 3451 /* Log invalid environment variables with filename */
039f0e70
LP
3452 if (p) {
3453 InvalidEnvInfo info = {
f2341e0a 3454 .unit = unit,
039f0e70
LP
3455 .path = pglob.gl_pathv[n]
3456 };
3457
3458 p = strv_env_clean_with_callback(p, invalid_env, &info);
3459 }
8c7be95e 3460
2bef10ab
PL
3461 if (r == NULL)
3462 r = p;
3463 else {
3464 char **m;
8c7be95e 3465
2bef10ab
PL
3466 m = strv_env_merge(2, r, p);
3467 strv_free(r);
3468 strv_free(p);
c84a9488 3469 if (!m)
2bef10ab 3470 return -ENOMEM;
2bef10ab
PL
3471
3472 r = m;
3473 }
8c7be95e
LP
3474 }
3475 }
3476
3477 *l = r;
3478
3479 return 0;
3480}
3481
6ac8fdc9 3482static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3483 _cleanup_free_ char *active = NULL;
7d6884b6 3484 char *console;
6ac8fdc9 3485
1e22b5cd
LP
3486 if (!tty)
3487 return true;
3488
a119ec7c 3489 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3490
3491 /* trivial identity? */
3492 if (streq(tty, "console"))
3493 return true;
3494
3495 console = resolve_dev_console(&active);
3496 /* if we could not resolve, assume it may */
3497 if (!console)
3498 return true;
3499
3500 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3501 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3502}
3503
3504bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3505
3506 return (ec->tty_reset ||
3507 ec->tty_vhangup ||
3508 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3509 is_terminal_input(ec->std_input) ||
3510 is_terminal_output(ec->std_output) ||
3511 is_terminal_output(ec->std_error)) &&
1e22b5cd 3512 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3513}
3514
15ae422b
LP
3515static void strv_fprintf(FILE *f, char **l) {
3516 char **g;
3517
3518 assert(f);
3519
3520 STRV_FOREACH(g, l)
3521 fprintf(f, " %s", *g);
3522}
3523
5cb5a6ff 3524void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3525 char **e, **d;
94f04347 3526 unsigned i;
3536f49e 3527 ExecDirectoryType dt;
add00535 3528 int r;
9eba9da4 3529
5cb5a6ff
LP
3530 assert(c);
3531 assert(f);
3532
4ad49000 3533 prefix = strempty(prefix);
5cb5a6ff
LP
3534
3535 fprintf(f,
94f04347
LP
3536 "%sUMask: %04o\n"
3537 "%sWorkingDirectory: %s\n"
451a074f 3538 "%sRootDirectory: %s\n"
15ae422b 3539 "%sNonBlocking: %s\n"
64747e2d 3540 "%sPrivateTmp: %s\n"
7f112f50 3541 "%sPrivateDevices: %s\n"
59eeb84b 3542 "%sProtectKernelTunables: %s\n"
e66a2f65 3543 "%sProtectKernelModules: %s\n"
59eeb84b 3544 "%sProtectControlGroups: %s\n"
d251207d
LP
3545 "%sPrivateNetwork: %s\n"
3546 "%sPrivateUsers: %s\n"
1b8689f9
LP
3547 "%sProtectHome: %s\n"
3548 "%sProtectSystem: %s\n"
5d997827 3549 "%sMountAPIVFS: %s\n"
f3e43635 3550 "%sIgnoreSIGPIPE: %s\n"
f4170c67
LP
3551 "%sMemoryDenyWriteExecute: %s\n"
3552 "%sRestrictRealtime: %s\n",
5cb5a6ff 3553 prefix, c->umask,
9eba9da4 3554 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3555 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3556 prefix, yes_no(c->non_blocking),
64747e2d 3557 prefix, yes_no(c->private_tmp),
7f112f50 3558 prefix, yes_no(c->private_devices),
59eeb84b 3559 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3560 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3561 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3562 prefix, yes_no(c->private_network),
3563 prefix, yes_no(c->private_users),
1b8689f9
LP
3564 prefix, protect_home_to_string(c->protect_home),
3565 prefix, protect_system_to_string(c->protect_system),
5d997827 3566 prefix, yes_no(c->mount_apivfs),
f3e43635 3567 prefix, yes_no(c->ignore_sigpipe),
f4170c67
LP
3568 prefix, yes_no(c->memory_deny_write_execute),
3569 prefix, yes_no(c->restrict_realtime));
fb33a393 3570
915e6d16
LP
3571 if (c->root_image)
3572 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3573
8c7be95e
LP
3574 STRV_FOREACH(e, c->environment)
3575 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3576
3577 STRV_FOREACH(e, c->environment_files)
3578 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3579
b4c14404
FB
3580 STRV_FOREACH(e, c->pass_environment)
3581 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3582
53f47dfc
YW
3583 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3584
3536f49e
YW
3585 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3586 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3587
3588 STRV_FOREACH(d, c->directories[dt].paths)
3589 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3590 }
c2bbd90b 3591
fb33a393
LP
3592 if (c->nice_set)
3593 fprintf(f,
3594 "%sNice: %i\n",
3595 prefix, c->nice);
3596
dd6c17b1 3597 if (c->oom_score_adjust_set)
fb33a393 3598 fprintf(f,
dd6c17b1
LP
3599 "%sOOMScoreAdjust: %i\n",
3600 prefix, c->oom_score_adjust);
9eba9da4 3601
94f04347 3602 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3603 if (c->rlimit[i]) {
3604 fprintf(f, "%s%s: " RLIM_FMT "\n",
3605 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3606 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3607 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3608 }
94f04347 3609
f8b69d1d 3610 if (c->ioprio_set) {
1756a011 3611 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3612
837df140
YW
3613 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3614 if (r >= 0)
3615 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3616
3617 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3618 }
94f04347 3619
f8b69d1d 3620 if (c->cpu_sched_set) {
1756a011 3621 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3622
837df140
YW
3623 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3624 if (r >= 0)
3625 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3626
94f04347 3627 fprintf(f,
38b48754
LP
3628 "%sCPUSchedulingPriority: %i\n"
3629 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3630 prefix, c->cpu_sched_priority,
3631 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3632 }
94f04347 3633
82c121a4 3634 if (c->cpuset) {
94f04347 3635 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3636 for (i = 0; i < c->cpuset_ncpus; i++)
3637 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3638 fprintf(f, " %u", i);
94f04347
LP
3639 fputs("\n", f);
3640 }
3641
3a43da28 3642 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3643 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3644
3645 fprintf(f,
80876c20
LP
3646 "%sStandardInput: %s\n"
3647 "%sStandardOutput: %s\n"
3648 "%sStandardError: %s\n",
3649 prefix, exec_input_to_string(c->std_input),
3650 prefix, exec_output_to_string(c->std_output),
3651 prefix, exec_output_to_string(c->std_error));
3652
3653 if (c->tty_path)
3654 fprintf(f,
6ea832a2
LP
3655 "%sTTYPath: %s\n"
3656 "%sTTYReset: %s\n"
3657 "%sTTYVHangup: %s\n"
3658 "%sTTYVTDisallocate: %s\n",
3659 prefix, c->tty_path,
3660 prefix, yes_no(c->tty_reset),
3661 prefix, yes_no(c->tty_vhangup),
3662 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3663
9f6444eb
LP
3664 if (IN_SET(c->std_output,
3665 EXEC_OUTPUT_SYSLOG,
3666 EXEC_OUTPUT_KMSG,
3667 EXEC_OUTPUT_JOURNAL,
3668 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3669 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3670 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3671 IN_SET(c->std_error,
3672 EXEC_OUTPUT_SYSLOG,
3673 EXEC_OUTPUT_KMSG,
3674 EXEC_OUTPUT_JOURNAL,
3675 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3676 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3677 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 3678
5ce70e5b 3679 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3680
837df140
YW
3681 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3682 if (r >= 0)
3683 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 3684
837df140
YW
3685 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3686 if (r >= 0)
3687 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 3688 }
94f04347 3689
07d46372
YW
3690 if (c->secure_bits) {
3691 _cleanup_free_ char *str = NULL;
3692
3693 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3694 if (r >= 0)
3695 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3696 }
94f04347 3697
a103496c 3698 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 3699 _cleanup_free_ char *str = NULL;
94f04347 3700
dd1f5bd0
YW
3701 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3702 if (r >= 0)
3703 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
3704 }
3705
3706 if (c->capability_ambient_set != 0) {
dd1f5bd0 3707 _cleanup_free_ char *str = NULL;
755d4b67 3708
dd1f5bd0
YW
3709 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3710 if (r >= 0)
3711 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
3712 }
3713
3714 if (c->user)
f2d3769a 3715 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 3716 if (c->group)
f2d3769a 3717 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 3718
29206d46
LP
3719 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3720
15ae422b 3721 if (strv_length(c->supplementary_groups) > 0) {
94f04347 3722 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
3723 strv_fprintf(f, c->supplementary_groups);
3724 fputs("\n", f);
3725 }
94f04347 3726
5b6319dc 3727 if (c->pam_name)
f2d3769a 3728 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 3729
2a624c36
AP
3730 if (strv_length(c->read_write_paths) > 0) {
3731 fprintf(f, "%sReadWritePaths:", prefix);
3732 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
3733 fputs("\n", f);
3734 }
3735
2a624c36
AP
3736 if (strv_length(c->read_only_paths) > 0) {
3737 fprintf(f, "%sReadOnlyPaths:", prefix);
3738 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
3739 fputs("\n", f);
3740 }
94f04347 3741
2a624c36
AP
3742 if (strv_length(c->inaccessible_paths) > 0) {
3743 fprintf(f, "%sInaccessiblePaths:", prefix);
3744 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
3745 fputs("\n", f);
3746 }
2e22afe9 3747
d2d6c096
LP
3748 if (c->n_bind_mounts > 0)
3749 for (i = 0; i < c->n_bind_mounts; i++) {
3750 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3751 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3752 c->bind_mounts[i].source,
3753 c->bind_mounts[i].destination,
3754 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3755 }
3756
169c1bda
LP
3757 if (c->utmp_id)
3758 fprintf(f,
3759 "%sUtmpIdentifier: %s\n",
3760 prefix, c->utmp_id);
7b52a628
MS
3761
3762 if (c->selinux_context)
3763 fprintf(f,
5f8640fb
LP
3764 "%sSELinuxContext: %s%s\n",
3765 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 3766
80c21aea
WC
3767 if (c->apparmor_profile)
3768 fprintf(f,
3769 "%sAppArmorProfile: %s%s\n",
3770 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3771
3772 if (c->smack_process_label)
3773 fprintf(f,
3774 "%sSmackProcessLabel: %s%s\n",
3775 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3776
050f7277 3777 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
3778 fprintf(f,
3779 "%sPersonality: %s\n",
3780 prefix, strna(personality_to_string(c->personality)));
3781
78e864e5
TM
3782 fprintf(f,
3783 "%sLockPersonality: %s\n",
3784 prefix, yes_no(c->lock_personality));
3785
17df7223 3786 if (c->syscall_filter) {
351a19b1 3787#ifdef HAVE_SECCOMP
17df7223
LP
3788 Iterator j;
3789 void *id;
3790 bool first = true;
351a19b1 3791#endif
17df7223
LP
3792
3793 fprintf(f,
57183d11 3794 "%sSystemCallFilter: ",
17df7223
LP
3795 prefix);
3796
3797 if (!c->syscall_whitelist)
3798 fputc('~', f);
3799
351a19b1 3800#ifdef HAVE_SECCOMP
17df7223
LP
3801 SET_FOREACH(id, c->syscall_filter, j) {
3802 _cleanup_free_ char *name = NULL;
3803
3804 if (first)
3805 first = false;
3806 else
3807 fputc(' ', f);
3808
57183d11 3809 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
3810 fputs(strna(name), f);
3811 }
351a19b1 3812#endif
17df7223
LP
3813
3814 fputc('\n', f);
3815 }
3816
57183d11
LP
3817 if (c->syscall_archs) {
3818#ifdef HAVE_SECCOMP
3819 Iterator j;
3820 void *id;
3821#endif
3822
3823 fprintf(f,
3824 "%sSystemCallArchitectures:",
3825 prefix);
3826
3827#ifdef HAVE_SECCOMP
3828 SET_FOREACH(id, c->syscall_archs, j)
3829 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3830#endif
3831 fputc('\n', f);
3832 }
3833
add00535
LP
3834 if (exec_context_restrict_namespaces_set(c)) {
3835 _cleanup_free_ char *s = NULL;
3836
3837 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3838 if (r >= 0)
3839 fprintf(f, "%sRestrictNamespaces: %s\n",
3840 prefix, s);
3841 }
3842
b3267152 3843 if (c->syscall_errno > 0)
17df7223
LP
3844 fprintf(f,
3845 "%sSystemCallErrorNumber: %s\n",
3846 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
3847
3848 if (c->apparmor_profile)
3849 fprintf(f,
3850 "%sAppArmorProfile: %s%s\n",
3851 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
3852}
3853
a931ad47
LP
3854bool exec_context_maintains_privileges(ExecContext *c) {
3855 assert(c);
3856
61233823 3857 /* Returns true if the process forked off would run under
a931ad47
LP
3858 * an unchanged UID or as root. */
3859
3860 if (!c->user)
3861 return true;
3862
3863 if (streq(c->user, "root") || streq(c->user, "0"))
3864 return true;
3865
3866 return false;
3867}
3868
7f452159
LP
3869int exec_context_get_effective_ioprio(ExecContext *c) {
3870 int p;
3871
3872 assert(c);
3873
3874 if (c->ioprio_set)
3875 return c->ioprio;
3876
3877 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3878 if (p < 0)
3879 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3880
3881 return p;
3882}
3883
b58b4116 3884void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 3885 assert(s);
5cb5a6ff 3886
b58b4116
LP
3887 zero(*s);
3888 s->pid = pid;
3889 dual_timestamp_get(&s->start_timestamp);
3890}
3891
6ea832a2 3892void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
3893 assert(s);
3894
0b1f4ae6 3895 if (s->pid && s->pid != pid)
b58b4116
LP
3896 zero(*s);
3897
034c6ed7 3898 s->pid = pid;
63983207 3899 dual_timestamp_get(&s->exit_timestamp);
9fb86720 3900
034c6ed7
LP
3901 s->code = code;
3902 s->status = status;
169c1bda 3903
6ea832a2
LP
3904 if (context) {
3905 if (context->utmp_id)
3906 utmp_put_dead_process(context->utmp_id, pid, code, status);
3907
1e22b5cd 3908 exec_context_tty_reset(context, NULL);
6ea832a2 3909 }
9fb86720
LP
3910}
3911
3912void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3913 char buf[FORMAT_TIMESTAMP_MAX];
3914
3915 assert(s);
3916 assert(f);
3917
9fb86720
LP
3918 if (s->pid <= 0)
3919 return;
3920
4c940960
LP
3921 prefix = strempty(prefix);
3922
9fb86720 3923 fprintf(f,
ccd06097
ZJS
3924 "%sPID: "PID_FMT"\n",
3925 prefix, s->pid);
9fb86720 3926
af9d16e1 3927 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
3928 fprintf(f,
3929 "%sStart Timestamp: %s\n",
63983207 3930 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 3931
af9d16e1 3932 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
3933 fprintf(f,
3934 "%sExit Timestamp: %s\n"
3935 "%sExit Code: %s\n"
3936 "%sExit Status: %i\n",
63983207 3937 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
3938 prefix, sigchld_code_to_string(s->code),
3939 prefix, s->status);
5cb5a6ff 3940}
44d8db9e 3941
9e2f7c11 3942char *exec_command_line(char **argv) {
44d8db9e
LP
3943 size_t k;
3944 char *n, *p, **a;
3945 bool first = true;
3946
9e2f7c11 3947 assert(argv);
44d8db9e 3948
9164977d 3949 k = 1;
9e2f7c11 3950 STRV_FOREACH(a, argv)
44d8db9e
LP
3951 k += strlen(*a)+3;
3952
5cd9cd35
LP
3953 n = new(char, k);
3954 if (!n)
44d8db9e
LP
3955 return NULL;
3956
3957 p = n;
9e2f7c11 3958 STRV_FOREACH(a, argv) {
44d8db9e
LP
3959
3960 if (!first)
3961 *(p++) = ' ';
3962 else
3963 first = false;
3964
3965 if (strpbrk(*a, WHITESPACE)) {
3966 *(p++) = '\'';
3967 p = stpcpy(p, *a);
3968 *(p++) = '\'';
3969 } else
3970 p = stpcpy(p, *a);
3971
3972 }
3973
9164977d
LP
3974 *p = 0;
3975
44d8db9e
LP
3976 /* FIXME: this doesn't really handle arguments that have
3977 * spaces and ticks in them */
3978
3979 return n;
3980}
3981
3982void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 3983 _cleanup_free_ char *cmd = NULL;
4c940960 3984 const char *prefix2;
44d8db9e
LP
3985
3986 assert(c);
3987 assert(f);
3988
4c940960 3989 prefix = strempty(prefix);
63c372cb 3990 prefix2 = strjoina(prefix, "\t");
44d8db9e 3991
9e2f7c11 3992 cmd = exec_command_line(c->argv);
44d8db9e
LP
3993 fprintf(f,
3994 "%sCommand Line: %s\n",
3995 prefix, cmd ? cmd : strerror(ENOMEM));
3996
9fb86720 3997 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
3998}
3999
4000void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4001 assert(f);
4002
4c940960 4003 prefix = strempty(prefix);
44d8db9e
LP
4004
4005 LIST_FOREACH(command, c, c)
4006 exec_command_dump(c, f, prefix);
4007}
94f04347 4008
a6a80b4f
LP
4009void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4010 ExecCommand *end;
4011
4012 assert(l);
4013 assert(e);
4014
4015 if (*l) {
35b8ca3a 4016 /* It's kind of important, that we keep the order here */
71fda00f
LP
4017 LIST_FIND_TAIL(command, *l, end);
4018 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4019 } else
4020 *l = e;
4021}
4022
26fd040d
LP
4023int exec_command_set(ExecCommand *c, const char *path, ...) {
4024 va_list ap;
4025 char **l, *p;
4026
4027 assert(c);
4028 assert(path);
4029
4030 va_start(ap, path);
4031 l = strv_new_ap(path, ap);
4032 va_end(ap);
4033
4034 if (!l)
4035 return -ENOMEM;
4036
250a918d
LP
4037 p = strdup(path);
4038 if (!p) {
26fd040d
LP
4039 strv_free(l);
4040 return -ENOMEM;
4041 }
4042
4043 free(c->path);
4044 c->path = p;
4045
4046 strv_free(c->argv);
4047 c->argv = l;
4048
4049 return 0;
4050}
4051
86b23b07 4052int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4053 _cleanup_strv_free_ char **l = NULL;
86b23b07 4054 va_list ap;
86b23b07
JS
4055 int r;
4056
4057 assert(c);
4058 assert(path);
4059
4060 va_start(ap, path);
4061 l = strv_new_ap(path, ap);
4062 va_end(ap);
4063
4064 if (!l)
4065 return -ENOMEM;
4066
e287086b 4067 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4068 if (r < 0)
86b23b07 4069 return r;
86b23b07
JS
4070
4071 return 0;
4072}
4073
4074
613b411c
LP
4075static int exec_runtime_allocate(ExecRuntime **rt) {
4076
4077 if (*rt)
4078 return 0;
4079
4080 *rt = new0(ExecRuntime, 1);
f146f5e1 4081 if (!*rt)
613b411c
LP
4082 return -ENOMEM;
4083
4084 (*rt)->n_ref = 1;
4085 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4086
4087 return 0;
4088}
4089
4090int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4091 int r;
4092
4093 assert(rt);
4094 assert(c);
4095 assert(id);
4096
4097 if (*rt)
4098 return 1;
4099
4100 if (!c->private_network && !c->private_tmp)
4101 return 0;
4102
4103 r = exec_runtime_allocate(rt);
4104 if (r < 0)
4105 return r;
4106
4107 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4108 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4109 return -errno;
4110 }
4111
4112 if (c->private_tmp && !(*rt)->tmp_dir) {
4113 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4114 if (r < 0)
4115 return r;
4116 }
4117
4118 return 1;
4119}
4120
4121ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4122 assert(r);
4123 assert(r->n_ref > 0);
4124
4125 r->n_ref++;
4126 return r;
4127}
4128
4129ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4130
4131 if (!r)
4132 return NULL;
4133
4134 assert(r->n_ref > 0);
4135
4136 r->n_ref--;
f2341e0a
LP
4137 if (r->n_ref > 0)
4138 return NULL;
4139
4140 free(r->tmp_dir);
4141 free(r->var_tmp_dir);
4142 safe_close_pair(r->netns_storage_socket);
6b430fdb 4143 return mfree(r);
613b411c
LP
4144}
4145
f2341e0a 4146int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4147 assert(u);
4148 assert(f);
4149 assert(fds);
4150
4151 if (!rt)
4152 return 0;
4153
4154 if (rt->tmp_dir)
4155 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4156
4157 if (rt->var_tmp_dir)
4158 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4159
4160 if (rt->netns_storage_socket[0] >= 0) {
4161 int copy;
4162
4163 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4164 if (copy < 0)
4165 return copy;
4166
4167 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4168 }
4169
4170 if (rt->netns_storage_socket[1] >= 0) {
4171 int copy;
4172
4173 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4174 if (copy < 0)
4175 return copy;
4176
4177 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4178 }
4179
4180 return 0;
4181}
4182
f2341e0a 4183int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4184 int r;
4185
4186 assert(rt);
4187 assert(key);
4188 assert(value);
4189
4190 if (streq(key, "tmp-dir")) {
4191 char *copy;
4192
4193 r = exec_runtime_allocate(rt);
4194 if (r < 0)
f2341e0a 4195 return log_oom();
613b411c
LP
4196
4197 copy = strdup(value);
4198 if (!copy)
4199 return log_oom();
4200
4201 free((*rt)->tmp_dir);
4202 (*rt)->tmp_dir = copy;
4203
4204 } else if (streq(key, "var-tmp-dir")) {
4205 char *copy;
4206
4207 r = exec_runtime_allocate(rt);
4208 if (r < 0)
f2341e0a 4209 return log_oom();
613b411c
LP
4210
4211 copy = strdup(value);
4212 if (!copy)
4213 return log_oom();
4214
4215 free((*rt)->var_tmp_dir);
4216 (*rt)->var_tmp_dir = copy;
4217
4218 } else if (streq(key, "netns-socket-0")) {
4219 int fd;
4220
4221 r = exec_runtime_allocate(rt);
4222 if (r < 0)
f2341e0a 4223 return log_oom();
613b411c
LP
4224
4225 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4226 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4227 else {
03e334a1 4228 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4229 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4230 }
4231 } else if (streq(key, "netns-socket-1")) {
4232 int fd;
4233
4234 r = exec_runtime_allocate(rt);
4235 if (r < 0)
f2341e0a 4236 return log_oom();
613b411c
LP
4237
4238 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4239 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4240 else {
03e334a1 4241 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4242 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4243 }
4244 } else
4245 return 0;
4246
4247 return 1;
4248}
4249
4250static void *remove_tmpdir_thread(void *p) {
4251 _cleanup_free_ char *path = p;
4252
c6878637 4253 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4254 return NULL;
4255}
4256
4257void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4258 int r;
4259
613b411c
LP
4260 if (!rt)
4261 return;
4262
4263 /* If there are multiple users of this, let's leave the stuff around */
4264 if (rt->n_ref > 1)
4265 return;
4266
4267 if (rt->tmp_dir) {
4268 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4269
4270 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4271 if (r < 0) {
da927ba9 4272 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4273 free(rt->tmp_dir);
4274 }
4275
613b411c
LP
4276 rt->tmp_dir = NULL;
4277 }
4278
4279 if (rt->var_tmp_dir) {
4280 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4281
4282 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4283 if (r < 0) {
da927ba9 4284 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4285 free(rt->var_tmp_dir);
4286 }
4287
613b411c
LP
4288 rt->var_tmp_dir = NULL;
4289 }
4290
3d94f76c 4291 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4292}
4293
80876c20
LP
4294static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4295 [EXEC_INPUT_NULL] = "null",
4296 [EXEC_INPUT_TTY] = "tty",
4297 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4298 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4299 [EXEC_INPUT_SOCKET] = "socket",
4300 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4301};
4302
8a0867d6
LP
4303DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4304
94f04347 4305static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4306 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4307 [EXEC_OUTPUT_NULL] = "null",
80876c20 4308 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4309 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4310 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4311 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4312 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4313 [EXEC_OUTPUT_JOURNAL] = "journal",
4314 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4315 [EXEC_OUTPUT_SOCKET] = "socket",
4316 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4317};
4318
4319DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4320
4321static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4322 [EXEC_UTMP_INIT] = "init",
4323 [EXEC_UTMP_LOGIN] = "login",
4324 [EXEC_UTMP_USER] = "user",
4325};
4326
4327DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4328
4329static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4330 [EXEC_PRESERVE_NO] = "no",
4331 [EXEC_PRESERVE_YES] = "yes",
4332 [EXEC_PRESERVE_RESTART] = "restart",
4333};
4334
4335DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e
YW
4336
4337static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4338 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4339 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4340 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4341 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4342 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4343};
4344
4345DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);