]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
execute: drop explicit log_open()/log_close() now that it is unnecessary
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
5b6319dc
LP
40#ifdef HAVE_PAM
41#include <security/pam_appl.h>
42#endif
43
7b52a628
MS
44#ifdef HAVE_SELINUX
45#include <selinux/selinux.h>
46#endif
47
17df7223
LP
48#ifdef HAVE_SECCOMP
49#include <seccomp.h>
50#endif
51
eef65bf3
MS
52#ifdef HAVE_APPARMOR
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
3ffd4af2
LP
60#ifdef HAVE_APPARMOR
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
f6a6225e 67#include "def.h"
4d1a6904 68#include "env-util.h"
17df7223 69#include "errno-list.h"
3ffd4af2 70#include "execute.h"
8dd4c05b 71#include "exit-status.h"
3ffd4af2 72#include "fd-util.h"
8dd4c05b 73#include "fileio.h"
f97b34a6 74#include "format-util.h"
f4f15635 75#include "fs-util.h"
7d50b32a 76#include "glob-util.h"
c004493c 77#include "io-util.h"
8dd4c05b
LP
78#include "ioprio.h"
79#include "log.h"
80#include "macro.h"
81#include "missing.h"
82#include "mkdir.h"
83#include "namespace.h"
6bedfcbb 84#include "parse-util.h"
8dd4c05b 85#include "path-util.h"
0b452006 86#include "process-util.h"
78f22b97 87#include "rlimit-util.h"
8dd4c05b 88#include "rm-rf.h"
3ffd4af2
LP
89#ifdef HAVE_SECCOMP
90#include "seccomp-util.h"
91#endif
8dd4c05b 92#include "securebits.h"
07d46372 93#include "securebits-util.h"
8dd4c05b 94#include "selinux-util.h"
24882e06 95#include "signal-util.h"
8dd4c05b 96#include "smack-util.h"
fd63e712 97#include "special.h"
8b43440b 98#include "string-table.h"
07630cea 99#include "string-util.h"
8dd4c05b 100#include "strv.h"
7ccbd1ae 101#include "syslog-util.h"
8dd4c05b
LP
102#include "terminal-util.h"
103#include "unit.h"
b1d4f8e1 104#include "user-util.h"
8dd4c05b
LP
105#include "util.h"
106#include "utmp-wtmp.h"
5cb5a6ff 107
e056b01d 108#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 109#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 110
02a51aba
LP
111/* This assumes there is a 'tty' group */
112#define TTY_MODE 0620
113
531dca78
LP
114#define SNDBUF_SIZE (8*1024*1024)
115
034c6ed7
LP
/* Relocates the passed file descriptors so that fds[i] ends up being descriptor number i+3, i.e.
 * packed directly behind stdin/stdout/stderr. The fds array is updated in place. Returns 0 on
 * success or a negative errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx, retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int want = idx + 3;
                        int moved;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, want);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we asked for was still occupied, so we got a different
                         * number. Remember the first such index and have another go from
                         * there once this pass is through. */
                        if (moved != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
160
4c47affc
FB
/* Prepares the fds for being passed to a child: FD_CLOEXEC is dropped from every fd, and
 * O_NONBLOCK is set or cleared (according to 'nonblock') on the first n_socket_fds entries only.
 * Returns 0 on success, or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        const unsigned total = n_storage_fds + n_socket_fds;
        unsigned idx = 0;

        if (total == 0)
                return 0;

        assert(fds);

        while (idx < total) {
                int r;

                /* O_NONBLOCK is only touched for the leading n_socket_fds entries */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* The children are supposed to inherit these, hence FD_CLOEXEC always goes */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;

                idx++;
        }

        return 0;
}
193
1e22b5cd 194static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
195 assert(context);
196
1e22b5cd
LP
197 if (context->stdio_as_fds)
198 return NULL;
199
80876c20
LP
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204}
205
1e22b5cd
LP
206static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
6ea832a2
LP
209 assert(context);
210
1e22b5cd 211 path = exec_context_tty_path(context);
6ea832a2 212
1e22b5cd
LP
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
6ea832a2 219
1e22b5cd
LP
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
6ea832a2
LP
229}
230
6af760f3
LP
231static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236}
237
3a1286b6 238static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244}
245
aac8c0c3
LP
246static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250}
251
252static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256}
257
6af760f3
LP
258static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
3a1286b6
MS
273}
274
80876c20
LP
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes it available as file
 * descriptor number nfd. Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky us, open() already handed out the number we were after */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
292
524daa8c 293static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
92a17af9 294 static const union sockaddr_union sa = {
b92bea5d
ZJS
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
524daa8c
ZJS
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
cad93f29 302 if (gid_is_valid(gid)) {
524daa8c
ZJS
303 oldgid = getgid();
304
92a17af9 305 if (setegid(gid) < 0)
524daa8c
ZJS
306 return -errno;
307 }
308
cad93f29 309 if (uid_is_valid(uid)) {
524daa8c
ZJS
310 olduid = getuid();
311
92a17af9 312 if (seteuid(uid) < 0) {
524daa8c
ZJS
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
92a17af9 318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
524daa8c
ZJS
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
cad93f29 323 if (uid_is_valid(uid))
524daa8c
ZJS
324 (void) seteuid(olduid);
325
326 restore_gid:
cad93f29 327 if (gid_is_valid(gid))
524daa8c
ZJS
328 (void) setegid(oldgid);
329
330 return r;
331}
332
fd1f9c89 333static int connect_logger_as(
7a1ab780 334 Unit *unit,
fd1f9c89 335 const ExecContext *context,
af635cf3 336 const ExecParameters *params,
fd1f9c89
LP
337 ExecOutput output,
338 const char *ident,
fd1f9c89
LP
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
524daa8c 343 int fd, r;
071830ff
LP
344
345 assert(context);
af635cf3 346 assert(params);
80876c20
LP
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
071830ff 350
54fe0cdb
LP
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
80876c20 353 return -errno;
071830ff 354
524daa8c
ZJS
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
071830ff 358
80876c20 359 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 360 safe_close(fd);
80876c20
LP
361 return -errno;
362 }
071830ff 363
fd1f9c89 364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 365
80876c20 366 dprintf(fd,
62bca2c6 367 "%s\n"
80876c20
LP
368 "%s\n"
369 "%i\n"
54fe0cdb
LP
370 "%i\n"
371 "%i\n"
372 "%i\n"
4f4a1dbf 373 "%i\n",
c867611e 374 context->syslog_identifier ?: ident,
af635cf3 375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
54fe0cdb
LP
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
aac8c0c3
LP
378 is_syslog_output(output),
379 is_kmsg_output(output),
3a1286b6 380 is_terminal_output(output));
80876c20 381
fd1f9c89
LP
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
071830ff 387
80876c20
LP
388 return r;
389}
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and makes it available as file
 * descriptor number nfd. Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested number, nothing to move */
        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return r;
}
071830ff 408
1e3ad081
LP
409static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
071830ff 413
03fd9c49 414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
415 return EXEC_INPUT_NULL;
416
03fd9c49 417 return std_input;
4f2d528d
LP
418}
419
03fd9c49 420static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 421
03fd9c49 422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
423 return EXEC_OUTPUT_INHERIT;
424
03fd9c49 425 return std_output;
4f2d528d
LP
426}
427
a34ceba6
LP
428static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
52c239d7
LB
431 int socket_fd,
432 int named_iofds[3]) {
a34ceba6 433
4f2d528d
LP
434 ExecInput i;
435
436 assert(context);
a34ceba6
LP
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
4f2d528d 449
c39f1ce2 450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
451
452 switch (i) {
071830ff 453
80876c20
LP
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
071830ff 461
1e22b5cd 462 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
3a43da28 466 USEC_INFINITY);
970edce6 467 if (fd < 0)
80876c20
LP
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 472 safe_close(fd);
80876c20
LP
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
4f2d528d
LP
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
52c239d7
LB
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
80876c20
LP
486 default:
487 assert_not_reached("Unknown input type");
488 }
489}
490
a34ceba6
LP
491static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
52c239d7 497 int named_iofds[3],
a34ceba6 498 const char *ident,
7bce046b
LP
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
a34ceba6 503
4f2d528d
LP
504 ExecOutput o;
505 ExecInput i;
47c1d80d 506 int r;
4f2d528d 507
f2341e0a 508 assert(unit);
80876c20 509 assert(context);
a34ceba6 510 assert(params);
80876c20 511 assert(ident);
7bce046b
LP
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
80876c20 514
a34ceba6
LP
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
c39f1ce2 530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 531 o = fixup_output(context->std_output, socket_fd);
4f2d528d 532
eb17e935
MS
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
80876c20 536
eb17e935
MS
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
52c239d7 549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 551
eb17e935 552 o = e;
80876c20 553
eb17e935 554 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 558
acb591e4 559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 560 if (i != EXEC_INPUT_NULL)
eb17e935 561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 562
acb591e4
LP
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
eb17e935 565 return fileno;
94f04347 566
eb17e935
MS
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
071830ff 569 }
94f04347 570
eb17e935 571 switch (o) {
80876c20
LP
572
573 case EXEC_OUTPUT_NULL:
eb17e935 574 return open_null_as(O_WRONLY, fileno);
80876c20
LP
575
576 case EXEC_OUTPUT_TTY:
4f2d528d 577 if (is_terminal_input(i))
eb17e935 578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
579
580 /* We don't reset the terminal if this is just about output */
1e22b5cd 581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
582
583 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 585 case EXEC_OUTPUT_KMSG:
28dbc1e8 586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
af635cf3 589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
47c1d80d 590 if (r < 0) {
82677ae4 591 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 592 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
ab2116b1
LP
598 * services to detect whether they are connected to the journal or not.
599 *
600 * If both stdout and stderr are connected to a stream then let's make sure to store the data
601 * about STDERR as that's usually the best way to do logging. */
7bce046b 602
ab2116b1
LP
603 if (fstat(fileno, &st) >= 0 &&
604 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
7bce046b
LP
605 *journal_stream_dev = st.st_dev;
606 *journal_stream_ino = st.st_ino;
607 }
47c1d80d
MS
608 }
609 return r;
4f2d528d
LP
610
611 case EXEC_OUTPUT_SOCKET:
612 assert(socket_fd >= 0);
eb17e935 613 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 614
52c239d7
LB
615 case EXEC_OUTPUT_NAMED_FD:
616 (void) fd_nonblock(named_iofds[fileno], false);
617 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
618
94f04347 619 default:
80876c20 620 assert_not_reached("Unknown error type");
94f04347 621 }
071830ff
LP
622}
623
02a51aba
LP
624static int chown_terminal(int fd, uid_t uid) {
625 struct stat st;
626
627 assert(fd >= 0);
02a51aba 628
1ff74fb6
LP
629 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
630 if (isatty(fd) < 1)
631 return 0;
632
02a51aba 633 /* This might fail. What matters are the results. */
bab45044
LP
634 (void) fchown(fd, uid, -1);
635 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
636
637 if (fstat(fd, &st) < 0)
638 return -errno;
639
d8b4e2e9 640 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
641 return -EPERM;
642
643 return 0;
644}
645
7d5ceb64 646static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
647 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
648 int r;
80876c20 649
80876c20
LP
650 assert(_saved_stdin);
651 assert(_saved_stdout);
652
af6da548
LP
653 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
654 if (saved_stdin < 0)
655 return -errno;
80876c20 656
af6da548 657 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
658 if (saved_stdout < 0)
659 return -errno;
80876c20 660
7d5ceb64 661 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
662 if (fd < 0)
663 return fd;
80876c20 664
af6da548
LP
665 r = chown_terminal(fd, getuid());
666 if (r < 0)
3d18b167 667 return r;
02a51aba 668
3d18b167
LP
669 r = reset_terminal_fd(fd, true);
670 if (r < 0)
671 return r;
80876c20 672
3d18b167
LP
673 if (dup2(fd, STDIN_FILENO) < 0)
674 return -errno;
675
676 if (dup2(fd, STDOUT_FILENO) < 0)
677 return -errno;
80876c20
LP
678
679 if (fd >= 2)
03e334a1 680 safe_close(fd);
3d18b167 681 fd = -1;
80876c20
LP
682
683 *_saved_stdin = saved_stdin;
684 *_saved_stdout = saved_stdout;
685
3d18b167 686 saved_stdin = saved_stdout = -1;
80876c20 687
3d18b167 688 return 0;
80876c20
LP
689}
690
63d77c92 691static void write_confirm_error_fd(int err, int fd, const Unit *u) {
3b20f877
FB
692 assert(err < 0);
693
694 if (err == -ETIMEDOUT)
63d77c92 695 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
3b20f877
FB
696 else {
697 errno = -err;
63d77c92 698 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
3b20f877
FB
699 }
700}
701
63d77c92 702static void write_confirm_error(int err, const char *vc, const Unit *u) {
03e334a1 703 _cleanup_close_ int fd = -1;
80876c20 704
3b20f877 705 assert(vc);
80876c20 706
7d5ceb64 707 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548 708 if (fd < 0)
3b20f877 709 return;
80876c20 710
63d77c92 711 write_confirm_error_fd(err, fd, u);
af6da548 712}
80876c20 713
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout back in place
 * and closes the saved copies, resetting *saved_stdin and *saved_stdout to the value returned by
 * safe_close(). Returns 0 on success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
735
3b20f877
FB
/* Possible outcomes of asking for confirmation before a command is spawned */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS = 0,  /* don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE = 1,          /* execute the command */
};
741
eedf223a 742static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
af6da548 743 int saved_stdout = -1, saved_stdin = -1, r;
2bcd3c26 744 _cleanup_free_ char *e = NULL;
3b20f877 745 char c;
af6da548 746
3b20f877 747 /* For any internal errors, assume a positive response. */
7d5ceb64 748 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
3b20f877 749 if (r < 0) {
63d77c92 750 write_confirm_error(r, vc, u);
3b20f877
FB
751 return CONFIRM_EXECUTE;
752 }
af6da548 753
b0eb2944
FB
754 /* confirm_spawn might have been disabled while we were sleeping. */
755 if (manager_is_confirm_spawn_disabled(u->manager)) {
756 r = 1;
757 goto restore_stdio;
758 }
af6da548 759
2bcd3c26
FB
760 e = ellipsize(cmdline, 60, 100);
761 if (!e) {
762 log_oom();
763 r = CONFIRM_EXECUTE;
764 goto restore_stdio;
765 }
af6da548 766
d172b175 767 for (;;) {
539622bd 768 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
d172b175 769 if (r < 0) {
63d77c92 770 write_confirm_error_fd(r, STDOUT_FILENO, u);
d172b175
FB
771 r = CONFIRM_EXECUTE;
772 goto restore_stdio;
773 }
af6da548 774
d172b175 775 switch (c) {
b0eb2944
FB
776 case 'c':
777 printf("Resuming normal execution.\n");
778 manager_disable_confirm_spawn();
779 r = 1;
780 break;
dd6f9ac0
FB
781 case 'D':
782 unit_dump(u, stdout, " ");
783 continue; /* ask again */
d172b175
FB
784 case 'f':
785 printf("Failing execution.\n");
786 r = CONFIRM_PRETEND_FAILURE;
787 break;
788 case 'h':
b0eb2944
FB
789 printf(" c - continue, proceed without asking anymore\n"
790 " D - dump, show the state of the unit\n"
dd6f9ac0 791 " f - fail, don't execute the command and pretend it failed\n"
d172b175 792 " h - help\n"
eedf223a 793 " i - info, show a short summary of the unit\n"
56fde33a 794 " j - jobs, show jobs that are in progress\n"
d172b175
FB
795 " s - skip, don't execute the command and pretend it succeeded\n"
796 " y - yes, execute the command\n");
dd6f9ac0 797 continue; /* ask again */
eedf223a
FB
798 case 'i':
799 printf(" Description: %s\n"
800 " Unit: %s\n"
801 " Command: %s\n",
802 u->id, u->description, cmdline);
803 continue; /* ask again */
56fde33a
FB
804 case 'j':
805 manager_dump_jobs(u->manager, stdout, " ");
806 continue; /* ask again */
539622bd
FB
807 case 'n':
808 /* 'n' was removed in favor of 'f'. */
809 printf("Didn't understand 'n', did you mean 'f'?\n");
810 continue; /* ask again */
d172b175
FB
811 case 's':
812 printf("Skipping execution.\n");
813 r = CONFIRM_PRETEND_SUCCESS;
814 break;
815 case 'y':
816 r = CONFIRM_EXECUTE;
817 break;
818 default:
819 assert_not_reached("Unhandled choice");
820 }
3b20f877 821 break;
3b20f877 822 }
af6da548 823
3b20f877 824restore_stdio:
af6da548 825 restore_confirm_stdio(&saved_stdin, &saved_stdout);
af6da548 826 return r;
80876c20
LP
827}
828
4d885bd3
DH
829static int get_fixed_user(const ExecContext *c, const char **user,
830 uid_t *uid, gid_t *gid,
831 const char **home, const char **shell) {
81a2b7ce 832 int r;
4d885bd3 833 const char *name;
81a2b7ce 834
4d885bd3 835 assert(c);
81a2b7ce 836
23deef88
LP
837 if (!c->user)
838 return 0;
839
4d885bd3
DH
840 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
841 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 842
23deef88 843 name = c->user;
4d885bd3
DH
844 r = get_user_creds_clean(&name, uid, gid, home, shell);
845 if (r < 0)
846 return r;
81a2b7ce 847
4d885bd3
DH
848 *user = name;
849 return 0;
850}
851
852static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
853 int r;
854 const char *name;
855
856 assert(c);
857
858 if (!c->group)
859 return 0;
860
861 name = c->group;
862 r = get_group_creds(&name, gid);
863 if (r < 0)
864 return r;
865
866 *group = name;
867 return 0;
868}
869
cdc5d5c5
DH
870static int get_supplementary_groups(const ExecContext *c, const char *user,
871 const char *group, gid_t gid,
872 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
873 char **i;
874 int r, k = 0;
875 int ngroups_max;
876 bool keep_groups = false;
877 gid_t *groups = NULL;
878 _cleanup_free_ gid_t *l_gids = NULL;
879
880 assert(c);
881
bbeea271
DH
882 /*
883 * If user is given, then lookup GID and supplementary groups list.
884 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
885 * here and as early as possible so we keep the list of supplementary
886 * groups of the caller.
bbeea271
DH
887 */
888 if (user && gid_is_valid(gid) && gid != 0) {
889 /* First step, initialize groups from /etc/groups */
890 if (initgroups(user, gid) < 0)
891 return -errno;
892
893 keep_groups = true;
894 }
895
4d885bd3
DH
896 if (!c->supplementary_groups)
897 return 0;
898
366ddd25
DH
899 /*
900 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
901 * be positive, otherwise fail.
902 */
903 errno = 0;
904 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
905 if (ngroups_max <= 0) {
906 if (errno > 0)
907 return -errno;
908 else
909 return -EOPNOTSUPP; /* For all other values */
910 }
911
4d885bd3
DH
912 l_gids = new(gid_t, ngroups_max);
913 if (!l_gids)
914 return -ENOMEM;
81a2b7ce 915
4d885bd3
DH
916 if (keep_groups) {
917 /*
918 * Lookup the list of groups that the user belongs to, we
919 * avoid NSS lookups here too for gid=0.
920 */
921 k = ngroups_max;
922 if (getgrouplist(user, gid, l_gids, &k) < 0)
923 return -EINVAL;
924 } else
925 k = 0;
81a2b7ce 926
4d885bd3
DH
927 STRV_FOREACH(i, c->supplementary_groups) {
928 const char *g;
81a2b7ce 929
4d885bd3
DH
930 if (k >= ngroups_max)
931 return -E2BIG;
81a2b7ce 932
4d885bd3
DH
933 g = *i;
934 r = get_group_creds(&g, l_gids+k);
935 if (r < 0)
936 return r;
81a2b7ce 937
4d885bd3
DH
938 k++;
939 }
81a2b7ce 940
4d885bd3
DH
941 /*
942 * Sets ngids to zero to drop all supplementary groups, happens
943 * when we are under root and SupplementaryGroups= is empty.
944 */
945 if (k == 0) {
946 *ngids = 0;
947 return 0;
948 }
81a2b7ce 949
4d885bd3
DH
950 /* Otherwise get the final list of supplementary groups */
951 groups = memdup(l_gids, sizeof(gid_t) * k);
952 if (!groups)
953 return -ENOMEM;
954
955 *supplementary_gids = groups;
956 *ngids = k;
957
958 groups = NULL;
959
960 return 0;
961}
962
963static int enforce_groups(const ExecContext *context, gid_t gid,
964 gid_t *supplementary_gids, int ngids) {
965 int r;
966
967 assert(context);
968
969 /* Handle SupplementaryGroups= even if it is empty */
970 if (context->supplementary_groups) {
971 r = maybe_setgroups(ngids, supplementary_gids);
972 if (r < 0)
97f0e76f 973 return r;
4d885bd3 974 }
81a2b7ce 975
4d885bd3
DH
976 if (gid_is_valid(gid)) {
977 /* Then set our gids */
978 if (setresgid(gid, gid, gid) < 0)
979 return -errno;
81a2b7ce
LP
980 }
981
982 return 0;
983}
984
985static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
986 assert(context);
987
4d885bd3
DH
988 if (!uid_is_valid(uid))
989 return 0;
990
479050b3 991 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
992 * capabilities while doing so. */
993
479050b3 994 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
995
996 /* First step: If we need to keep capabilities but
997 * drop privileges we need to make sure we keep our
cbb21cca 998 * caps, while we drop privileges. */
693ced48 999 if (uid != 0) {
cbb21cca 1000 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
1001
1002 if (prctl(PR_GET_SECUREBITS) != sb)
1003 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004 return -errno;
1005 }
81a2b7ce
LP
1006 }
1007
479050b3 1008 /* Second step: actually set the uids */
81a2b7ce
LP
1009 if (setresuid(uid, uid, uid) < 0)
1010 return -errno;
1011
1012 /* At this point we should have all necessary capabilities but
1013 are otherwise a normal user. However, the caps might got
1014 corrupted due to the setresuid() so we need clean them up
1015 later. This is done outside of this call. */
1016
1017 return 0;
1018}
1019
5b6319dc
LP
1020#ifdef HAVE_PAM
1021
1022static int null_conv(
1023 int num_msg,
1024 const struct pam_message **msg,
1025 struct pam_response **resp,
1026 void *appdata_ptr) {
1027
1028 /* We don't support conversations */
1029
1030 return PAM_CONV_ERR;
1031}
1032
cefc33ae
LP
1033#endif
1034
5b6319dc
LP
/* Opens a PAM session for the service that is about to be executed.
 *
 * name/user are handed to pam_start(), tty (if non-NULL) is set as PAM_TTY, and every entry of *env is
 * exported into the PAM environment. After the session has been opened a helper child ("(sd-pam)") is
 * forked off which closes the passed fds, drops privileges to uid/gid and waits for SIGTERM in order to
 * close the session again.
 *
 * On success 0 is returned and *env is replaced (the old list is freed) by the environment list obtained
 * from PAM. On failure a negative errno-style value is returned; PAM errors are reported as -EPERM. If
 * built without PAM support this is a NOP that returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* No child exists that could make use of the blocked SIGTERM, hence restore the
                 * original signal mask before bailing out, so that the caller's mask is unchanged. */
                assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment PAM assembled back to the caller, replacing what was passed in */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1251
5d6b1584
LP
/* Renames the process to "(<name>)", where <name> is derived from the last component of the specified
 * path. If that component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char label[11];
        const char *tail;
        size_t len;
        char *w = label;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps: two parentheses plus at most eight characters of the name. */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Keep the end of the name, as it is usually the more interesting part: the beginning
                 * might just be "systemd-" */
                tail += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, tail, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(label);
}
1282
469830d1
LP
1283static bool context_has_address_families(const ExecContext *c) {
1284 assert(c);
1285
1286 return c->address_families_whitelist ||
1287 !set_isempty(c->address_families);
1288}
1289
1290static bool context_has_syscall_filters(const ExecContext *c) {
1291 assert(c);
1292
1293 return c->syscall_whitelist ||
1294 !set_isempty(c->syscall_filter);
1295}
1296
/* Determines whether "no new privileges" is required for this context: either because it is explicitly
 * requested, or because we lack CAP_SYS_ADMIN and one of the seccomp-backed settings is in use. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: any form of seccomp requires NNP. First the plain boolean settings ... */
        if (c->memory_deny_write_execute ||
            c->restrict_realtime ||
            c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices ||
            c->lock_personality)
                return true;

        /* ... then the settings that need a look at sets or helper predicates. */
        if (context_has_address_families(c) || context_has_syscall_filters(c))
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        return !set_isempty(c->syscall_archs);
}
1318
c0467cf3 1319#ifdef HAVE_SECCOMP
17df7223 1320
83f12b27 1321static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1322
1323 if (is_seccomp_available())
1324 return false;
1325
f673b62d 1326 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
f673b62d 1327 return true;
83f12b27
FS
1328}
1329
165a31c0 1330static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
469830d1 1331 uint32_t negative_action, default_action, action;
165a31c0 1332 int r;
8351ceae 1333
469830d1 1334 assert(u);
c0467cf3 1335 assert(c);
8351ceae 1336
469830d1 1337 if (!context_has_syscall_filters(c))
83f12b27
FS
1338 return 0;
1339
469830d1
LP
1340 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1341 return 0;
e9642be2 1342
469830d1 1343 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
e9642be2 1344
469830d1
LP
1345 if (c->syscall_whitelist) {
1346 default_action = negative_action;
1347 action = SCMP_ACT_ALLOW;
7c66bae2 1348 } else {
469830d1
LP
1349 default_action = SCMP_ACT_ALLOW;
1350 action = negative_action;
57183d11 1351 }
8351ceae 1352
165a31c0
LP
1353 if (needs_ambient_hack) {
1354 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1355 if (r < 0)
1356 return r;
1357 }
1358
469830d1 1359 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
4298d0b5
LP
1360}
1361
469830d1
LP
1362static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1363 assert(u);
4298d0b5
LP
1364 assert(c);
1365
469830d1 1366 if (set_isempty(c->syscall_archs))
83f12b27
FS
1367 return 0;
1368
469830d1
LP
1369 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1370 return 0;
4298d0b5 1371
469830d1
LP
1372 return seccomp_restrict_archs(c->syscall_archs);
1373}
4298d0b5 1374
469830d1
LP
1375static int apply_address_families(const Unit* u, const ExecContext *c) {
1376 assert(u);
1377 assert(c);
4298d0b5 1378
469830d1
LP
1379 if (!context_has_address_families(c))
1380 return 0;
4298d0b5 1381
469830d1
LP
1382 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1383 return 0;
4298d0b5 1384
469830d1 1385 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
8351ceae 1386}
4298d0b5 1387
83f12b27 1388static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
469830d1 1389 assert(u);
f3e43635
TM
1390 assert(c);
1391
469830d1 1392 if (!c->memory_deny_write_execute)
83f12b27
FS
1393 return 0;
1394
469830d1
LP
1395 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1396 return 0;
f3e43635 1397
469830d1 1398 return seccomp_memory_deny_write_execute();
f3e43635
TM
1399}
1400
83f12b27 1401static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
469830d1 1402 assert(u);
f4170c67
LP
1403 assert(c);
1404
469830d1 1405 if (!c->restrict_realtime)
83f12b27
FS
1406 return 0;
1407
469830d1
LP
1408 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1409 return 0;
f4170c67 1410
469830d1 1411 return seccomp_restrict_realtime();
f4170c67
LP
1412}
1413
59e856c7 1414static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
469830d1 1415 assert(u);
59eeb84b
LP
1416 assert(c);
1417
1418 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1419 * let's protect even those systems where this is left on in the kernel. */
1420
469830d1 1421 if (!c->protect_kernel_tunables)
59eeb84b
LP
1422 return 0;
1423
469830d1
LP
1424 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1425 return 0;
59eeb84b 1426
469830d1 1427 return seccomp_protect_sysctl();
59eeb84b
LP
1428}
1429
59e856c7 1430static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
469830d1 1431 assert(u);
502d704e
DH
1432 assert(c);
1433
25a8d8a0 1434 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e 1435
469830d1
LP
1436 if (!c->protect_kernel_modules)
1437 return 0;
1438
502d704e
DH
1439 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1440 return 0;
1441
469830d1 1442 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1443}
1444
59e856c7 1445static int apply_private_devices(const Unit *u, const ExecContext *c) {
469830d1 1446 assert(u);
ba128bb8
LP
1447 assert(c);
1448
8f81a5f6 1449 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8 1450
469830d1
LP
1451 if (!c->private_devices)
1452 return 0;
1453
ba128bb8
LP
1454 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1455 return 0;
1456
469830d1 1457 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1458}
1459
add00535 1460static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
469830d1 1461 assert(u);
add00535
LP
1462 assert(c);
1463
1464 if (!exec_context_restrict_namespaces_set(c))
1465 return 0;
1466
1467 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1468 return 0;
1469
1470 return seccomp_restrict_namespaces(c->restrict_namespaces);
1471}
1472
78e864e5 1473static int apply_lock_personality(const Unit* u, const ExecContext *c) {
e8132d63
LP
1474 unsigned long personality;
1475 int r;
78e864e5
TM
1476
1477 assert(u);
1478 assert(c);
1479
1480 if (!c->lock_personality)
1481 return 0;
1482
1483 if (skip_seccomp_unavailable(u, "LockPersonality="))
1484 return 0;
1485
e8132d63
LP
1486 personality = c->personality;
1487
1488 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1489 if (personality == PERSONALITY_INVALID) {
1490
1491 r = opinionated_personality(&personality);
1492 if (r < 0)
1493 return r;
1494 }
78e864e5
TM
1495
1496 return seccomp_lock_personality(personality);
1497}
1498
c0467cf3 1499#endif
8351ceae 1500
31a7eb86
ZJS
1501static void do_idle_pipe_dance(int idle_pipe[4]) {
1502 assert(idle_pipe);
1503
54eb2300
LP
1504 idle_pipe[1] = safe_close(idle_pipe[1]);
1505 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1506
1507 if (idle_pipe[0] >= 0) {
1508 int r;
1509
1510 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1511
1512 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1513 ssize_t n;
1514
31a7eb86 1515 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1516 n = write(idle_pipe[3], "x", 1);
1517 if (n > 0)
cd972d69
ZJS
1518 /* Wait for systemd to react to the signal above. */
1519 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1520 }
1521
54eb2300 1522 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1523
1524 }
1525
54eb2300 1526 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1527}
1528
7cae38c4 1529static int build_environment(
fd63e712 1530 Unit *u,
9fa95f85 1531 const ExecContext *c,
1e22b5cd 1532 const ExecParameters *p,
7cae38c4
LP
1533 unsigned n_fds,
1534 const char *home,
1535 const char *username,
1536 const char *shell,
7bce046b
LP
1537 dev_t journal_stream_dev,
1538 ino_t journal_stream_ino,
7cae38c4
LP
1539 char ***ret) {
1540
1541 _cleanup_strv_free_ char **our_env = NULL;
1542 unsigned n_env = 0;
1543 char *x;
1544
4b58153d 1545 assert(u);
7cae38c4
LP
1546 assert(c);
1547 assert(ret);
1548
4b58153d 1549 our_env = new0(char*, 14);
7cae38c4
LP
1550 if (!our_env)
1551 return -ENOMEM;
1552
1553 if (n_fds > 0) {
8dd4c05b
LP
1554 _cleanup_free_ char *joined = NULL;
1555
df0ff127 1556 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
7cae38c4
LP
1557 return -ENOMEM;
1558 our_env[n_env++] = x;
1559
1560 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1561 return -ENOMEM;
1562 our_env[n_env++] = x;
8dd4c05b 1563
1e22b5cd 1564 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1565 if (!joined)
1566 return -ENOMEM;
1567
605405c6 1568 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1569 if (!x)
1570 return -ENOMEM;
1571 our_env[n_env++] = x;
7cae38c4
LP
1572 }
1573
b08af3b1 1574 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
df0ff127 1575 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
09812eb7
LP
1576 return -ENOMEM;
1577 our_env[n_env++] = x;
1578
1e22b5cd 1579 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1580 return -ENOMEM;
1581 our_env[n_env++] = x;
1582 }
1583
fd63e712
LP
1584 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1585 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1586 * check the database directly. */
ac647978 1587 if (p->flags & EXEC_NSS_BYPASS_BUS) {
fd63e712
LP
1588 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1589 if (!x)
1590 return -ENOMEM;
1591 our_env[n_env++] = x;
1592 }
1593
7cae38c4
LP
1594 if (home) {
1595 x = strappend("HOME=", home);
1596 if (!x)
1597 return -ENOMEM;
1598 our_env[n_env++] = x;
1599 }
1600
1601 if (username) {
1602 x = strappend("LOGNAME=", username);
1603 if (!x)
1604 return -ENOMEM;
1605 our_env[n_env++] = x;
1606
1607 x = strappend("USER=", username);
1608 if (!x)
1609 return -ENOMEM;
1610 our_env[n_env++] = x;
1611 }
1612
1613 if (shell) {
1614 x = strappend("SHELL=", shell);
1615 if (!x)
1616 return -ENOMEM;
1617 our_env[n_env++] = x;
1618 }
1619
4b58153d
LP
1620 if (!sd_id128_is_null(u->invocation_id)) {
1621 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1622 return -ENOMEM;
1623
1624 our_env[n_env++] = x;
1625 }
1626
6af760f3
LP
1627 if (exec_context_needs_term(c)) {
1628 const char *tty_path, *term = NULL;
1629
1630 tty_path = exec_context_tty_path(c);
1631
1632 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1633 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1634 * passes to PID 1 ends up all the way in the console login shown. */
1635
1636 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1637 term = getenv("TERM");
1638 if (!term)
1639 term = default_term_for_tty(tty_path);
7cae38c4 1640
6af760f3 1641 x = strappend("TERM=", term);
7cae38c4
LP
1642 if (!x)
1643 return -ENOMEM;
1644 our_env[n_env++] = x;
1645 }
1646
7bce046b
LP
1647 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1648 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1649 return -ENOMEM;
1650
1651 our_env[n_env++] = x;
1652 }
1653
7cae38c4 1654 our_env[n_env++] = NULL;
7bce046b 1655 assert(n_env <= 12);
7cae38c4
LP
1656
1657 *ret = our_env;
1658 our_env = NULL;
1659
1660 return 0;
1661}
1662
b4c14404
FB
1663static int build_pass_environment(const ExecContext *c, char ***ret) {
1664 _cleanup_strv_free_ char **pass_env = NULL;
1665 size_t n_env = 0, n_bufsize = 0;
1666 char **i;
1667
1668 STRV_FOREACH(i, c->pass_environment) {
1669 _cleanup_free_ char *x = NULL;
1670 char *v;
1671
1672 v = getenv(*i);
1673 if (!v)
1674 continue;
605405c6 1675 x = strjoin(*i, "=", v);
b4c14404
FB
1676 if (!x)
1677 return -ENOMEM;
00819cc1 1678
b4c14404
FB
1679 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1680 return -ENOMEM;
00819cc1 1681
b4c14404
FB
1682 pass_env[n_env++] = x;
1683 pass_env[n_env] = NULL;
1684 x = NULL;
1685 }
1686
1687 *ret = pass_env;
1688 pass_env = NULL;
1689
1690 return 0;
1691}
1692
8b44a3d2
LP
1693static bool exec_needs_mount_namespace(
1694 const ExecContext *context,
1695 const ExecParameters *params,
1696 ExecRuntime *runtime) {
1697
1698 assert(context);
1699 assert(params);
1700
915e6d16
LP
1701 if (context->root_image)
1702 return true;
1703
2a624c36
AP
1704 if (!strv_isempty(context->read_write_paths) ||
1705 !strv_isempty(context->read_only_paths) ||
1706 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1707 return true;
1708
d2d6c096
LP
1709 if (context->n_bind_mounts > 0)
1710 return true;
1711
8b44a3d2
LP
1712 if (context->mount_flags != 0)
1713 return true;
1714
1715 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1716 return true;
1717
8b44a3d2
LP
1718 if (context->private_devices ||
1719 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1720 context->protect_home != PROTECT_HOME_NO ||
1721 context->protect_kernel_tunables ||
c575770b 1722 context->protect_kernel_modules ||
59eeb84b 1723 context->protect_control_groups)
8b44a3d2
LP
1724 return true;
1725
9c988f93 1726 if (context->mount_apivfs && (context->root_image || context->root_directory))
5d997827
LP
1727 return true;
1728
8b44a3d2
LP
1729 return false;
1730}
1731
d251207d
LP
1732static int setup_private_users(uid_t uid, gid_t gid) {
1733 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1734 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1735 _cleanup_close_ int unshare_ready_fd = -1;
1736 _cleanup_(sigkill_waitp) pid_t pid = 0;
1737 uint64_t c = 1;
1738 siginfo_t si;
1739 ssize_t n;
1740 int r;
1741
1742 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1743 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1744 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1745 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1746 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1747 * continues execution normally. */
1748
587ab01b
ZJS
1749 if (uid != 0 && uid_is_valid(uid)) {
1750 r = asprintf(&uid_map,
1751 "0 0 1\n" /* Map root → root */
1752 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1753 uid, uid);
1754 if (r < 0)
1755 return -ENOMEM;
1756 } else {
e0f3720e 1757 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1758 if (!uid_map)
1759 return -ENOMEM;
1760 }
d251207d 1761
587ab01b
ZJS
1762 if (gid != 0 && gid_is_valid(gid)) {
1763 r = asprintf(&gid_map,
1764 "0 0 1\n" /* Map root → root */
1765 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1766 gid, gid);
1767 if (r < 0)
1768 return -ENOMEM;
1769 } else {
d251207d 1770 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
587ab01b
ZJS
1771 if (!gid_map)
1772 return -ENOMEM;
1773 }
d251207d
LP
1774
1775 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1776 * namespace. */
1777 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1778 if (unshare_ready_fd < 0)
1779 return -errno;
1780
1781 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1782 * failed. */
1783 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1784 return -errno;
1785
1786 pid = fork();
1787 if (pid < 0)
1788 return -errno;
1789
1790 if (pid == 0) {
1791 _cleanup_close_ int fd = -1;
1792 const char *a;
1793 pid_t ppid;
1794
1795 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1796 * here, after the parent opened its own user namespace. */
1797
1798 ppid = getppid();
1799 errno_pipe[0] = safe_close(errno_pipe[0]);
1800
1801 /* Wait until the parent unshared the user namespace */
1802 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1803 r = -errno;
1804 goto child_fail;
1805 }
1806
1807 /* Disable the setgroups() system call in the child user namespace, for good. */
1808 a = procfs_file_alloca(ppid, "setgroups");
1809 fd = open(a, O_WRONLY|O_CLOEXEC);
1810 if (fd < 0) {
1811 if (errno != ENOENT) {
1812 r = -errno;
1813 goto child_fail;
1814 }
1815
1816 /* If the file is missing the kernel is too old, let's continue anyway. */
1817 } else {
1818 if (write(fd, "deny\n", 5) < 0) {
1819 r = -errno;
1820 goto child_fail;
1821 }
1822
1823 fd = safe_close(fd);
1824 }
1825
1826 /* First write the GID map */
1827 a = procfs_file_alloca(ppid, "gid_map");
1828 fd = open(a, O_WRONLY|O_CLOEXEC);
1829 if (fd < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1834 r = -errno;
1835 goto child_fail;
1836 }
1837 fd = safe_close(fd);
1838
1839 /* The write the UID map */
1840 a = procfs_file_alloca(ppid, "uid_map");
1841 fd = open(a, O_WRONLY|O_CLOEXEC);
1842 if (fd < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1847 r = -errno;
1848 goto child_fail;
1849 }
1850
1851 _exit(EXIT_SUCCESS);
1852
1853 child_fail:
1854 (void) write(errno_pipe[1], &r, sizeof(r));
1855 _exit(EXIT_FAILURE);
1856 }
1857
1858 errno_pipe[1] = safe_close(errno_pipe[1]);
1859
1860 if (unshare(CLONE_NEWUSER) < 0)
1861 return -errno;
1862
1863 /* Let the child know that the namespace is ready now */
1864 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1865 return -errno;
1866
1867 /* Try to read an error code from the child */
1868 n = read(errno_pipe[0], &r, sizeof(r));
1869 if (n < 0)
1870 return -errno;
1871 if (n == sizeof(r)) { /* an error code was sent to us */
1872 if (r < 0)
1873 return r;
1874 return -EIO;
1875 }
1876 if (n != 0) /* on success we should have read 0 bytes */
1877 return -EIO;
1878
1879 r = wait_for_terminate(pid, &si);
1880 if (r < 0)
1881 return r;
1882 pid = 0;
1883
1884 /* If something strange happened with the child, let's consider this fatal, too */
1885 if (si.si_code != CLD_EXITED || si.si_status != 0)
1886 return -EIO;
1887
1888 return 0;
1889}
1890
3536f49e 1891static int setup_exec_directory(
07689d5d
LP
1892 const ExecContext *context,
1893 const ExecParameters *params,
1894 uid_t uid,
3536f49e 1895 gid_t gid,
3536f49e
YW
1896 ExecDirectoryType type,
1897 int *exit_status) {
07689d5d 1898
3536f49e
YW
1899 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1900 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1901 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1902 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1903 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1904 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1905 };
07689d5d
LP
1906 char **rt;
1907 int r;
1908
1909 assert(context);
1910 assert(params);
3536f49e
YW
1911 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1912 assert(exit_status);
07689d5d 1913
3536f49e
YW
1914 if (!params->prefix[type])
1915 return 0;
1916
8679efde 1917 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
3536f49e
YW
1918 if (!uid_is_valid(uid))
1919 uid = 0;
1920 if (!gid_is_valid(gid))
1921 gid = 0;
1922 }
1923
1924 STRV_FOREACH(rt, context->directories[type].paths) {
07689d5d
LP
1925 _cleanup_free_ char *p;
1926
3536f49e
YW
1927 p = strjoin(params->prefix[type], "/", *rt);
1928 if (!p) {
1929 r = -ENOMEM;
1930 goto fail;
1931 }
07689d5d 1932
23a7448e
YW
1933 r = mkdir_parents_label(p, 0755);
1934 if (r < 0)
3536f49e 1935 goto fail;
23a7448e 1936
3536f49e 1937 r = mkdir_p_label(p, context->directories[type].mode);
07689d5d 1938 if (r < 0)
3536f49e 1939 goto fail;
07689d5d 1940
c71b2eb7
LP
1941 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1942 * a service, and shall not be writable. */
1943 if (type == EXEC_DIRECTORY_CONFIGURATION)
1944 continue;
1945
3536f49e 1946 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
07689d5d 1947 if (r < 0)
3536f49e 1948 goto fail;
07689d5d
LP
1949 }
1950
1951 return 0;
3536f49e
YW
1952
1953fail:
1954 *exit_status = exit_status_table[type];
1955
1956 return r;
07689d5d
LP
1957}
1958
cefc33ae
LP
1959static int setup_smack(
1960 const ExecContext *context,
1961 const ExecCommand *command) {
1962
cefc33ae
LP
1963 int r;
1964
1965 assert(context);
1966 assert(command);
1967
cefc33ae
LP
1968 if (context->smack_process_label) {
1969 r = mac_smack_apply_pid(0, context->smack_process_label);
1970 if (r < 0)
1971 return r;
1972 }
1973#ifdef SMACK_DEFAULT_PROCESS_LABEL
1974 else {
1975 _cleanup_free_ char *exec_label = NULL;
1976
1977 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1978 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1979 return r;
1980
1981 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1982 if (r < 0)
1983 return r;
1984 }
cefc33ae
LP
1985#endif
1986
1987 return 0;
1988}
1989
3fbe8dbe
LP
1990static int compile_read_write_paths(
1991 const ExecContext *context,
1992 const ExecParameters *params,
1993 char ***ret) {
1994
1995 _cleanup_strv_free_ char **l = NULL;
1996 char **rt;
3536f49e 1997 ExecDirectoryType i;
3fbe8dbe 1998
06ec51d8
ZJS
1999 /* Compile the list of writable paths. This is the combination of
2000 * the explicitly configured paths, plus all runtime directories. */
3fbe8dbe 2001
3536f49e
YW
2002 if (strv_isempty(context->read_write_paths)) {
2003 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2004 if (!strv_isempty(context->directories[i].paths))
2005 break;
2006
2007 if (i == _EXEC_DIRECTORY_MAX) {
2008 *ret = NULL; /* NOP if neither is set */
2009 return 0;
2010 }
3fbe8dbe
LP
2011 }
2012
2013 l = strv_copy(context->read_write_paths);
2014 if (!l)
2015 return -ENOMEM;
2016
3536f49e
YW
2017 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2018 if (!params->prefix[i])
2019 continue;
3fbe8dbe 2020
3536f49e
YW
2021 STRV_FOREACH(rt, context->directories[i].paths) {
2022 char *s;
3fbe8dbe 2023
3536f49e
YW
2024 s = strjoin(params->prefix[i], "/", *rt);
2025 if (!s)
2026 return -ENOMEM;
2027
2028 if (strv_consume(&l, s) < 0)
2029 return -ENOMEM;
2030 }
3fbe8dbe
LP
2031 }
2032
2033 *ret = l;
2034 l = NULL;
2035
2036 return 0;
2037}
2038
6818c54c
LP
2039static int apply_mount_namespace(
2040 Unit *u,
2041 ExecCommand *command,
2042 const ExecContext *context,
2043 const ExecParameters *params,
2044 ExecRuntime *runtime) {
2045
06ec51d8 2046 _cleanup_strv_free_ char **rw = NULL;
93c6bb51 2047 char *tmp = NULL, *var = NULL;
915e6d16 2048 const char *root_dir = NULL, *root_image = NULL;
93c6bb51 2049 NameSpaceInfo ns_info = {
af964954 2050 .ignore_protect_paths = false,
93c6bb51
DH
2051 .private_dev = context->private_devices,
2052 .protect_control_groups = context->protect_control_groups,
2053 .protect_kernel_tunables = context->protect_kernel_tunables,
2054 .protect_kernel_modules = context->protect_kernel_modules,
5d997827 2055 .mount_apivfs = context->mount_apivfs,
93c6bb51 2056 };
165a31c0 2057 bool needs_sandboxing;
6818c54c 2058 int r;
93c6bb51 2059
2b3c1b9e
DH
2060 assert(context);
2061
93c6bb51
DH
2062 /* The runtime struct only contains the parent of the private /tmp,
2063 * which is non-accessible to world users. Inside of it there's a /tmp
2064 * that is sticky, and that's the one we want to use here. */
2065
2066 if (context->private_tmp && runtime) {
2067 if (runtime->tmp_dir)
2068 tmp = strjoina(runtime->tmp_dir, "/tmp");
2069 if (runtime->var_tmp_dir)
2070 var = strjoina(runtime->var_tmp_dir, "/tmp");
2071 }
2072
2073 r = compile_read_write_paths(context, params, &rw);
2074 if (r < 0)
2075 return r;
2076
915e6d16
LP
2077 if (params->flags & EXEC_APPLY_CHROOT) {
2078 root_image = context->root_image;
2079
2080 if (!root_image)
2081 root_dir = context->root_directory;
2082 }
93c6bb51 2083
af964954
DH
2084 /*
2085 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2086 * sandbox info, otherwise enforce it, don't ignore protected paths and
2087 * fail if we are enable to apply the sandbox inside the mount namespace.
2088 */
2089 if (!context->dynamic_user && root_dir)
2090 ns_info.ignore_protect_paths = true;
2091
165a31c0 2092 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
6818c54c 2093
915e6d16
LP
2094 r = setup_namespace(root_dir, root_image,
2095 &ns_info, rw,
165a31c0
LP
2096 needs_sandboxing ? context->read_only_paths : NULL,
2097 needs_sandboxing ? context->inaccessible_paths : NULL,
d2d6c096
LP
2098 context->bind_mounts,
2099 context->n_bind_mounts,
93c6bb51
DH
2100 tmp,
2101 var,
165a31c0
LP
2102 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2103 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
915e6d16
LP
2104 context->mount_flags,
2105 DISSECT_IMAGE_DISCARD_ON_LOOP);
93c6bb51
DH
2106
2107 /* If we couldn't set up the namespace this is probably due to a
2108 * missing capability. In this case, silently proceeed. */
2109 if (IN_SET(r, -EPERM, -EACCES)) {
93c6bb51 2110 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
86ffb325 2111 return 0;
93c6bb51
DH
2112 }
2113
2114 return r;
2115}
2116
915e6d16
LP
2117static int apply_working_directory(
2118 const ExecContext *context,
2119 const ExecParameters *params,
2120 const char *home,
376fecf6
LP
2121 const bool needs_mount_ns,
2122 int *exit_status) {
915e6d16 2123
6732edab 2124 const char *d, *wd;
2b3c1b9e
DH
2125
2126 assert(context);
376fecf6 2127 assert(exit_status);
2b3c1b9e 2128
6732edab
LP
2129 if (context->working_directory_home) {
2130
376fecf6
LP
2131 if (!home) {
2132 *exit_status = EXIT_CHDIR;
6732edab 2133 return -ENXIO;
376fecf6 2134 }
6732edab 2135
2b3c1b9e 2136 wd = home;
6732edab
LP
2137
2138 } else if (context->working_directory)
2b3c1b9e
DH
2139 wd = context->working_directory;
2140 else
2141 wd = "/";
e7f1e7c6
DH
2142
2143 if (params->flags & EXEC_APPLY_CHROOT) {
2144 if (!needs_mount_ns && context->root_directory)
376fecf6
LP
2145 if (chroot(context->root_directory) < 0) {
2146 *exit_status = EXIT_CHROOT;
e7f1e7c6 2147 return -errno;
376fecf6 2148 }
e7f1e7c6 2149
2b3c1b9e
DH
2150 d = wd;
2151 } else
3b0e5bb5 2152 d = prefix_roota(context->root_directory, wd);
e7f1e7c6 2153
376fecf6
LP
2154 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2155 *exit_status = EXIT_CHDIR;
2b3c1b9e 2156 return -errno;
376fecf6 2157 }
e7f1e7c6
DH
2158
2159 return 0;
2160}
2161
b1edf445
LP
2162static int setup_keyring(
2163 Unit *u,
2164 const ExecContext *context,
2165 const ExecParameters *p,
2166 uid_t uid, gid_t gid) {
2167
74dd6b51 2168 key_serial_t keyring;
b1edf445 2169 int r;
74dd6b51
LP
2170
2171 assert(u);
b1edf445 2172 assert(context);
74dd6b51
LP
2173 assert(p);
2174
2175 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2176 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2177 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2178 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2179 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2180 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2181
2182 if (!(p->flags & EXEC_NEW_KEYRING))
2183 return 0;
2184
b1edf445
LP
2185 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2186 return 0;
2187
74dd6b51
LP
2188 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2189 if (keyring == -1) {
2190 if (errno == ENOSYS)
8002fb97 2191 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
74dd6b51 2192 else if (IN_SET(errno, EACCES, EPERM))
8002fb97 2193 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
74dd6b51 2194 else if (errno == EDQUOT)
8002fb97 2195 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
74dd6b51 2196 else
8002fb97 2197 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
74dd6b51
LP
2198
2199 return 0;
2200 }
2201
b3415f5d
LP
2202 /* Populate they keyring with the invocation ID by default. */
2203 if (!sd_id128_is_null(u->invocation_id)) {
2204 key_serial_t key;
2205
2206 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2207 if (key == -1)
8002fb97 2208 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
b3415f5d
LP
2209 else {
2210 if (keyctl(KEYCTL_SETPERM, key,
2211 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2212 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
8002fb97 2213 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
b3415f5d
LP
2214 }
2215 }
2216
74dd6b51
LP
2217 /* And now, make the keyring owned by the service's user */
2218 if (uid_is_valid(uid) || gid_is_valid(gid))
2219 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
8002fb97 2220 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
74dd6b51 2221
b1edf445
LP
2222 /* When requested link the user keyring into the session keyring. */
2223 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2224 uid_t saved_uid;
2225 gid_t saved_gid;
2226
2227 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2228 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2229 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2230
2231 saved_uid = getuid();
2232 saved_gid = getgid();
2233
2234 if (gid_is_valid(gid) && gid != saved_gid) {
2235 if (setregid(gid, -1) < 0)
8002fb97 2236 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
b1edf445
LP
2237 }
2238
2239 if (uid_is_valid(uid) && uid != saved_uid) {
2240 if (setreuid(uid, -1) < 0) {
2241 (void) setregid(saved_gid, -1);
8002fb97 2242 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
b1edf445
LP
2243 }
2244 }
2245
2246 if (keyctl(KEYCTL_LINK,
2247 KEY_SPEC_USER_KEYRING,
2248 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2249
2250 r = -errno;
2251
2252 (void) setreuid(saved_uid, -1);
2253 (void) setregid(saved_gid, -1);
2254
8002fb97 2255 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
b1edf445
LP
2256 }
2257
2258 if (uid_is_valid(uid) && uid != saved_uid) {
2259 if (setreuid(saved_uid, -1) < 0) {
2260 (void) setregid(saved_gid, -1);
8002fb97 2261 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
b1edf445
LP
2262 }
2263 }
2264
2265 if (gid_is_valid(gid) && gid != saved_gid) {
2266 if (setregid(saved_gid, -1) < 0)
8002fb97 2267 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
b1edf445 2268 }
61ceaea5 2269 }
b1edf445 2270
74dd6b51
LP
2271 return 0;
2272}
2273
29206d46
LP
2274static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
2275 assert(array);
2276 assert(n);
2277
2278 if (!pair)
2279 return;
2280
2281 if (pair[0] >= 0)
2282 array[(*n)++] = pair[0];
2283 if (pair[1] >= 0)
2284 array[(*n)++] = pair[1];
2285}
2286
a34ceba6
LP
2287static int close_remaining_fds(
2288 const ExecParameters *params,
2289 ExecRuntime *runtime,
29206d46 2290 DynamicCreds *dcreds,
00d9ef85 2291 int user_lookup_fd,
a34ceba6
LP
2292 int socket_fd,
2293 int *fds, unsigned n_fds) {
2294
2295 unsigned n_dont_close = 0;
00d9ef85 2296 int dont_close[n_fds + 12];
a34ceba6
LP
2297
2298 assert(params);
2299
2300 if (params->stdin_fd >= 0)
2301 dont_close[n_dont_close++] = params->stdin_fd;
2302 if (params->stdout_fd >= 0)
2303 dont_close[n_dont_close++] = params->stdout_fd;
2304 if (params->stderr_fd >= 0)
2305 dont_close[n_dont_close++] = params->stderr_fd;
2306
2307 if (socket_fd >= 0)
2308 dont_close[n_dont_close++] = socket_fd;
2309 if (n_fds > 0) {
2310 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2311 n_dont_close += n_fds;
2312 }
2313
29206d46
LP
2314 if (runtime)
2315 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2316
2317 if (dcreds) {
2318 if (dcreds->user)
2319 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2320 if (dcreds->group)
2321 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2322 }
2323
00d9ef85
LP
2324 if (user_lookup_fd >= 0)
2325 dont_close[n_dont_close++] = user_lookup_fd;
2326
a34ceba6
LP
2327 return close_all_fds(dont_close, n_dont_close);
2328}
2329
00d9ef85
LP
2330static int send_user_lookup(
2331 Unit *unit,
2332 int user_lookup_fd,
2333 uid_t uid,
2334 gid_t gid) {
2335
2336 assert(unit);
2337
2338 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2339 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2340 * specified. */
2341
2342 if (user_lookup_fd < 0)
2343 return 0;
2344
2345 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2346 return 0;
2347
2348 if (writev(user_lookup_fd,
2349 (struct iovec[]) {
e6a7ec4b
LP
2350 IOVEC_INIT(&uid, sizeof(uid)),
2351 IOVEC_INIT(&gid, sizeof(gid)),
2352 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
00d9ef85
LP
2353 return -errno;
2354
2355 return 0;
2356}
2357
6732edab
LP
2358static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2359 int r;
2360
2361 assert(c);
2362 assert(home);
2363 assert(buf);
2364
2365 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2366
2367 if (*home)
2368 return 0;
2369
2370 if (!c->working_directory_home)
2371 return 0;
2372
2373 if (uid == 0) {
2374 /* Hardcode /root as home directory for UID 0 */
2375 *home = "/root";
2376 return 1;
2377 }
2378
2379 r = get_home_dir(buf);
2380 if (r < 0)
2381 return r;
2382
2383 *home = *buf;
2384 return 1;
2385}
2386
ff0af2a1 2387static int exec_child(
f2341e0a 2388 Unit *unit,
ff0af2a1
LP
2389 ExecCommand *command,
2390 const ExecContext *context,
2391 const ExecParameters *params,
2392 ExecRuntime *runtime,
29206d46 2393 DynamicCreds *dcreds,
ff0af2a1
LP
2394 char **argv,
2395 int socket_fd,
52c239d7 2396 int named_iofds[3],
4c47affc
FB
2397 int *fds,
2398 unsigned n_storage_fds,
9b141911 2399 unsigned n_socket_fds,
ff0af2a1 2400 char **files_env,
00d9ef85 2401 int user_lookup_fd,
70dd455c
ZJS
2402 int *exit_status,
2403 char **error_message) {
d35fbf6b 2404
2065ca69 2405 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
6732edab 2406 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
4d885bd3
DH
2407 _cleanup_free_ gid_t *supplementary_gids = NULL;
2408 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2409 const char *home = NULL, *shell = NULL;
7bce046b
LP
2410 dev_t journal_stream_dev = 0;
2411 ino_t journal_stream_ino = 0;
165a31c0
LP
2412 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2413 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2414 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2415 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
ecfbc84f 2416#ifdef HAVE_SELINUX
43b1f709 2417 bool use_selinux = false;
ecfbc84f
YW
2418#endif
2419#ifdef HAVE_SMACK
43b1f709 2420 bool use_smack = false;
ecfbc84f
YW
2421#endif
2422#ifdef HAVE_APPARMOR
43b1f709 2423 bool use_apparmor = false;
ecfbc84f 2424#endif
fed1e721
LP
2425 uid_t uid = UID_INVALID;
2426 gid_t gid = GID_INVALID;
4d885bd3 2427 int i, r, ngids = 0;
4c47affc 2428 unsigned n_fds;
3536f49e 2429 ExecDirectoryType dt;
165a31c0 2430 int secure_bits;
034c6ed7 2431
f2341e0a 2432 assert(unit);
5cb5a6ff
LP
2433 assert(command);
2434 assert(context);
d35fbf6b 2435 assert(params);
ff0af2a1 2436 assert(exit_status);
70dd455c
ZJS
2437 assert(error_message);
2438 /* We don't always set error_message, hence it must be initialized */
2439 assert(*error_message == NULL);
d35fbf6b
DM
2440
2441 rename_process_from_path(command->path);
2442
2443 /* We reset exactly these signals, since they are the
2444 * only ones we set to SIG_IGN in the main daemon. All
2445 * others we leave untouched because we set them to
2446 * SIG_DFL or a valid handler initially, both of which
2447 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2448 (void) default_signals(SIGNALS_CRASH_HANDLER,
2449 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2450
2451 if (context->ignore_sigpipe)
ce30c8dc 2452 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2453
ff0af2a1
LP
2454 r = reset_signal_mask();
2455 if (r < 0) {
2456 *exit_status = EXIT_SIGNAL_MASK;
0460aa5c 2457 *error_message = strdup("Failed to set process signal mask");
70dd455c 2458 /* If strdup fails, here and below, we will just print the generic error message. */
ff0af2a1 2459 return r;
d35fbf6b 2460 }
034c6ed7 2461
d35fbf6b
DM
2462 if (params->idle_pipe)
2463 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2464
2c027c62
LP
2465 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2466 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2467 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2468 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
ff0af2a1 2469
d35fbf6b 2470 log_forget_fds();
2c027c62 2471 log_set_open_when_needed(true);
4f2d528d 2472
4c47affc 2473 n_fds = n_storage_fds + n_socket_fds;
00d9ef85 2474 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2475 if (r < 0) {
2476 *exit_status = EXIT_FDS;
0460aa5c 2477 *error_message = strdup("Failed to close unwanted file descriptors");
ff0af2a1 2478 return r;
8c7be95e
LP
2479 }
2480
d35fbf6b
DM
2481 if (!context->same_pgrp)
2482 if (setsid() < 0) {
ff0af2a1 2483 *exit_status = EXIT_SETSID;
0460aa5c 2484 *error_message = strdup("Failed to create new process session");
d35fbf6b
DM
2485 return -errno;
2486 }
9e2f7c11 2487
1e22b5cd 2488 exec_context_tty_reset(context, params);
d35fbf6b 2489
c891efaf 2490 if (unit_shall_confirm_spawn(unit)) {
7d5ceb64 2491 const char *vc = params->confirm_spawn;
3b20f877
FB
2492 _cleanup_free_ char *cmdline = NULL;
2493
2494 cmdline = exec_command_line(argv);
2495 if (!cmdline) {
0460aa5c 2496 *exit_status = EXIT_MEMORY;
3b20f877
FB
2497 return -ENOMEM;
2498 }
d35fbf6b 2499
eedf223a 2500 r = ask_for_confirmation(vc, unit, cmdline);
3b20f877
FB
2501 if (r != CONFIRM_EXECUTE) {
2502 if (r == CONFIRM_PRETEND_SUCCESS) {
2503 *exit_status = EXIT_SUCCESS;
2504 return 0;
2505 }
ff0af2a1 2506 *exit_status = EXIT_CONFIRM;
0460aa5c 2507 *error_message = strdup("Execution cancelled by the user");
d35fbf6b 2508 return -ECANCELED;
d35fbf6b
DM
2509 }
2510 }
1a63a750 2511
29206d46
LP
2512 if (context->dynamic_user && dcreds) {
2513
409093fe
LP
2514 /* Make sure we bypass our own NSS module for any NSS checks */
2515 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2516 *exit_status = EXIT_USER;
70dd455c 2517 *error_message = strdup("Failed to update environment");
409093fe
LP
2518 return -errno;
2519 }
2520
29206d46 2521 r = dynamic_creds_realize(dcreds, &uid, &gid);
ff0af2a1
LP
2522 if (r < 0) {
2523 *exit_status = EXIT_USER;
70dd455c 2524 *error_message = strdup("Failed to update dynamic user credentials");
ff0af2a1 2525 return r;
524daa8c 2526 }
524daa8c 2527
70dd455c 2528 if (!uid_is_valid(uid)) {
29206d46 2529 *exit_status = EXIT_USER;
70dd455c
ZJS
2530 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2531 /* If asprintf fails, here and below, we will just print the generic error message. */
2532 return -ESRCH;
2533 }
2534
2535 if (!gid_is_valid(gid)) {
2536 *exit_status = EXIT_USER;
2537 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
29206d46
LP
2538 return -ESRCH;
2539 }
5bc7452b 2540
29206d46
LP
2541 if (dcreds->user)
2542 username = dcreds->user->name;
2543
2544 } else {
4d885bd3
DH
2545 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2546 if (r < 0) {
2547 *exit_status = EXIT_USER;
70dd455c 2548 *error_message = strdup("Failed to determine user credentials");
4d885bd3 2549 return r;
5bc7452b 2550 }
5bc7452b 2551
4d885bd3
DH
2552 r = get_fixed_group(context, &groupname, &gid);
2553 if (r < 0) {
2554 *exit_status = EXIT_GROUP;
70dd455c 2555 *error_message = strdup("Failed to determine group credentials");
4d885bd3
DH
2556 return r;
2557 }
cdc5d5c5 2558 }
29206d46 2559
cdc5d5c5
DH
2560 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2561 r = get_supplementary_groups(context, username, groupname, gid,
2562 &supplementary_gids, &ngids);
2563 if (r < 0) {
2564 *exit_status = EXIT_GROUP;
70dd455c 2565 *error_message = strdup("Failed to determine supplementary groups");
cdc5d5c5 2566 return r;
29206d46 2567 }
5bc7452b 2568
00d9ef85
LP
2569 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2570 if (r < 0) {
2571 *exit_status = EXIT_USER;
70dd455c 2572 *error_message = strdup("Failed to send user credentials to PID1");
00d9ef85
LP
2573 return r;
2574 }
2575
2576 user_lookup_fd = safe_close(user_lookup_fd);
2577
6732edab
LP
2578 r = acquire_home(context, uid, &home, &home_buffer);
2579 if (r < 0) {
2580 *exit_status = EXIT_CHDIR;
2581 *error_message = strdup("Failed to determine $HOME for user");
2582 return r;
2583 }
2584
d35fbf6b
DM
2585 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2586 * must be sure to drop O_NONBLOCK */
2587 if (socket_fd >= 0)
a34ceba6 2588 (void) fd_nonblock(socket_fd, false);
acbb0225 2589
52c239d7 2590 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2591 if (r < 0) {
2592 *exit_status = EXIT_STDIN;
0460aa5c 2593 *error_message = strdup("Failed to set up standard input");
ff0af2a1 2594 return r;
d35fbf6b 2595 }
034c6ed7 2596
52c239d7 2597 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2598 if (r < 0) {
2599 *exit_status = EXIT_STDOUT;
0460aa5c 2600 *error_message = strdup("Failed to set up standard output");
ff0af2a1 2601 return r;
d35fbf6b
DM
2602 }
2603
52c239d7 2604 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2605 if (r < 0) {
2606 *exit_status = EXIT_STDERR;
0460aa5c 2607 *error_message = strdup("Failed to set up standard error output");
ff0af2a1 2608 return r;
d35fbf6b
DM
2609 }
2610
2611 if (params->cgroup_path) {
ff0af2a1
LP
2612 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2613 if (r < 0) {
2614 *exit_status = EXIT_CGROUP;
70dd455c 2615 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
ff0af2a1 2616 return r;
309bff19 2617 }
d35fbf6b 2618 }
309bff19 2619
d35fbf6b 2620 if (context->oom_score_adjust_set) {
d5243d62 2621 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2622
d5243d62
LP
2623 /* When we can't make this change due to EPERM, then
2624 * let's silently skip over it. User namespaces
2625 * prohibit write access to this file, and we
2626 * shouldn't trip up over that. */
613b411c 2627
d5243d62 2628 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2629 r = write_string_file("/proc/self/oom_score_adj", t, 0);
6cb7fa17 2630 if (r == -EPERM || r == -EACCES) {
ff0af2a1 2631 log_open();
f2341e0a 2632 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
ff0af2a1
LP
2633 log_close();
2634 } else if (r < 0) {
2635 *exit_status = EXIT_OOM_ADJUST;
0460aa5c 2636 *error_message = strdup("Failed to adjust OOM setting");
d35fbf6b 2637 return -errno;
613b411c 2638 }
d35fbf6b
DM
2639 }
2640
2641 if (context->nice_set)
2642 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2643 *exit_status = EXIT_NICE;
f679ed61 2644 *error_message = strdup("Failed to set up process scheduling priority (nice level)");
d35fbf6b 2645 return -errno;
613b411c
LP
2646 }
2647
d35fbf6b
DM
2648 if (context->cpu_sched_set) {
2649 struct sched_param param = {
2650 .sched_priority = context->cpu_sched_priority,
2651 };
2652
ff0af2a1
LP
2653 r = sched_setscheduler(0,
2654 context->cpu_sched_policy |
2655 (context->cpu_sched_reset_on_fork ?
2656 SCHED_RESET_ON_FORK : 0),
2657 &param);
2658 if (r < 0) {
2659 *exit_status = EXIT_SETSCHEDULER;
0460aa5c 2660 *error_message = strdup("Failed to set up CPU scheduling");
d35fbf6b 2661 return -errno;
fc9b2a84 2662 }
d35fbf6b 2663 }
fc9b2a84 2664
d35fbf6b
DM
2665 if (context->cpuset)
2666 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2667 *exit_status = EXIT_CPUAFFINITY;
0460aa5c 2668 *error_message = strdup("Failed to set up CPU affinity");
d35fbf6b 2669 return -errno;
034c6ed7
LP
2670 }
2671
d35fbf6b
DM
2672 if (context->ioprio_set)
2673 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2674 *exit_status = EXIT_IOPRIO;
0460aa5c 2675 *error_message = strdup("Failed to set up IO scheduling priority");
d35fbf6b
DM
2676 return -errno;
2677 }
da726a4d 2678
d35fbf6b
DM
2679 if (context->timer_slack_nsec != NSEC_INFINITY)
2680 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2681 *exit_status = EXIT_TIMERSLACK;
0460aa5c 2682 *error_message = strdup("Failed to set up timer slack");
d35fbf6b 2683 return -errno;
4c2630eb 2684 }
9eba9da4 2685
21022b9d
LP
2686 if (context->personality != PERSONALITY_INVALID) {
2687 r = safe_personality(context->personality);
2688 if (r < 0) {
ff0af2a1 2689 *exit_status = EXIT_PERSONALITY;
0460aa5c 2690 *error_message = strdup("Failed to set up execution domain (personality)");
21022b9d 2691 return r;
4c2630eb 2692 }
21022b9d 2693 }
94f04347 2694
d35fbf6b 2695 if (context->utmp_id)
df0ff127 2696 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
6a93917d 2697 context->tty_path,
023a4f67
LP
2698 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2699 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2700 USER_PROCESS,
6a93917d 2701 username);
d35fbf6b 2702
e0d2adfd 2703 if (context->user) {
ff0af2a1
LP
2704 r = chown_terminal(STDIN_FILENO, uid);
2705 if (r < 0) {
2706 *exit_status = EXIT_STDIN;
0460aa5c 2707 *error_message = strdup("Failed to change ownership of terminal");
ff0af2a1 2708 return r;
071830ff 2709 }
d35fbf6b 2710 }
8e274523 2711
a931ad47
LP
2712 /* If delegation is enabled we'll pass ownership of the cgroup
2713 * (but only in systemd's own controller hierarchy!) to the
2714 * user of the new process. */
584b8688 2715 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
ff0af2a1
LP
2716 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2717 if (r < 0) {
2718 *exit_status = EXIT_CGROUP;
0460aa5c 2719 *error_message = strdup("Failed to adjust control group access");
ff0af2a1 2720 return r;
d35fbf6b 2721 }
034c6ed7 2722
034c6ed7 2723
ff0af2a1
LP
2724 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2725 if (r < 0) {
2726 *exit_status = EXIT_CGROUP;
0460aa5c 2727 *error_message = strdup("Failed to adjust control group access");
ff0af2a1 2728 return r;
034c6ed7 2729 }
d35fbf6b 2730 }
034c6ed7 2731
3536f49e 2732 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
8679efde 2733 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
0460aa5c
LP
2734 if (r < 0) {
2735 *error_message = strdup("Failed to set up special execution directory");
07689d5d 2736 return r;
0460aa5c 2737 }
d35fbf6b 2738 }
94f04347 2739
7bce046b 2740 r = build_environment(
fd63e712 2741 unit,
7bce046b
LP
2742 context,
2743 params,
2744 n_fds,
2745 home,
2746 username,
2747 shell,
2748 journal_stream_dev,
2749 journal_stream_ino,
2750 &our_env);
2065ca69
JW
2751 if (r < 0) {
2752 *exit_status = EXIT_MEMORY;
2753 return r;
2754 }
2755
2756 r = build_pass_environment(context, &pass_env);
2757 if (r < 0) {
2758 *exit_status = EXIT_MEMORY;
2759 return r;
2760 }
2761
2762 accum_env = strv_env_merge(5,
2763 params->environment,
2764 our_env,
2765 pass_env,
2766 context->environment,
2767 files_env,
2768 NULL);
2769 if (!accum_env) {
2770 *exit_status = EXIT_MEMORY;
2771 return -ENOMEM;
2772 }
1280503b 2773 accum_env = strv_env_clean(accum_env);
2065ca69 2774
096424d1 2775 (void) umask(context->umask);
b213e1c1 2776
b1edf445 2777 r = setup_keyring(unit, context, params, uid, gid);
74dd6b51
LP
2778 if (r < 0) {
2779 *exit_status = EXIT_KEYRING;
0460aa5c 2780 *error_message = strdup("Failed to set up kernel keyring");
74dd6b51
LP
2781 return r;
2782 }
2783
165a31c0 2784 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
1703fa41 2785 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
7f18ef0a 2786
165a31c0
LP
2787 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2788 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
7f18ef0a 2789
165a31c0
LP
2790 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2791 if (needs_ambient_hack)
2792 needs_setuid = false;
2793 else
2794 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2795
2796 if (needs_sandboxing) {
7f18ef0a
FK
2797 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2798 * present. The actual MAC context application will happen later, as late as possible, to avoid
2799 * impacting our own code paths. */
2800
2801#ifdef HAVE_SELINUX
43b1f709 2802 use_selinux = mac_selinux_use();
7f18ef0a 2803#endif
7f18ef0a 2804#ifdef HAVE_SMACK
43b1f709 2805 use_smack = mac_smack_use();
7f18ef0a 2806#endif
7f18ef0a 2807#ifdef HAVE_APPARMOR
43b1f709 2808 use_apparmor = mac_apparmor_use();
7f18ef0a 2809#endif
165a31c0 2810 }
7f18ef0a 2811
165a31c0
LP
2812 if (needs_setuid) {
2813 if (context->pam_name && username) {
2814 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2815 if (r < 0) {
2816 *exit_status = EXIT_PAM;
0460aa5c 2817 *error_message = strdup("Failed to set up PAM session");
165a31c0
LP
2818 return r;
2819 }
2820 }
b213e1c1 2821 }
ac45f971 2822
d35fbf6b 2823 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
ff0af2a1
LP
2824 r = setup_netns(runtime->netns_storage_socket);
2825 if (r < 0) {
2826 *exit_status = EXIT_NETWORK;
0460aa5c 2827 *error_message = strdup("Failed to set up network namespacing");
ff0af2a1 2828 return r;
d35fbf6b
DM
2829 }
2830 }
169c1bda 2831
ee818b89 2832 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 2833 if (needs_mount_namespace) {
6818c54c 2834 r = apply_mount_namespace(unit, command, context, params, runtime);
3fbe8dbe
LP
2835 if (r < 0) {
2836 *exit_status = EXIT_NAMESPACE;
0460aa5c 2837 *error_message = strdup("Failed to set up mount namespacing");
3fbe8dbe
LP
2838 return r;
2839 }
d35fbf6b 2840 }
81a2b7ce 2841
50b3dfb9 2842 /* Apply just after mount namespace setup */
376fecf6 2843 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
0460aa5c
LP
2844 if (r < 0) {
2845 *error_message = strdup("Changing to the requested working directory failed");
50b3dfb9 2846 return r;
0460aa5c 2847 }
50b3dfb9 2848
bbeea271 2849 /* Drop groups as early as possible */
165a31c0 2850 if (needs_setuid) {
4d885bd3 2851 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1 2852 if (r < 0) {
0460aa5c 2853 *error_message = strdup("Changing group credentials failed");
096424d1
LP
2854 *exit_status = EXIT_GROUP;
2855 return r;
2856 }
165a31c0 2857 }
096424d1 2858
165a31c0 2859 if (needs_sandboxing) {
9008e1ac 2860#ifdef HAVE_SELINUX
43b1f709 2861 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
937ccce9
LP
2862 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2863 if (r < 0) {
0460aa5c 2864 *error_message = strdup("Failed to determine SELinux context");
937ccce9
LP
2865 *exit_status = EXIT_SELINUX_CONTEXT;
2866 return r;
2867 }
9008e1ac 2868 }
9008e1ac
MS
2869#endif
2870
937ccce9
LP
2871 if (context->private_users) {
2872 r = setup_private_users(uid, gid);
2873 if (r < 0) {
0460aa5c 2874 *error_message = strdup("Failed to set up user namespacing");
937ccce9
LP
2875 *exit_status = EXIT_USER;
2876 return r;
2877 }
d251207d
LP
2878 }
2879 }
2880
165a31c0
LP
2881 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2882 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2883 * was needed to upload the policy and can now be closed as well. */
ff0af2a1
LP
2884 r = close_all_fds(fds, n_fds);
2885 if (r >= 0)
2886 r = shift_fds(fds, n_fds);
2887 if (r >= 0)
4c47affc 2888 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
ff0af2a1 2889 if (r < 0) {
0460aa5c 2890 *error_message = strdup("Failed to adjust passed file descriptors");
ff0af2a1
LP
2891 *exit_status = EXIT_FDS;
2892 return r;
d35fbf6b 2893 }
e66cf1a3 2894
165a31c0 2895 secure_bits = context->secure_bits;
e66cf1a3 2896
165a31c0
LP
2897 if (needs_sandboxing) {
2898 uint64_t bset;
755d4b67 2899
d35fbf6b 2900 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 2901
d35fbf6b
DM
2902 if (!context->rlimit[i])
2903 continue;
2904
03857c43
LP
2905 r = setrlimit_closest(i, context->rlimit[i]);
2906 if (r < 0) {
0460aa5c 2907 *error_message = strdup("Failed to adjust resource limits");
ff0af2a1 2908 *exit_status = EXIT_LIMITS;
03857c43 2909 return r;
e66cf1a3
LP
2910 }
2911 }
2912
f4170c67
LP
2913 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2914 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2915 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
0460aa5c 2916 *error_message = strdup("Failed to adjust RLIMIT_RTPRIO resource limit");
f4170c67
LP
2917 *exit_status = EXIT_LIMITS;
2918 return -errno;
2919 }
2920 }
2921
165a31c0
LP
2922 bset = context->capability_bounding_set;
2923 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2924 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2925 * instead of us doing that */
2926 if (needs_ambient_hack)
2927 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2928 (UINT64_C(1) << CAP_SETUID) |
2929 (UINT64_C(1) << CAP_SETGID);
2930
2931 if (!cap_test_all(bset)) {
2932 r = capability_bounding_set_drop(bset, false);
ff0af2a1
LP
2933 if (r < 0) {
2934 *exit_status = EXIT_CAPABILITIES;
70dd455c 2935 *error_message = strdup("Failed to drop capabilities");
ff0af2a1 2936 return r;
3b8bddde 2937 }
4c2630eb 2938 }
3b8bddde 2939
755d4b67
IP
2940 /* This is done before enforce_user, but ambient set
2941 * does not survive over setresuid() if keep_caps is not set. */
165a31c0
LP
2942 if (!needs_ambient_hack &&
2943 context->capability_ambient_set != 0) {
755d4b67
IP
2944 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2945 if (r < 0) {
2946 *exit_status = EXIT_CAPABILITIES;
70dd455c 2947 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
755d4b67
IP
2948 return r;
2949 }
755d4b67 2950 }
165a31c0 2951 }
755d4b67 2952
165a31c0 2953 if (needs_setuid) {
d35fbf6b 2954 if (context->user) {
ff0af2a1
LP
2955 r = enforce_user(context, uid);
2956 if (r < 0) {
2957 *exit_status = EXIT_USER;
70dd455c 2958 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
ff0af2a1 2959 return r;
5b6319dc 2960 }
165a31c0
LP
2961
2962 if (!needs_ambient_hack &&
2963 context->capability_ambient_set != 0) {
755d4b67
IP
2964
2965 /* Fix the ambient capabilities after user change. */
2966 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2967 if (r < 0) {
2968 *exit_status = EXIT_CAPABILITIES;
70dd455c 2969 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
755d4b67
IP
2970 return r;
2971 }
2972
2973 /* If we were asked to change user and ambient capabilities
2974 * were requested, we had to add keep-caps to the securebits
2975 * so that we would maintain the inherited capability set
2976 * through the setresuid(). Make sure that the bit is added
2977 * also to the context secure_bits so that we don't try to
2978 * drop the bit away next. */
2979
7f508f2c 2980 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 2981 }
5b6319dc 2982 }
165a31c0 2983 }
d35fbf6b 2984
165a31c0 2985 if (needs_sandboxing) {
5cd9cd35
LP
2986 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2987 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2988 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2989 * are restricted. */
2990
2991#ifdef HAVE_SELINUX
43b1f709 2992 if (use_selinux) {
5cd9cd35
LP
2993 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2994
2995 if (exec_context) {
2996 r = setexeccon(exec_context);
2997 if (r < 0) {
2998 *exit_status = EXIT_SELINUX_CONTEXT;
0460aa5c 2999 (void) asprintf(error_message, "Failed to change SELinux context to %s", exec_context);
5cd9cd35
LP
3000 return r;
3001 }
3002 }
3003 }
3004#endif
3005
7f18ef0a 3006#ifdef HAVE_SMACK
43b1f709 3007 if (use_smack) {
7f18ef0a
FK
3008 r = setup_smack(context, command);
3009 if (r < 0) {
3010 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3011 *error_message = strdup("Failed to set SMACK process label");
3012 return r;
3013 }
5cd9cd35 3014 }
7f18ef0a 3015#endif
5cd9cd35
LP
3016
3017#ifdef HAVE_APPARMOR
43b1f709 3018 if (use_apparmor && context->apparmor_profile) {
5cd9cd35
LP
3019 r = aa_change_onexec(context->apparmor_profile);
3020 if (r < 0 && !context->apparmor_profile_ignore) {
3021 *exit_status = EXIT_APPARMOR_PROFILE;
70dd455c
ZJS
3022 (void) asprintf(error_message,
3023 "Failed to prepare AppArmor profile change to %s",
3024 context->apparmor_profile);
5cd9cd35
LP
3025 return -errno;
3026 }
3027 }
3028#endif
3029
165a31c0
LP
3030 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3031 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
3032 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3033 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 3034 *exit_status = EXIT_SECUREBITS;
0460aa5c 3035 *error_message = strdup("Failed to set process secure bits");
d35fbf6b 3036 return -errno;
ff01d048 3037 }
5b6319dc 3038
59eeb84b 3039 if (context_has_no_new_privileges(context))
d35fbf6b 3040 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 3041 *exit_status = EXIT_NO_NEW_PRIVILEGES;
70dd455c 3042 *error_message = strdup("Failed to disable new privileges");
d35fbf6b
DM
3043 return -errno;
3044 }
3045
3046#ifdef HAVE_SECCOMP
469830d1
LP
3047 r = apply_address_families(unit, context);
3048 if (r < 0) {
3049 *exit_status = EXIT_ADDRESS_FAMILIES;
5b3637b4 3050 *error_message = strdup("Failed to restrict address families");
469830d1 3051 return r;
4c2630eb 3052 }
04aa0cb9 3053
469830d1
LP
3054 r = apply_memory_deny_write_execute(unit, context);
3055 if (r < 0) {
3056 *exit_status = EXIT_SECCOMP;
5b3637b4 3057 *error_message = strdup("Failed to disable writing to executable memory");
469830d1 3058 return r;
f3e43635 3059 }
f4170c67 3060
469830d1
LP
3061 r = apply_restrict_realtime(unit, context);
3062 if (r < 0) {
3063 *exit_status = EXIT_SECCOMP;
5b3637b4 3064 *error_message = strdup("Failed to apply realtime restrictions");
469830d1 3065 return r;
f4170c67
LP
3066 }
3067
add00535
LP
3068 r = apply_restrict_namespaces(unit, context);
3069 if (r < 0) {
3070 *exit_status = EXIT_SECCOMP;
70dd455c 3071 *error_message = strdup("Failed to apply namespace restrictions");
add00535
LP
3072 return r;
3073 }
3074
469830d1
LP
3075 r = apply_protect_sysctl(unit, context);
3076 if (r < 0) {
3077 *exit_status = EXIT_SECCOMP;
5b3637b4 3078 *error_message = strdup("Failed to apply sysctl restrictions");
469830d1 3079 return r;
502d704e
DH
3080 }
3081
469830d1
LP
3082 r = apply_protect_kernel_modules(unit, context);
3083 if (r < 0) {
3084 *exit_status = EXIT_SECCOMP;
5b3637b4 3085 *error_message = strdup("Failed to apply module loading restrictions");
469830d1 3086 return r;
59eeb84b
LP
3087 }
3088
469830d1
LP
3089 r = apply_private_devices(unit, context);
3090 if (r < 0) {
3091 *exit_status = EXIT_SECCOMP;
5b3637b4 3092 *error_message = strdup("Failed to set up private devices");
469830d1
LP
3093 return r;
3094 }
3095
3096 r = apply_syscall_archs(unit, context);
3097 if (r < 0) {
3098 *exit_status = EXIT_SECCOMP;
5b3637b4 3099 *error_message = strdup("Failed to apply syscall architecture restrictions");
469830d1 3100 return r;
ba128bb8
LP
3101 }
3102
78e864e5
TM
3103 r = apply_lock_personality(unit, context);
3104 if (r < 0) {
3105 *exit_status = EXIT_SECCOMP;
3106 *error_message = strdup("Failed to lock personalities");
3107 return r;
3108 }
3109
5cd9cd35
LP
3110 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3111 * by the filter as little as possible. */
165a31c0 3112 r = apply_syscall_filter(unit, context, needs_ambient_hack);
469830d1
LP
3113 if (r < 0) {
3114 *exit_status = EXIT_SECCOMP;
0460aa5c 3115 *error_message = strdup("Failed to apply system call filters");
469830d1 3116 return r;
d35fbf6b
DM
3117 }
3118#endif
d35fbf6b 3119 }
034c6ed7 3120
00819cc1
LP
3121 if (!strv_isempty(context->unset_environment)) {
3122 char **ee = NULL;
3123
3124 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3125 if (!ee) {
3126 *exit_status = EXIT_MEMORY;
3127 return -ENOMEM;
3128 }
3129
3130 strv_free(accum_env);
3131 accum_env = ee;
3132 }
3133
2065ca69 3134 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 3135 if (!final_argv) {
ff0af2a1 3136 *exit_status = EXIT_MEMORY;
70dd455c 3137 *error_message = strdup("Failed to prepare process arguments");
d35fbf6b
DM
3138 return -ENOMEM;
3139 }
034c6ed7 3140
553d2243 3141 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 3142 _cleanup_free_ char *line;
81a2b7ce 3143
d35fbf6b
DM
3144 line = exec_command_line(final_argv);
3145 if (line) {
3146 log_open();
f2341e0a 3147 log_struct(LOG_DEBUG,
f2341e0a
LP
3148 "EXECUTABLE=%s", command->path,
3149 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
ba360bb0 3150 LOG_UNIT_ID(unit),
f1c50bec 3151 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3152 NULL);
d35fbf6b
DM
3153 log_close();
3154 }
3155 }
dd305ec9 3156
2065ca69 3157 execve(command->path, final_argv, accum_env);
ff0af2a1 3158 *exit_status = EXIT_EXEC;
d35fbf6b
DM
3159 return -errno;
3160}
81a2b7ce 3161
f2341e0a
LP
3162int exec_spawn(Unit *unit,
3163 ExecCommand *command,
d35fbf6b
DM
3164 const ExecContext *context,
3165 const ExecParameters *params,
3166 ExecRuntime *runtime,
29206d46 3167 DynamicCreds *dcreds,
d35fbf6b 3168 pid_t *ret) {
8351ceae 3169
d35fbf6b 3170 _cleanup_strv_free_ char **files_env = NULL;
9b141911 3171 int *fds = NULL;
4c47affc 3172 unsigned n_storage_fds = 0, n_socket_fds = 0;
ff0af2a1
LP
3173 _cleanup_free_ char *line = NULL;
3174 int socket_fd, r;
52c239d7 3175 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 3176 char **argv;
d35fbf6b 3177 pid_t pid;
8351ceae 3178
f2341e0a 3179 assert(unit);
d35fbf6b
DM
3180 assert(command);
3181 assert(context);
3182 assert(ret);
3183 assert(params);
4c47affc 3184 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
4298d0b5 3185
d35fbf6b
DM
3186 if (context->std_input == EXEC_INPUT_SOCKET ||
3187 context->std_output == EXEC_OUTPUT_SOCKET ||
3188 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 3189
4c47affc 3190 if (params->n_socket_fds > 1) {
f2341e0a 3191 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 3192 return -EINVAL;
ff0af2a1 3193 }
eef65bf3 3194
4c47affc 3195 if (params->n_socket_fds == 0) {
488ab41c
AA
3196 log_unit_error(unit, "Got no socket.");
3197 return -EINVAL;
3198 }
3199
d35fbf6b
DM
3200 socket_fd = params->fds[0];
3201 } else {
3202 socket_fd = -1;
3203 fds = params->fds;
4c47affc 3204 n_storage_fds = params->n_storage_fds;
9b141911 3205 n_socket_fds = params->n_socket_fds;
d35fbf6b 3206 }
94f04347 3207
52c239d7
LB
3208 r = exec_context_named_iofds(unit, context, params, named_iofds);
3209 if (r < 0)
3210 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3211
f2341e0a 3212 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 3213 if (r < 0)
f2341e0a 3214 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 3215
d35fbf6b 3216 argv = params->argv ?: command->argv;
d35fbf6b
DM
3217 line = exec_command_line(argv);
3218 if (!line)
3219 return log_oom();
fab56fc5 3220
f2341e0a 3221 log_struct(LOG_DEBUG,
f2341e0a
LP
3222 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3223 "EXECUTABLE=%s", command->path,
ba360bb0 3224 LOG_UNIT_ID(unit),
f1c50bec 3225 LOG_UNIT_INVOCATION_ID(unit),
f2341e0a 3226 NULL);
d35fbf6b
DM
3227 pid = fork();
3228 if (pid < 0)
74129a12 3229 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
3230
3231 if (pid == 0) {
ff0af2a1 3232 int exit_status;
70dd455c 3233 _cleanup_free_ char *error_message = NULL;
ff0af2a1 3234
f2341e0a
LP
3235 r = exec_child(unit,
3236 command,
ff0af2a1
LP
3237 context,
3238 params,
3239 runtime,
29206d46 3240 dcreds,
ff0af2a1
LP
3241 argv,
3242 socket_fd,
52c239d7 3243 named_iofds,
4c47affc
FB
3244 fds,
3245 n_storage_fds,
9b141911 3246 n_socket_fds,
ff0af2a1 3247 files_env,
00d9ef85 3248 unit->manager->user_lookup_fds[1],
70dd455c
ZJS
3249 &exit_status,
3250 &error_message);
ff0af2a1 3251 if (r < 0) {
4c2630eb 3252 log_open();
70dd455c
ZJS
3253 if (error_message)
3254 log_struct_errno(LOG_ERR, r,
2b044526 3255 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c 3256 LOG_UNIT_ID(unit),
f1c50bec 3257 LOG_UNIT_INVOCATION_ID(unit),
70dd455c
ZJS
3258 LOG_UNIT_MESSAGE(unit, "%s: %m",
3259 error_message),
3260 "EXECUTABLE=%s", command->path,
3261 NULL);
3ed0cd26 3262 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
4d8b0f0f
YW
3263 log_struct_errno(LOG_INFO, r,
3264 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3265 LOG_UNIT_ID(unit),
f1c50bec 3266 LOG_UNIT_INVOCATION_ID(unit),
4d8b0f0f
YW
3267 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3268 command->path),
3269 "EXECUTABLE=%s", command->path,
3270 NULL);
70dd455c
ZJS
3271 else
3272 log_struct_errno(LOG_ERR, r,
2b044526 3273 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
70dd455c 3274 LOG_UNIT_ID(unit),
f1c50bec 3275 LOG_UNIT_INVOCATION_ID(unit),
70dd455c
ZJS
3276 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3277 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3278 command->path),
3279 "EXECUTABLE=%s", command->path,
3280 NULL);
4c2630eb
MS
3281 }
3282
ff0af2a1 3283 _exit(exit_status);
034c6ed7
LP
3284 }
3285
f2341e0a 3286 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 3287
80876c20
LP
3288 /* We add the new process to the cgroup both in the child (so
3289 * that we can be sure that no user code is ever executed
3290 * outside of the cgroup) and in the parent (so that we can be
3291 * sure that when we kill the cgroup the process will be
3292 * killed too). */
d35fbf6b 3293 if (params->cgroup_path)
dd305ec9 3294 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 3295
b58b4116 3296 exec_status_start(&command->exec_status, pid);
9fb86720 3297
034c6ed7 3298 *ret = pid;
5cb5a6ff
LP
3299 return 0;
3300}
3301
034c6ed7 3302void exec_context_init(ExecContext *c) {
3536f49e
YW
3303 ExecDirectoryType i;
3304
034c6ed7
LP
3305 assert(c);
3306
4c12626c 3307 c->umask = 0022;
9eba9da4 3308 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 3309 c->cpu_sched_policy = SCHED_OTHER;
071830ff 3310 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 3311 c->syslog_level_prefix = true;
353e12c2 3312 c->ignore_sigpipe = true;
3a43da28 3313 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 3314 c->personality = PERSONALITY_INVALID;
3536f49e
YW
3315 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3316 c->directories[i].mode = 0755;
a103496c 3317 c->capability_bounding_set = CAP_ALL;
add00535 3318 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
3319}
3320
613b411c 3321void exec_context_done(ExecContext *c) {
5cb5a6ff 3322 unsigned l;
3536f49e 3323 ExecDirectoryType i;
5cb5a6ff
LP
3324
3325 assert(c);
3326
6796073e
LP
3327 c->environment = strv_free(c->environment);
3328 c->environment_files = strv_free(c->environment_files);
b4c14404 3329 c->pass_environment = strv_free(c->pass_environment);
00819cc1 3330 c->unset_environment = strv_free(c->unset_environment);
8c7be95e 3331
1f6b4113 3332 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 3333 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 3334
52c239d7
LB
3335 for (l = 0; l < 3; l++)
3336 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3337
a1e58e8e
LP
3338 c->working_directory = mfree(c->working_directory);
3339 c->root_directory = mfree(c->root_directory);
915e6d16 3340 c->root_image = mfree(c->root_image);
a1e58e8e
LP
3341 c->tty_path = mfree(c->tty_path);
3342 c->syslog_identifier = mfree(c->syslog_identifier);
3343 c->user = mfree(c->user);
3344 c->group = mfree(c->group);
034c6ed7 3345
6796073e 3346 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3347
a1e58e8e 3348 c->pam_name = mfree(c->pam_name);
5b6319dc 3349
2a624c36
AP
3350 c->read_only_paths = strv_free(c->read_only_paths);
3351 c->read_write_paths = strv_free(c->read_write_paths);
3352 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4 3353
d2d6c096
LP
3354 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3355
82c121a4
LP
3356 if (c->cpuset)
3357 CPU_FREE(c->cpuset);
86a3475b 3358
a1e58e8e
LP
3359 c->utmp_id = mfree(c->utmp_id);
3360 c->selinux_context = mfree(c->selinux_context);
3361 c->apparmor_profile = mfree(c->apparmor_profile);
5b8e1b77 3362 c->smack_process_label = mfree(c->smack_process_label);
eef65bf3 3363
525d3cc7
LP
3364 c->syscall_filter = set_free(c->syscall_filter);
3365 c->syscall_archs = set_free(c->syscall_archs);
3366 c->address_families = set_free(c->address_families);
e66cf1a3 3367
3536f49e
YW
3368 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3369 c->directories[i].paths = strv_free(c->directories[i].paths);
e66cf1a3
LP
3370}
3371
3372int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3373 char **i;
3374
3375 assert(c);
3376
3377 if (!runtime_prefix)
3378 return 0;
3379
3536f49e 3380 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
e66cf1a3
LP
3381 _cleanup_free_ char *p;
3382
605405c6 3383 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3384 if (!p)
3385 return -ENOMEM;
3386
3387 /* We execute this synchronously, since we need to be
3388 * sure this is gone when we start the service
3389 * next. */
c6878637 3390 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3391 }
3392
3393 return 0;
5cb5a6ff
LP
3394}
3395
43d0fcbd
LP
3396void exec_command_done(ExecCommand *c) {
3397 assert(c);
3398
a1e58e8e 3399 c->path = mfree(c->path);
43d0fcbd 3400
6796073e 3401 c->argv = strv_free(c->argv);
43d0fcbd
LP
3402}
3403
3404void exec_command_done_array(ExecCommand *c, unsigned n) {
3405 unsigned i;
3406
3407 for (i = 0; i < n; i++)
3408 exec_command_done(c+i);
3409}
3410
f1acf85a 3411ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3412 ExecCommand *i;
3413
3414 while ((i = c)) {
71fda00f 3415 LIST_REMOVE(command, c, i);
43d0fcbd 3416 exec_command_done(i);
5cb5a6ff
LP
3417 free(i);
3418 }
f1acf85a
ZJS
3419
3420 return NULL;
5cb5a6ff
LP
3421}
3422
034c6ed7
LP
3423void exec_command_free_array(ExecCommand **c, unsigned n) {
3424 unsigned i;
3425
f1acf85a
ZJS
3426 for (i = 0; i < n; i++)
3427 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3428}
3429
039f0e70 3430typedef struct InvalidEnvInfo {
f2341e0a 3431 Unit *unit;
039f0e70
LP
3432 const char *path;
3433} InvalidEnvInfo;
3434
3435static void invalid_env(const char *p, void *userdata) {
3436 InvalidEnvInfo *info = userdata;
3437
f2341e0a 3438 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3439}
3440
52c239d7
LB
3441const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3442 assert(c);
3443
3444 switch (fd_index) {
3445 case STDIN_FILENO:
3446 if (c->std_input != EXEC_INPUT_NAMED_FD)
3447 return NULL;
3448 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3449 case STDOUT_FILENO:
3450 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3451 return NULL;
3452 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3453 case STDERR_FILENO:
3454 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3455 return NULL;
3456 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3457 default:
3458 return NULL;
3459 }
3460}
3461
3462int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3463 unsigned i, targets;
56fbd561 3464 const char* stdio_fdname[3];
4c47affc 3465 unsigned n_fds;
52c239d7
LB
3466
3467 assert(c);
3468 assert(p);
3469
3470 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3471 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3472 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3473
3474 for (i = 0; i < 3; i++)
3475 stdio_fdname[i] = exec_context_fdname(c, i);
3476
4c47affc
FB
3477 n_fds = p->n_storage_fds + p->n_socket_fds;
3478
3479 for (i = 0; i < n_fds && targets > 0; i++)
56fbd561
ZJS
3480 if (named_iofds[STDIN_FILENO] < 0 &&
3481 c->std_input == EXEC_INPUT_NAMED_FD &&
3482 stdio_fdname[STDIN_FILENO] &&
3483 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3484
52c239d7
LB
3485 named_iofds[STDIN_FILENO] = p->fds[i];
3486 targets--;
56fbd561
ZJS
3487
3488 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3489 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3490 stdio_fdname[STDOUT_FILENO] &&
3491 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3492
52c239d7
LB
3493 named_iofds[STDOUT_FILENO] = p->fds[i];
3494 targets--;
56fbd561
ZJS
3495
3496 } else if (named_iofds[STDERR_FILENO] < 0 &&
3497 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3498 stdio_fdname[STDERR_FILENO] &&
3499 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3500
52c239d7
LB
3501 named_iofds[STDERR_FILENO] = p->fds[i];
3502 targets--;
3503 }
3504
56fbd561 3505 return targets == 0 ? 0 : -ENOENT;
52c239d7
LB
3506}
3507
f2341e0a 3508int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3509 char **i, **r = NULL;
3510
3511 assert(c);
3512 assert(l);
3513
3514 STRV_FOREACH(i, c->environment_files) {
3515 char *fn;
52511fae
ZJS
3516 int k;
3517 unsigned n;
8c7be95e
LP
3518 bool ignore = false;
3519 char **p;
7fd1b19b 3520 _cleanup_globfree_ glob_t pglob = {};
8c7be95e
LP
3521
3522 fn = *i;
3523
3524 if (fn[0] == '-') {
3525 ignore = true;
313cefa1 3526 fn++;
8c7be95e
LP
3527 }
3528
3529 if (!path_is_absolute(fn)) {
8c7be95e
LP
3530 if (ignore)
3531 continue;
3532
3533 strv_free(r);
3534 return -EINVAL;
3535 }
3536
2bef10ab 3537 /* Filename supports globbing, take all matching files */
d8c92e8b
ZJS
3538 k = safe_glob(fn, 0, &pglob);
3539 if (k < 0) {
2bef10ab
PL
3540 if (ignore)
3541 continue;
8c7be95e 3542
2bef10ab 3543 strv_free(r);
d8c92e8b 3544 return k;
2bef10ab 3545 }
8c7be95e 3546
d8c92e8b
ZJS
3547 /* When we don't match anything, -ENOENT should be returned */
3548 assert(pglob.gl_pathc > 0);
3549
3550 for (n = 0; n < pglob.gl_pathc; n++) {
717603e3 3551 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3552 if (k < 0) {
3553 if (ignore)
3554 continue;
8c7be95e 3555
2bef10ab 3556 strv_free(r);
2bef10ab 3557 return k;
e9c1ea9d 3558 }
ebc05a09 3559 /* Log invalid environment variables with filename */
039f0e70
LP
3560 if (p) {
3561 InvalidEnvInfo info = {
f2341e0a 3562 .unit = unit,
039f0e70
LP
3563 .path = pglob.gl_pathv[n]
3564 };
3565
3566 p = strv_env_clean_with_callback(p, invalid_env, &info);
3567 }
8c7be95e 3568
2bef10ab
PL
3569 if (r == NULL)
3570 r = p;
3571 else {
3572 char **m;
8c7be95e 3573
2bef10ab
PL
3574 m = strv_env_merge(2, r, p);
3575 strv_free(r);
3576 strv_free(p);
c84a9488 3577 if (!m)
2bef10ab 3578 return -ENOMEM;
2bef10ab
PL
3579
3580 r = m;
3581 }
8c7be95e
LP
3582 }
3583 }
3584
3585 *l = r;
3586
3587 return 0;
3588}
3589
6ac8fdc9 3590static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3591 _cleanup_free_ char *active = NULL;
7d6884b6 3592 char *console;
6ac8fdc9 3593
1e22b5cd
LP
3594 if (!tty)
3595 return true;
3596
a119ec7c 3597 tty = skip_dev_prefix(tty);
6ac8fdc9
MS
3598
3599 /* trivial identity? */
3600 if (streq(tty, "console"))
3601 return true;
3602
3603 console = resolve_dev_console(&active);
3604 /* if we could not resolve, assume it may */
3605 if (!console)
3606 return true;
3607
3608 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3609 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3610}
3611
3612bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3613
3614 return (ec->tty_reset ||
3615 ec->tty_vhangup ||
3616 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3617 is_terminal_input(ec->std_input) ||
3618 is_terminal_output(ec->std_output) ||
3619 is_terminal_output(ec->std_error)) &&
1e22b5cd 3620 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3621}
3622
15ae422b
LP
3623static void strv_fprintf(FILE *f, char **l) {
3624 char **g;
3625
3626 assert(f);
3627
3628 STRV_FOREACH(g, l)
3629 fprintf(f, " %s", *g);
3630}
3631
5cb5a6ff 3632void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3633 char **e, **d;
94f04347 3634 unsigned i;
3536f49e 3635 ExecDirectoryType dt;
add00535 3636 int r;
9eba9da4 3637
5cb5a6ff
LP
3638 assert(c);
3639 assert(f);
3640
4ad49000 3641 prefix = strempty(prefix);
5cb5a6ff
LP
3642
3643 fprintf(f,
94f04347
LP
3644 "%sUMask: %04o\n"
3645 "%sWorkingDirectory: %s\n"
451a074f 3646 "%sRootDirectory: %s\n"
15ae422b 3647 "%sNonBlocking: %s\n"
64747e2d 3648 "%sPrivateTmp: %s\n"
7f112f50 3649 "%sPrivateDevices: %s\n"
59eeb84b 3650 "%sProtectKernelTunables: %s\n"
e66a2f65 3651 "%sProtectKernelModules: %s\n"
59eeb84b 3652 "%sProtectControlGroups: %s\n"
d251207d
LP
3653 "%sPrivateNetwork: %s\n"
3654 "%sPrivateUsers: %s\n"
1b8689f9
LP
3655 "%sProtectHome: %s\n"
3656 "%sProtectSystem: %s\n"
5d997827 3657 "%sMountAPIVFS: %s\n"
f3e43635 3658 "%sIgnoreSIGPIPE: %s\n"
f4170c67 3659 "%sMemoryDenyWriteExecute: %s\n"
b1edf445
LP
3660 "%sRestrictRealtime: %s\n"
3661 "%sKeyringMode: %s\n",
5cb5a6ff 3662 prefix, c->umask,
9eba9da4 3663 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3664 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3665 prefix, yes_no(c->non_blocking),
64747e2d 3666 prefix, yes_no(c->private_tmp),
7f112f50 3667 prefix, yes_no(c->private_devices),
59eeb84b 3668 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3669 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3670 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3671 prefix, yes_no(c->private_network),
3672 prefix, yes_no(c->private_users),
1b8689f9
LP
3673 prefix, protect_home_to_string(c->protect_home),
3674 prefix, protect_system_to_string(c->protect_system),
5d997827 3675 prefix, yes_no(c->mount_apivfs),
f3e43635 3676 prefix, yes_no(c->ignore_sigpipe),
f4170c67 3677 prefix, yes_no(c->memory_deny_write_execute),
b1edf445
LP
3678 prefix, yes_no(c->restrict_realtime),
3679 prefix, exec_keyring_mode_to_string(c->keyring_mode));
fb33a393 3680
915e6d16
LP
3681 if (c->root_image)
3682 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3683
8c7be95e
LP
3684 STRV_FOREACH(e, c->environment)
3685 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3686
3687 STRV_FOREACH(e, c->environment_files)
3688 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3689
b4c14404
FB
3690 STRV_FOREACH(e, c->pass_environment)
3691 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3692
00819cc1
LP
3693 STRV_FOREACH(e, c->unset_environment)
3694 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3695
53f47dfc
YW
3696 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3697
3536f49e
YW
3698 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3699 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3700
3701 STRV_FOREACH(d, c->directories[dt].paths)
3702 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3703 }
c2bbd90b 3704
fb33a393
LP
3705 if (c->nice_set)
3706 fprintf(f,
3707 "%sNice: %i\n",
3708 prefix, c->nice);
3709
dd6c17b1 3710 if (c->oom_score_adjust_set)
fb33a393 3711 fprintf(f,
dd6c17b1
LP
3712 "%sOOMScoreAdjust: %i\n",
3713 prefix, c->oom_score_adjust);
9eba9da4 3714
94f04347 3715 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3716 if (c->rlimit[i]) {
3717 fprintf(f, "%s%s: " RLIM_FMT "\n",
3718 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3719 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3720 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3721 }
94f04347 3722
f8b69d1d 3723 if (c->ioprio_set) {
1756a011 3724 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3725
837df140
YW
3726 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3727 if (r >= 0)
3728 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3729
3730 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3731 }
94f04347 3732
f8b69d1d 3733 if (c->cpu_sched_set) {
1756a011 3734 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3735
837df140
YW
3736 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3737 if (r >= 0)
3738 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3739
94f04347 3740 fprintf(f,
38b48754
LP
3741 "%sCPUSchedulingPriority: %i\n"
3742 "%sCPUSchedulingResetOnFork: %s\n",
38b48754
LP
3743 prefix, c->cpu_sched_priority,
3744 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3745 }
94f04347 3746
82c121a4 3747 if (c->cpuset) {
94f04347 3748 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3749 for (i = 0; i < c->cpuset_ncpus; i++)
3750 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3751 fprintf(f, " %u", i);
94f04347
LP
3752 fputs("\n", f);
3753 }
3754
3a43da28 3755 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3756 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3757
3758 fprintf(f,
80876c20
LP
3759 "%sStandardInput: %s\n"
3760 "%sStandardOutput: %s\n"
3761 "%sStandardError: %s\n",
3762 prefix, exec_input_to_string(c->std_input),
3763 prefix, exec_output_to_string(c->std_output),
3764 prefix, exec_output_to_string(c->std_error));
3765
3766 if (c->tty_path)
3767 fprintf(f,
6ea832a2
LP
3768 "%sTTYPath: %s\n"
3769 "%sTTYReset: %s\n"
3770 "%sTTYVHangup: %s\n"
3771 "%sTTYVTDisallocate: %s\n",
3772 prefix, c->tty_path,
3773 prefix, yes_no(c->tty_reset),
3774 prefix, yes_no(c->tty_vhangup),
3775 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3776
9f6444eb
LP
3777 if (IN_SET(c->std_output,
3778 EXEC_OUTPUT_SYSLOG,
3779 EXEC_OUTPUT_KMSG,
3780 EXEC_OUTPUT_JOURNAL,
3781 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3782 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3783 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3784 IN_SET(c->std_error,
3785 EXEC_OUTPUT_SYSLOG,
3786 EXEC_OUTPUT_KMSG,
3787 EXEC_OUTPUT_JOURNAL,
3788 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3789 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3790 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
f8b69d1d 3791
5ce70e5b 3792 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3793
837df140
YW
3794 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3795 if (r >= 0)
3796 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
f8b69d1d 3797
837df140
YW
3798 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3799 if (r >= 0)
3800 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
f8b69d1d 3801 }
94f04347 3802
07d46372
YW
3803 if (c->secure_bits) {
3804 _cleanup_free_ char *str = NULL;
3805
3806 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3807 if (r >= 0)
3808 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3809 }
94f04347 3810
a103496c 3811 if (c->capability_bounding_set != CAP_ALL) {
dd1f5bd0 3812 _cleanup_free_ char *str = NULL;
94f04347 3813
dd1f5bd0
YW
3814 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3815 if (r >= 0)
3816 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
755d4b67
IP
3817 }
3818
3819 if (c->capability_ambient_set != 0) {
dd1f5bd0 3820 _cleanup_free_ char *str = NULL;
755d4b67 3821
dd1f5bd0
YW
3822 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3823 if (r >= 0)
3824 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
94f04347
LP
3825 }
3826
3827 if (c->user)
f2d3769a 3828 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 3829 if (c->group)
f2d3769a 3830 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 3831
29206d46
LP
3832 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3833
15ae422b 3834 if (strv_length(c->supplementary_groups) > 0) {
94f04347 3835 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
3836 strv_fprintf(f, c->supplementary_groups);
3837 fputs("\n", f);
3838 }
94f04347 3839
5b6319dc 3840 if (c->pam_name)
f2d3769a 3841 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 3842
2a624c36
AP
3843 if (strv_length(c->read_write_paths) > 0) {
3844 fprintf(f, "%sReadWritePaths:", prefix);
3845 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
3846 fputs("\n", f);
3847 }
3848
2a624c36
AP
3849 if (strv_length(c->read_only_paths) > 0) {
3850 fprintf(f, "%sReadOnlyPaths:", prefix);
3851 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
3852 fputs("\n", f);
3853 }
94f04347 3854
2a624c36
AP
3855 if (strv_length(c->inaccessible_paths) > 0) {
3856 fprintf(f, "%sInaccessiblePaths:", prefix);
3857 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
3858 fputs("\n", f);
3859 }
2e22afe9 3860
d2d6c096
LP
3861 if (c->n_bind_mounts > 0)
3862 for (i = 0; i < c->n_bind_mounts; i++) {
3863 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3864 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3865 c->bind_mounts[i].source,
3866 c->bind_mounts[i].destination,
3867 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3868 }
3869
169c1bda
LP
3870 if (c->utmp_id)
3871 fprintf(f,
3872 "%sUtmpIdentifier: %s\n",
3873 prefix, c->utmp_id);
7b52a628
MS
3874
3875 if (c->selinux_context)
3876 fprintf(f,
5f8640fb
LP
3877 "%sSELinuxContext: %s%s\n",
3878 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 3879
80c21aea
WC
3880 if (c->apparmor_profile)
3881 fprintf(f,
3882 "%sAppArmorProfile: %s%s\n",
3883 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3884
3885 if (c->smack_process_label)
3886 fprintf(f,
3887 "%sSmackProcessLabel: %s%s\n",
3888 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3889
050f7277 3890 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
3891 fprintf(f,
3892 "%sPersonality: %s\n",
3893 prefix, strna(personality_to_string(c->personality)));
3894
78e864e5
TM
3895 fprintf(f,
3896 "%sLockPersonality: %s\n",
3897 prefix, yes_no(c->lock_personality));
3898
17df7223 3899 if (c->syscall_filter) {
351a19b1 3900#ifdef HAVE_SECCOMP
17df7223
LP
3901 Iterator j;
3902 void *id;
3903 bool first = true;
351a19b1 3904#endif
17df7223
LP
3905
3906 fprintf(f,
57183d11 3907 "%sSystemCallFilter: ",
17df7223
LP
3908 prefix);
3909
3910 if (!c->syscall_whitelist)
3911 fputc('~', f);
3912
351a19b1 3913#ifdef HAVE_SECCOMP
17df7223
LP
3914 SET_FOREACH(id, c->syscall_filter, j) {
3915 _cleanup_free_ char *name = NULL;
3916
3917 if (first)
3918 first = false;
3919 else
3920 fputc(' ', f);
3921
57183d11 3922 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
3923 fputs(strna(name), f);
3924 }
351a19b1 3925#endif
17df7223
LP
3926
3927 fputc('\n', f);
3928 }
3929
57183d11
LP
3930 if (c->syscall_archs) {
3931#ifdef HAVE_SECCOMP
3932 Iterator j;
3933 void *id;
3934#endif
3935
3936 fprintf(f,
3937 "%sSystemCallArchitectures:",
3938 prefix);
3939
3940#ifdef HAVE_SECCOMP
3941 SET_FOREACH(id, c->syscall_archs, j)
3942 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3943#endif
3944 fputc('\n', f);
3945 }
3946
add00535
LP
3947 if (exec_context_restrict_namespaces_set(c)) {
3948 _cleanup_free_ char *s = NULL;
3949
3950 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3951 if (r >= 0)
3952 fprintf(f, "%sRestrictNamespaces: %s\n",
3953 prefix, s);
3954 }
3955
b3267152 3956 if (c->syscall_errno > 0)
17df7223
LP
3957 fprintf(f,
3958 "%sSystemCallErrorNumber: %s\n",
3959 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
3960
3961 if (c->apparmor_profile)
3962 fprintf(f,
3963 "%sAppArmorProfile: %s%s\n",
3964 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
3965}
3966
a931ad47
LP
3967bool exec_context_maintains_privileges(ExecContext *c) {
3968 assert(c);
3969
61233823 3970 /* Returns true if the process forked off would run under
a931ad47
LP
3971 * an unchanged UID or as root. */
3972
3973 if (!c->user)
3974 return true;
3975
3976 if (streq(c->user, "root") || streq(c->user, "0"))
3977 return true;
3978
3979 return false;
3980}
3981
7f452159
LP
3982int exec_context_get_effective_ioprio(ExecContext *c) {
3983 int p;
3984
3985 assert(c);
3986
3987 if (c->ioprio_set)
3988 return c->ioprio;
3989
3990 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3991 if (p < 0)
3992 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3993
3994 return p;
3995}
3996
b58b4116 3997void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 3998 assert(s);
5cb5a6ff 3999
b58b4116
LP
4000 zero(*s);
4001 s->pid = pid;
4002 dual_timestamp_get(&s->start_timestamp);
4003}
4004
6ea832a2 4005void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
4006 assert(s);
4007
0b1f4ae6 4008 if (s->pid && s->pid != pid)
b58b4116
LP
4009 zero(*s);
4010
034c6ed7 4011 s->pid = pid;
63983207 4012 dual_timestamp_get(&s->exit_timestamp);
9fb86720 4013
034c6ed7
LP
4014 s->code = code;
4015 s->status = status;
169c1bda 4016
6ea832a2
LP
4017 if (context) {
4018 if (context->utmp_id)
4019 utmp_put_dead_process(context->utmp_id, pid, code, status);
4020
1e22b5cd 4021 exec_context_tty_reset(context, NULL);
6ea832a2 4022 }
9fb86720
LP
4023}
4024
4025void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4026 char buf[FORMAT_TIMESTAMP_MAX];
4027
4028 assert(s);
4029 assert(f);
4030
9fb86720
LP
4031 if (s->pid <= 0)
4032 return;
4033
4c940960
LP
4034 prefix = strempty(prefix);
4035
9fb86720 4036 fprintf(f,
ccd06097
ZJS
4037 "%sPID: "PID_FMT"\n",
4038 prefix, s->pid);
9fb86720 4039
af9d16e1 4040 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
4041 fprintf(f,
4042 "%sStart Timestamp: %s\n",
63983207 4043 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 4044
af9d16e1 4045 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
4046 fprintf(f,
4047 "%sExit Timestamp: %s\n"
4048 "%sExit Code: %s\n"
4049 "%sExit Status: %i\n",
63983207 4050 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
4051 prefix, sigchld_code_to_string(s->code),
4052 prefix, s->status);
5cb5a6ff 4053}
44d8db9e 4054
9e2f7c11 4055char *exec_command_line(char **argv) {
44d8db9e
LP
4056 size_t k;
4057 char *n, *p, **a;
4058 bool first = true;
4059
9e2f7c11 4060 assert(argv);
44d8db9e 4061
9164977d 4062 k = 1;
9e2f7c11 4063 STRV_FOREACH(a, argv)
44d8db9e
LP
4064 k += strlen(*a)+3;
4065
5cd9cd35
LP
4066 n = new(char, k);
4067 if (!n)
44d8db9e
LP
4068 return NULL;
4069
4070 p = n;
9e2f7c11 4071 STRV_FOREACH(a, argv) {
44d8db9e
LP
4072
4073 if (!first)
4074 *(p++) = ' ';
4075 else
4076 first = false;
4077
4078 if (strpbrk(*a, WHITESPACE)) {
4079 *(p++) = '\'';
4080 p = stpcpy(p, *a);
4081 *(p++) = '\'';
4082 } else
4083 p = stpcpy(p, *a);
4084
4085 }
4086
9164977d
LP
4087 *p = 0;
4088
44d8db9e
LP
4089 /* FIXME: this doesn't really handle arguments that have
4090 * spaces and ticks in them */
4091
4092 return n;
4093}
4094
4095void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 4096 _cleanup_free_ char *cmd = NULL;
4c940960 4097 const char *prefix2;
44d8db9e
LP
4098
4099 assert(c);
4100 assert(f);
4101
4c940960 4102 prefix = strempty(prefix);
63c372cb 4103 prefix2 = strjoina(prefix, "\t");
44d8db9e 4104
9e2f7c11 4105 cmd = exec_command_line(c->argv);
44d8db9e
LP
4106 fprintf(f,
4107 "%sCommand Line: %s\n",
4108 prefix, cmd ? cmd : strerror(ENOMEM));
4109
9fb86720 4110 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
4111}
4112
4113void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4114 assert(f);
4115
4c940960 4116 prefix = strempty(prefix);
44d8db9e
LP
4117
4118 LIST_FOREACH(command, c, c)
4119 exec_command_dump(c, f, prefix);
4120}
94f04347 4121
a6a80b4f
LP
4122void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4123 ExecCommand *end;
4124
4125 assert(l);
4126 assert(e);
4127
4128 if (*l) {
35b8ca3a 4129 /* It's kind of important, that we keep the order here */
71fda00f
LP
4130 LIST_FIND_TAIL(command, *l, end);
4131 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
4132 } else
4133 *l = e;
4134}
4135
26fd040d
LP
4136int exec_command_set(ExecCommand *c, const char *path, ...) {
4137 va_list ap;
4138 char **l, *p;
4139
4140 assert(c);
4141 assert(path);
4142
4143 va_start(ap, path);
4144 l = strv_new_ap(path, ap);
4145 va_end(ap);
4146
4147 if (!l)
4148 return -ENOMEM;
4149
250a918d
LP
4150 p = strdup(path);
4151 if (!p) {
26fd040d
LP
4152 strv_free(l);
4153 return -ENOMEM;
4154 }
4155
4156 free(c->path);
4157 c->path = p;
4158
4159 strv_free(c->argv);
4160 c->argv = l;
4161
4162 return 0;
4163}
4164
86b23b07 4165int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 4166 _cleanup_strv_free_ char **l = NULL;
86b23b07 4167 va_list ap;
86b23b07
JS
4168 int r;
4169
4170 assert(c);
4171 assert(path);
4172
4173 va_start(ap, path);
4174 l = strv_new_ap(path, ap);
4175 va_end(ap);
4176
4177 if (!l)
4178 return -ENOMEM;
4179
e287086b 4180 r = strv_extend_strv(&c->argv, l, false);
e63ff941 4181 if (r < 0)
86b23b07 4182 return r;
86b23b07
JS
4183
4184 return 0;
4185}
4186
4187
613b411c
LP
4188static int exec_runtime_allocate(ExecRuntime **rt) {
4189
4190 if (*rt)
4191 return 0;
4192
4193 *rt = new0(ExecRuntime, 1);
f146f5e1 4194 if (!*rt)
613b411c
LP
4195 return -ENOMEM;
4196
4197 (*rt)->n_ref = 1;
4198 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4199
4200 return 0;
4201}
4202
4203int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4204 int r;
4205
4206 assert(rt);
4207 assert(c);
4208 assert(id);
4209
4210 if (*rt)
4211 return 1;
4212
4213 if (!c->private_network && !c->private_tmp)
4214 return 0;
4215
4216 r = exec_runtime_allocate(rt);
4217 if (r < 0)
4218 return r;
4219
4220 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 4221 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
4222 return -errno;
4223 }
4224
4225 if (c->private_tmp && !(*rt)->tmp_dir) {
4226 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4227 if (r < 0)
4228 return r;
4229 }
4230
4231 return 1;
4232}
4233
4234ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4235 assert(r);
4236 assert(r->n_ref > 0);
4237
4238 r->n_ref++;
4239 return r;
4240}
4241
4242ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4243
4244 if (!r)
4245 return NULL;
4246
4247 assert(r->n_ref > 0);
4248
4249 r->n_ref--;
f2341e0a
LP
4250 if (r->n_ref > 0)
4251 return NULL;
4252
4253 free(r->tmp_dir);
4254 free(r->var_tmp_dir);
4255 safe_close_pair(r->netns_storage_socket);
6b430fdb 4256 return mfree(r);
613b411c
LP
4257}
4258
f2341e0a 4259int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
4260 assert(u);
4261 assert(f);
4262 assert(fds);
4263
4264 if (!rt)
4265 return 0;
4266
4267 if (rt->tmp_dir)
4268 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4269
4270 if (rt->var_tmp_dir)
4271 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4272
4273 if (rt->netns_storage_socket[0] >= 0) {
4274 int copy;
4275
4276 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4277 if (copy < 0)
4278 return copy;
4279
4280 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4281 }
4282
4283 if (rt->netns_storage_socket[1] >= 0) {
4284 int copy;
4285
4286 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4287 if (copy < 0)
4288 return copy;
4289
4290 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4291 }
4292
4293 return 0;
4294}
4295
f2341e0a 4296int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
4297 int r;
4298
4299 assert(rt);
4300 assert(key);
4301 assert(value);
4302
4303 if (streq(key, "tmp-dir")) {
4304 char *copy;
4305
4306 r = exec_runtime_allocate(rt);
4307 if (r < 0)
f2341e0a 4308 return log_oom();
613b411c
LP
4309
4310 copy = strdup(value);
4311 if (!copy)
4312 return log_oom();
4313
4314 free((*rt)->tmp_dir);
4315 (*rt)->tmp_dir = copy;
4316
4317 } else if (streq(key, "var-tmp-dir")) {
4318 char *copy;
4319
4320 r = exec_runtime_allocate(rt);
4321 if (r < 0)
f2341e0a 4322 return log_oom();
613b411c
LP
4323
4324 copy = strdup(value);
4325 if (!copy)
4326 return log_oom();
4327
4328 free((*rt)->var_tmp_dir);
4329 (*rt)->var_tmp_dir = copy;
4330
4331 } else if (streq(key, "netns-socket-0")) {
4332 int fd;
4333
4334 r = exec_runtime_allocate(rt);
4335 if (r < 0)
f2341e0a 4336 return log_oom();
613b411c
LP
4337
4338 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4339 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4340 else {
03e334a1 4341 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
4342 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4343 }
4344 } else if (streq(key, "netns-socket-1")) {
4345 int fd;
4346
4347 r = exec_runtime_allocate(rt);
4348 if (r < 0)
f2341e0a 4349 return log_oom();
613b411c
LP
4350
4351 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 4352 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 4353 else {
03e334a1 4354 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
4355 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4356 }
4357 } else
4358 return 0;
4359
4360 return 1;
4361}
4362
4363static void *remove_tmpdir_thread(void *p) {
4364 _cleanup_free_ char *path = p;
4365
c6878637 4366 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
4367 return NULL;
4368}
4369
4370void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
4371 int r;
4372
613b411c
LP
4373 if (!rt)
4374 return;
4375
4376 /* If there are multiple users of this, let's leave the stuff around */
4377 if (rt->n_ref > 1)
4378 return;
4379
4380 if (rt->tmp_dir) {
4381 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
4382
4383 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4384 if (r < 0) {
da927ba9 4385 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
4386 free(rt->tmp_dir);
4387 }
4388
613b411c
LP
4389 rt->tmp_dir = NULL;
4390 }
4391
4392 if (rt->var_tmp_dir) {
4393 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
4394
4395 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4396 if (r < 0) {
da927ba9 4397 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
4398 free(rt->var_tmp_dir);
4399 }
4400
613b411c
LP
4401 rt->var_tmp_dir = NULL;
4402 }
4403
3d94f76c 4404 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
4405}
4406
80876c20
LP
4407static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4408 [EXEC_INPUT_NULL] = "null",
4409 [EXEC_INPUT_TTY] = "tty",
4410 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4411 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4412 [EXEC_INPUT_SOCKET] = "socket",
4413 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4414};
4415
8a0867d6
LP
4416DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4417
94f04347 4418static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4419 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4420 [EXEC_OUTPUT_NULL] = "null",
80876c20 4421 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4422 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4423 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4424 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4425 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4426 [EXEC_OUTPUT_JOURNAL] = "journal",
4427 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4428 [EXEC_OUTPUT_SOCKET] = "socket",
4429 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4430};
4431
4432DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4433
4434static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4435 [EXEC_UTMP_INIT] = "init",
4436 [EXEC_UTMP_LOGIN] = "login",
4437 [EXEC_UTMP_USER] = "user",
4438};
4439
4440DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
53f47dfc
YW
4441
4442static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4443 [EXEC_PRESERVE_NO] = "no",
4444 [EXEC_PRESERVE_YES] = "yes",
4445 [EXEC_PRESERVE_RESTART] = "restart",
4446};
4447
4448DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
3536f49e
YW
4449
4450static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4451 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4452 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4453 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4454 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4455 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4456};
4457
4458DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
b1edf445
LP
4459
4460static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4461 [EXEC_KEYRING_INHERIT] = "inherit",
4462 [EXEC_KEYRING_PRIVATE] = "private",
4463 [EXEC_KEYRING_SHARED] = "shared",
4464};
4465
4466DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);