]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/execute.c
core: allow to redirect confirmation messages to a different console
[thirdparty/systemd.git] / src / core / execute.c
CommitLineData
a7334b09
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
a7334b09
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
a7334b09 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
a7334b09
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
034c6ed7
LP
20#include <errno.h>
21#include <fcntl.h>
8dd4c05b
LP
22#include <glob.h>
23#include <grp.h>
24#include <poll.h>
309bff19 25#include <signal.h>
8dd4c05b 26#include <string.h>
19c0b0b9 27#include <sys/capability.h>
d251207d 28#include <sys/eventfd.h>
f3e43635 29#include <sys/mman.h>
8dd4c05b 30#include <sys/personality.h>
94f04347 31#include <sys/prctl.h>
d2ffa389 32#include <sys/shm.h>
8dd4c05b 33#include <sys/socket.h>
451a074f 34#include <sys/stat.h>
d2ffa389 35#include <sys/types.h>
8dd4c05b
LP
36#include <sys/un.h>
37#include <unistd.h>
023a4f67 38#include <utmpx.h>
5cb5a6ff 39
5b6319dc
LP
40#ifdef HAVE_PAM
41#include <security/pam_appl.h>
42#endif
43
7b52a628
MS
44#ifdef HAVE_SELINUX
45#include <selinux/selinux.h>
46#endif
47
17df7223
LP
48#ifdef HAVE_SECCOMP
49#include <seccomp.h>
50#endif
51
eef65bf3
MS
52#ifdef HAVE_APPARMOR
53#include <sys/apparmor.h>
54#endif
55
24882e06 56#include "sd-messages.h"
8dd4c05b
LP
57
58#include "af-list.h"
b5efdb8a 59#include "alloc-util.h"
3ffd4af2
LP
60#ifdef HAVE_APPARMOR
61#include "apparmor-util.h"
62#endif
8dd4c05b
LP
63#include "async.h"
64#include "barrier.h"
8dd4c05b 65#include "cap-list.h"
430f0182 66#include "capability-util.h"
f6a6225e 67#include "def.h"
4d1a6904 68#include "env-util.h"
17df7223 69#include "errno-list.h"
3ffd4af2 70#include "execute.h"
8dd4c05b 71#include "exit-status.h"
3ffd4af2 72#include "fd-util.h"
8dd4c05b 73#include "fileio.h"
f97b34a6 74#include "format-util.h"
f4f15635 75#include "fs-util.h"
7d50b32a 76#include "glob-util.h"
c004493c 77#include "io-util.h"
8dd4c05b
LP
78#include "ioprio.h"
79#include "log.h"
80#include "macro.h"
81#include "missing.h"
82#include "mkdir.h"
83#include "namespace.h"
6bedfcbb 84#include "parse-util.h"
8dd4c05b 85#include "path-util.h"
0b452006 86#include "process-util.h"
78f22b97 87#include "rlimit-util.h"
8dd4c05b 88#include "rm-rf.h"
3ffd4af2
LP
89#ifdef HAVE_SECCOMP
90#include "seccomp-util.h"
91#endif
8dd4c05b
LP
92#include "securebits.h"
93#include "selinux-util.h"
24882e06 94#include "signal-util.h"
8dd4c05b 95#include "smack-util.h"
fd63e712 96#include "special.h"
8b43440b 97#include "string-table.h"
07630cea 98#include "string-util.h"
8dd4c05b 99#include "strv.h"
7ccbd1ae 100#include "syslog-util.h"
8dd4c05b
LP
101#include "terminal-util.h"
102#include "unit.h"
b1d4f8e1 103#include "user-util.h"
8dd4c05b
LP
104#include "util.h"
105#include "utmp-wtmp.h"
5cb5a6ff 106
e056b01d 107#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
31a7eb86 108#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
e6a26745 109
02a51aba
LP
110/* This assumes there is a 'tty' group */
111#define TTY_MODE 0620
112
531dca78
LP
113#define SNDBUF_SIZE (8*1024*1024)
114
034c6ed7
LP
/* Moves the passed file descriptors so that fds[i] ends up as descriptor i+3, i.e. packed directly after
 * stdin/stdout/stderr. The array is rewritten in place with the new descriptor numbers. Returns 0 on success
 * or a negative errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep making passes until a pass finishes without any descriptor landing on the wrong slot. */
        while (first >= 0) {
                int retry = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, moved;

                        /* Nothing to do if this one already sits where we want it */
                        if (fds[idx] == target)
                                continue;

                        /* F_DUPFD hands back the lowest free descriptor >= target */
                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we asked for was still occupied: note the first such index so that
                         * the next pass starts from there */
                        if (moved != target && retry < 0)
                                retry = idx;
                }

                first = retry;
        }

        return 0;
}
159
/* Applies the requested O_NONBLOCK setting to every passed descriptor and turns FD_CLOEXEC off on each of
 * them. Returns 0 on success, or the first negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int k;

                k = fd_nonblock(*cur, nonblock);
                if (k < 0)
                        return k;

                /* FD_CLOEXEC is always cleared: these descriptors are meant to be handed over to the
                 * child we are about to execute */
                k = fd_cloexec(*cur, false);
                if (k < 0)
                        return k;
        }

        return 0;
}
188
1e22b5cd 189static const char *exec_context_tty_path(const ExecContext *context) {
80876c20
LP
190 assert(context);
191
1e22b5cd
LP
192 if (context->stdio_as_fds)
193 return NULL;
194
80876c20
LP
195 if (context->tty_path)
196 return context->tty_path;
197
198 return "/dev/console";
199}
200
1e22b5cd
LP
201static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
202 const char *path;
203
6ea832a2
LP
204 assert(context);
205
1e22b5cd 206 path = exec_context_tty_path(context);
6ea832a2 207
1e22b5cd
LP
208 if (context->tty_vhangup) {
209 if (p && p->stdin_fd >= 0)
210 (void) terminal_vhangup_fd(p->stdin_fd);
211 else if (path)
212 (void) terminal_vhangup(path);
213 }
6ea832a2 214
1e22b5cd
LP
215 if (context->tty_reset) {
216 if (p && p->stdin_fd >= 0)
217 (void) reset_terminal_fd(p->stdin_fd, true);
218 else if (path)
219 (void) reset_terminal(path);
220 }
221
222 if (context->tty_vt_disallocate && path)
223 (void) vt_disallocate(path);
6ea832a2
LP
224}
225
6af760f3
LP
226static bool is_terminal_input(ExecInput i) {
227 return IN_SET(i,
228 EXEC_INPUT_TTY,
229 EXEC_INPUT_TTY_FORCE,
230 EXEC_INPUT_TTY_FAIL);
231}
232
3a1286b6 233static bool is_terminal_output(ExecOutput o) {
6af760f3
LP
234 return IN_SET(o,
235 EXEC_OUTPUT_TTY,
236 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
237 EXEC_OUTPUT_KMSG_AND_CONSOLE,
238 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
239}
240
241static bool exec_context_needs_term(const ExecContext *c) {
242 assert(c);
243
244 /* Return true if the execution context suggests we should set $TERM to something useful. */
245
246 if (is_terminal_input(c->std_input))
247 return true;
248
249 if (is_terminal_output(c->std_output))
250 return true;
251
252 if (is_terminal_output(c->std_error))
253 return true;
254
255 return !!c->tty_path;
3a1286b6
MS
256}
257
80876c20
LP
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and installs it as descriptor nfd. Returns nfd
 * on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right descriptor number right away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);
        return result;
}
275
524daa8c 276static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
b92bea5d
ZJS
277 union sockaddr_union sa = {
278 .un.sun_family = AF_UNIX,
279 .un.sun_path = "/run/systemd/journal/stdout",
280 };
524daa8c
ZJS
281 uid_t olduid = UID_INVALID;
282 gid_t oldgid = GID_INVALID;
283 int r;
284
285 if (gid != GID_INVALID) {
286 oldgid = getgid();
287
288 r = setegid(gid);
289 if (r < 0)
290 return -errno;
291 }
292
293 if (uid != UID_INVALID) {
294 olduid = getuid();
295
296 r = seteuid(uid);
297 if (r < 0) {
298 r = -errno;
299 goto restore_gid;
300 }
301 }
302
fc2fffe7 303 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
524daa8c
ZJS
304 if (r < 0)
305 r = -errno;
306
307 /* If we fail to restore the uid or gid, things will likely
308 fail later on. This should only happen if an LSM interferes. */
309
310 if (uid != UID_INVALID)
311 (void) seteuid(olduid);
312
313 restore_gid:
314 if (gid != GID_INVALID)
315 (void) setegid(oldgid);
316
317 return r;
318}
319
fd1f9c89 320static int connect_logger_as(
7a1ab780 321 Unit *unit,
fd1f9c89
LP
322 const ExecContext *context,
323 ExecOutput output,
324 const char *ident,
fd1f9c89
LP
325 int nfd,
326 uid_t uid,
327 gid_t gid) {
328
524daa8c 329 int fd, r;
071830ff
LP
330
331 assert(context);
80876c20
LP
332 assert(output < _EXEC_OUTPUT_MAX);
333 assert(ident);
334 assert(nfd >= 0);
071830ff 335
54fe0cdb
LP
336 fd = socket(AF_UNIX, SOCK_STREAM, 0);
337 if (fd < 0)
80876c20 338 return -errno;
071830ff 339
524daa8c
ZJS
340 r = connect_journal_socket(fd, uid, gid);
341 if (r < 0)
342 return r;
071830ff 343
80876c20 344 if (shutdown(fd, SHUT_RD) < 0) {
03e334a1 345 safe_close(fd);
80876c20
LP
346 return -errno;
347 }
071830ff 348
fd1f9c89 349 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
531dca78 350
80876c20 351 dprintf(fd,
62bca2c6 352 "%s\n"
80876c20
LP
353 "%s\n"
354 "%i\n"
54fe0cdb
LP
355 "%i\n"
356 "%i\n"
357 "%i\n"
4f4a1dbf 358 "%i\n",
4f4a1dbf 359 context->syslog_identifier ? context->syslog_identifier : ident,
7a1ab780 360 unit->id,
54fe0cdb
LP
361 context->syslog_priority,
362 !!context->syslog_level_prefix,
363 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
364 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
3a1286b6 365 is_terminal_output(output));
80876c20 366
fd1f9c89
LP
367 if (fd == nfd)
368 return nfd;
369
370 r = dup2(fd, nfd) < 0 ? -errno : nfd;
371 safe_close(fd);
071830ff 372
80876c20
LP
373 return r;
374}
/* Opens the terminal at path with the given mode (plus O_NOCTTY) via open_terminal() and installs it as
 * descriptor nfd. Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty, result;

        assert(path);
        assert(nfd >= 0);

        tty = open_terminal(path, mode | O_NOCTTY);
        if (tty < 0)
                return tty;

        if (tty == nfd)
                return nfd;

        if (dup2(tty, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(tty);
        return result;
}
071830ff 393
1e3ad081
LP
394static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
395
396 if (is_terminal_input(std_input) && !apply_tty_stdin)
397 return EXEC_INPUT_NULL;
071830ff 398
03fd9c49 399 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
400 return EXEC_INPUT_NULL;
401
03fd9c49 402 return std_input;
4f2d528d
LP
403}
404
03fd9c49 405static int fixup_output(ExecOutput std_output, int socket_fd) {
4f2d528d 406
03fd9c49 407 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
4f2d528d
LP
408 return EXEC_OUTPUT_INHERIT;
409
03fd9c49 410 return std_output;
4f2d528d
LP
411}
412
a34ceba6
LP
413static int setup_input(
414 const ExecContext *context,
415 const ExecParameters *params,
52c239d7
LB
416 int socket_fd,
417 int named_iofds[3]) {
a34ceba6 418
4f2d528d
LP
419 ExecInput i;
420
421 assert(context);
a34ceba6
LP
422 assert(params);
423
424 if (params->stdin_fd >= 0) {
425 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
426 return -errno;
427
428 /* Try to make this the controlling tty, if it is a tty, and reset it */
429 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
430 (void) reset_terminal_fd(STDIN_FILENO, true);
431
432 return STDIN_FILENO;
433 }
4f2d528d 434
c39f1ce2 435 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
4f2d528d
LP
436
437 switch (i) {
071830ff 438
80876c20
LP
439 case EXEC_INPUT_NULL:
440 return open_null_as(O_RDONLY, STDIN_FILENO);
441
442 case EXEC_INPUT_TTY:
443 case EXEC_INPUT_TTY_FORCE:
444 case EXEC_INPUT_TTY_FAIL: {
445 int fd, r;
071830ff 446
1e22b5cd 447 fd = acquire_terminal(exec_context_tty_path(context),
970edce6
ZJS
448 i == EXEC_INPUT_TTY_FAIL,
449 i == EXEC_INPUT_TTY_FORCE,
450 false,
3a43da28 451 USEC_INFINITY);
970edce6 452 if (fd < 0)
80876c20
LP
453 return fd;
454
455 if (fd != STDIN_FILENO) {
456 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
03e334a1 457 safe_close(fd);
80876c20
LP
458 } else
459 r = STDIN_FILENO;
460
461 return r;
462 }
463
4f2d528d
LP
464 case EXEC_INPUT_SOCKET:
465 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
466
52c239d7
LB
467 case EXEC_INPUT_NAMED_FD:
468 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
469 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
470
80876c20
LP
471 default:
472 assert_not_reached("Unknown input type");
473 }
474}
475
a34ceba6
LP
476static int setup_output(
477 Unit *unit,
478 const ExecContext *context,
479 const ExecParameters *params,
480 int fileno,
481 int socket_fd,
52c239d7 482 int named_iofds[3],
a34ceba6 483 const char *ident,
7bce046b
LP
484 uid_t uid,
485 gid_t gid,
486 dev_t *journal_stream_dev,
487 ino_t *journal_stream_ino) {
a34ceba6 488
4f2d528d
LP
489 ExecOutput o;
490 ExecInput i;
47c1d80d 491 int r;
4f2d528d 492
f2341e0a 493 assert(unit);
80876c20 494 assert(context);
a34ceba6 495 assert(params);
80876c20 496 assert(ident);
7bce046b
LP
497 assert(journal_stream_dev);
498 assert(journal_stream_ino);
80876c20 499
a34ceba6
LP
500 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
501
502 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
503 return -errno;
504
505 return STDOUT_FILENO;
506 }
507
508 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
509 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
510 return -errno;
511
512 return STDERR_FILENO;
513 }
514
c39f1ce2 515 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
03fd9c49 516 o = fixup_output(context->std_output, socket_fd);
4f2d528d 517
eb17e935
MS
518 if (fileno == STDERR_FILENO) {
519 ExecOutput e;
520 e = fixup_output(context->std_error, socket_fd);
80876c20 521
eb17e935
MS
522 /* This expects the input and output are already set up */
523
524 /* Don't change the stderr file descriptor if we inherit all
525 * the way and are not on a tty */
526 if (e == EXEC_OUTPUT_INHERIT &&
527 o == EXEC_OUTPUT_INHERIT &&
528 i == EXEC_INPUT_NULL &&
529 !is_terminal_input(context->std_input) &&
530 getppid () != 1)
531 return fileno;
532
533 /* Duplicate from stdout if possible */
52c239d7 534 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
eb17e935 535 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 536
eb17e935 537 o = e;
80876c20 538
eb17e935 539 } else if (o == EXEC_OUTPUT_INHERIT) {
21d21ea4
LP
540 /* If input got downgraded, inherit the original value */
541 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
1e22b5cd 542 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
21d21ea4 543
acb591e4 544 /* If the input is connected to anything that's not a /dev/null, inherit that... */
ff876e28 545 if (i != EXEC_INPUT_NULL)
eb17e935 546 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
071830ff 547
acb591e4
LP
548 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
549 if (getppid() != 1)
eb17e935 550 return fileno;
94f04347 551
eb17e935
MS
552 /* We need to open /dev/null here anew, to get the right access mode. */
553 return open_null_as(O_WRONLY, fileno);
071830ff 554 }
94f04347 555
eb17e935 556 switch (o) {
80876c20
LP
557
558 case EXEC_OUTPUT_NULL:
eb17e935 559 return open_null_as(O_WRONLY, fileno);
80876c20
LP
560
561 case EXEC_OUTPUT_TTY:
4f2d528d 562 if (is_terminal_input(i))
eb17e935 563 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
80876c20
LP
564
565 /* We don't reset the terminal if this is just about output */
1e22b5cd 566 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
80876c20
LP
567
568 case EXEC_OUTPUT_SYSLOG:
28dbc1e8 569 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
9a6bca7a 570 case EXEC_OUTPUT_KMSG:
28dbc1e8 571 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
706343f4
LP
572 case EXEC_OUTPUT_JOURNAL:
573 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
7a1ab780 574 r = connect_logger_as(unit, context, o, ident, fileno, uid, gid);
47c1d80d 575 if (r < 0) {
f2341e0a 576 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
eb17e935 577 r = open_null_as(O_WRONLY, fileno);
7bce046b
LP
578 } else {
579 struct stat st;
580
581 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
582 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
583 * services to detect whether they are connected to the journal or not. */
584
585 if (fstat(fileno, &st) >= 0) {
586 *journal_stream_dev = st.st_dev;
587 *journal_stream_ino = st.st_ino;
588 }
47c1d80d
MS
589 }
590 return r;
4f2d528d
LP
591
592 case EXEC_OUTPUT_SOCKET:
593 assert(socket_fd >= 0);
eb17e935 594 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
94f04347 595
52c239d7
LB
596 case EXEC_OUTPUT_NAMED_FD:
597 (void) fd_nonblock(named_iofds[fileno], false);
598 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
599
94f04347 600 default:
80876c20 601 assert_not_reached("Unknown error type");
94f04347 602 }
071830ff
LP
603}
604
02a51aba
LP
605static int chown_terminal(int fd, uid_t uid) {
606 struct stat st;
607
608 assert(fd >= 0);
02a51aba 609
1ff74fb6
LP
610 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
611 if (isatty(fd) < 1)
612 return 0;
613
02a51aba 614 /* This might fail. What matters are the results. */
bab45044
LP
615 (void) fchown(fd, uid, -1);
616 (void) fchmod(fd, TTY_MODE);
02a51aba
LP
617
618 if (fstat(fd, &st) < 0)
619 return -errno;
620
d8b4e2e9 621 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
02a51aba
LP
622 return -EPERM;
623
624 return 0;
625}
626
7d5ceb64 627static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
3d18b167
LP
628 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
629 int r;
80876c20 630
80876c20
LP
631 assert(_saved_stdin);
632 assert(_saved_stdout);
633
af6da548
LP
634 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
635 if (saved_stdin < 0)
636 return -errno;
80876c20 637
af6da548 638 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
3d18b167
LP
639 if (saved_stdout < 0)
640 return -errno;
80876c20 641
7d5ceb64 642 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
3d18b167
LP
643 if (fd < 0)
644 return fd;
80876c20 645
af6da548
LP
646 r = chown_terminal(fd, getuid());
647 if (r < 0)
3d18b167 648 return r;
02a51aba 649
3d18b167
LP
650 r = reset_terminal_fd(fd, true);
651 if (r < 0)
652 return r;
80876c20 653
3d18b167
LP
654 if (dup2(fd, STDIN_FILENO) < 0)
655 return -errno;
656
657 if (dup2(fd, STDOUT_FILENO) < 0)
658 return -errno;
80876c20
LP
659
660 if (fd >= 2)
03e334a1 661 safe_close(fd);
3d18b167 662 fd = -1;
80876c20
LP
663
664 *_saved_stdin = saved_stdin;
665 *_saved_stdout = saved_stdout;
666
3d18b167 667 saved_stdin = saved_stdout = -1;
80876c20 668
3d18b167 669 return 0;
80876c20
LP
670}
671
7d5ceb64 672_printf_(2, 3) static int write_confirm_message(const char *vc, const char *format, ...) {
03e334a1 673 _cleanup_close_ int fd = -1;
af6da548 674 va_list ap;
80876c20 675
af6da548 676 assert(format);
80876c20 677
7d5ceb64 678 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
af6da548
LP
679 if (fd < 0)
680 return fd;
80876c20 681
af6da548
LP
682 va_start(ap, format);
683 vdprintf(fd, format, ap);
684 va_end(ap);
80876c20 685
af6da548
LP
686 return 0;
687}
80876c20 688
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved descriptors back as stdin/stdout and
 * closes the saved copies, storing the result of safe_close() back into the passed variables. Returns 0 on
 * success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return result;
}
710
7d5ceb64 711static int ask_for_confirmation(const char *vc, char *response, char **argv) {
af6da548 712 int saved_stdout = -1, saved_stdin = -1, r;
e1d75803 713 _cleanup_free_ char *line = NULL;
af6da548 714
7d5ceb64 715 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
af6da548
LP
716 if (r < 0)
717 return r;
718
719 line = exec_command_line(argv);
720 if (!line)
721 return -ENOMEM;
722
418b9be5 723 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
af6da548
LP
724
725 restore_confirm_stdio(&saved_stdin, &saved_stdout);
726
727 return r;
80876c20
LP
728}
729
4d885bd3
DH
730static int get_fixed_user(const ExecContext *c, const char **user,
731 uid_t *uid, gid_t *gid,
732 const char **home, const char **shell) {
81a2b7ce 733 int r;
4d885bd3 734 const char *name;
81a2b7ce 735
4d885bd3 736 assert(c);
81a2b7ce 737
4d885bd3
DH
738 if (!c->user)
739 return 0;
81a2b7ce 740
4d885bd3
DH
741 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
742 * (i.e. are "/" or "/bin/nologin"). */
81a2b7ce 743
4d885bd3
DH
744 name = c->user;
745 r = get_user_creds_clean(&name, uid, gid, home, shell);
746 if (r < 0)
747 return r;
81a2b7ce 748
4d885bd3
DH
749 *user = name;
750 return 0;
751}
752
753static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
754 int r;
755 const char *name;
756
757 assert(c);
758
759 if (!c->group)
760 return 0;
761
762 name = c->group;
763 r = get_group_creds(&name, gid);
764 if (r < 0)
765 return r;
766
767 *group = name;
768 return 0;
769}
770
cdc5d5c5
DH
771static int get_supplementary_groups(const ExecContext *c, const char *user,
772 const char *group, gid_t gid,
773 gid_t **supplementary_gids, int *ngids) {
4d885bd3
DH
774 char **i;
775 int r, k = 0;
776 int ngroups_max;
777 bool keep_groups = false;
778 gid_t *groups = NULL;
779 _cleanup_free_ gid_t *l_gids = NULL;
780
781 assert(c);
782
bbeea271
DH
783 /*
784 * If user is given, then lookup GID and supplementary groups list.
785 * We avoid NSS lookups for gid=0. Also we have to initialize groups
cdc5d5c5
DH
786 * here and as early as possible so we keep the list of supplementary
787 * groups of the caller.
bbeea271
DH
788 */
789 if (user && gid_is_valid(gid) && gid != 0) {
790 /* First step, initialize groups from /etc/groups */
791 if (initgroups(user, gid) < 0)
792 return -errno;
793
794 keep_groups = true;
795 }
796
4d885bd3
DH
797 if (!c->supplementary_groups)
798 return 0;
799
366ddd25
DH
800 /*
801 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
802 * be positive, otherwise fail.
803 */
804 errno = 0;
805 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
806 if (ngroups_max <= 0) {
807 if (errno > 0)
808 return -errno;
809 else
810 return -EOPNOTSUPP; /* For all other values */
811 }
812
4d885bd3
DH
813 l_gids = new(gid_t, ngroups_max);
814 if (!l_gids)
815 return -ENOMEM;
81a2b7ce 816
4d885bd3
DH
817 if (keep_groups) {
818 /*
819 * Lookup the list of groups that the user belongs to, we
820 * avoid NSS lookups here too for gid=0.
821 */
822 k = ngroups_max;
823 if (getgrouplist(user, gid, l_gids, &k) < 0)
824 return -EINVAL;
825 } else
826 k = 0;
81a2b7ce 827
4d885bd3
DH
828 STRV_FOREACH(i, c->supplementary_groups) {
829 const char *g;
81a2b7ce 830
4d885bd3
DH
831 if (k >= ngroups_max)
832 return -E2BIG;
81a2b7ce 833
4d885bd3
DH
834 g = *i;
835 r = get_group_creds(&g, l_gids+k);
836 if (r < 0)
837 return r;
81a2b7ce 838
4d885bd3
DH
839 k++;
840 }
81a2b7ce 841
4d885bd3
DH
842 /*
843 * Sets ngids to zero to drop all supplementary groups, happens
844 * when we are under root and SupplementaryGroups= is empty.
845 */
846 if (k == 0) {
847 *ngids = 0;
848 return 0;
849 }
81a2b7ce 850
4d885bd3
DH
851 /* Otherwise get the final list of supplementary groups */
852 groups = memdup(l_gids, sizeof(gid_t) * k);
853 if (!groups)
854 return -ENOMEM;
855
856 *supplementary_gids = groups;
857 *ngids = k;
858
859 groups = NULL;
860
861 return 0;
862}
863
864static int enforce_groups(const ExecContext *context, gid_t gid,
865 gid_t *supplementary_gids, int ngids) {
866 int r;
867
868 assert(context);
869
870 /* Handle SupplementaryGroups= even if it is empty */
871 if (context->supplementary_groups) {
872 r = maybe_setgroups(ngids, supplementary_gids);
873 if (r < 0)
97f0e76f 874 return r;
4d885bd3 875 }
81a2b7ce 876
4d885bd3
DH
877 if (gid_is_valid(gid)) {
878 /* Then set our gids */
879 if (setresgid(gid, gid, gid) < 0)
880 return -errno;
81a2b7ce
LP
881 }
882
883 return 0;
884}
885
886static int enforce_user(const ExecContext *context, uid_t uid) {
81a2b7ce
LP
887 assert(context);
888
4d885bd3
DH
889 if (!uid_is_valid(uid))
890 return 0;
891
479050b3 892 /* Sets (but doesn't look up) the uid and make sure we keep the
81a2b7ce
LP
893 * capabilities while doing so. */
894
479050b3 895 if (context->capability_ambient_set != 0) {
81a2b7ce
LP
896
897 /* First step: If we need to keep capabilities but
898 * drop privileges we need to make sure we keep our
cbb21cca 899 * caps, while we drop privileges. */
693ced48 900 if (uid != 0) {
cbb21cca 901 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
693ced48
LP
902
903 if (prctl(PR_GET_SECUREBITS) != sb)
904 if (prctl(PR_SET_SECUREBITS, sb) < 0)
905 return -errno;
906 }
81a2b7ce
LP
907 }
908
479050b3 909 /* Second step: actually set the uids */
81a2b7ce
LP
910 if (setresuid(uid, uid, uid) < 0)
911 return -errno;
912
913 /* At this point we should have all necessary capabilities but
914 are otherwise a normal user. However, the caps might got
915 corrupted due to the setresuid() so we need clean them up
916 later. This is done outside of this call. */
917
918 return 0;
919}
920
5b6319dc
LP
921#ifdef HAVE_PAM
922
923static int null_conv(
924 int num_msg,
925 const struct pam_message **msg,
926 struct pam_response **resp,
927 void *appdata_ptr) {
928
929 /* We don't support conversations */
930
931 return PAM_CONV_ERR;
932}
933
cefc33ae
LP
934#endif
935
5b6319dc
LP
/* Opens a PAM session for the given service name and user, and forks off a helper process ("(sd-pam)") that
 * closes the session again once the calling process is gone.
 *
 * name:       PAM service name
 * user:       user to open the session for
 * uid, gid:   credentials the helper process drops to
 * tty:        optional tty to register as PAM_TTY, may be NULL
 * env:        environment block; its entries are passed to PAM, and on success it is replaced by the
 *             environment list PAM returns (the old block is freed)
 * fds, n_fds: descriptors that are closed in the helper process
 *
 * Returns 0 on success. Returns -EPERM if a PAM call fails, or a negative errno if the barrier or the fork
 * fails. If built without HAVE_PAM this does nothing and returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_GET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Make our environment visible to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call. */
                barrier_place(&barrier);

                /* Check if our parent process might already have
                 * died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure by returning a positive error number;
                                 * it neither returns a negative value nor sets errno. */
                                k = sigwait(&ss, &sig);
                                if (k == EINTR)
                                        continue;
                                if (k != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment block by the one PAM gave us */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
5b6319dc 1150
5d6b1584
LP
/* Derives a short, parenthesized process name from the file name component of 'path' and applies
 * it with rename_process(). At most the last 8 characters of the file name are kept, so that the
 * result fits into 10 characters (i.e. the length of "/sbin/init") and looks pretty in /bin/ps.
 * If the file name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting part, as the beginning
                 * might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
1181
c0467cf3 1182#ifdef HAVE_SECCOMP
17df7223 1183
83f12b27 1184static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
f673b62d
LP
1185
1186 if (is_seccomp_available())
1187 return false;
1188
1189 log_open();
1190 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1191 log_close();
1192 return true;
83f12b27
FS
1193}
1194
1195static int apply_seccomp(const Unit* u, const ExecContext *c) {
17df7223 1196 uint32_t negative_action, action;
8d7b0c8f 1197 scmp_filter_ctx seccomp;
c0467cf3
RC
1198 Iterator i;
1199 void *id;
17df7223 1200 int r;
8351ceae 1201
c0467cf3 1202 assert(c);
8351ceae 1203
83f12b27
FS
1204 if (skip_seccomp_unavailable(u, "syscall filtering"))
1205 return 0;
1206
17df7223
LP
1207 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1208
1209 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
1210 if (!seccomp)
1211 return -ENOMEM;
8351ceae 1212
e9642be2
LP
1213 if (c->syscall_archs) {
1214
1215 SET_FOREACH(id, c->syscall_archs, i) {
1216 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1217 if (r == -EEXIST)
1218 continue;
7c66bae2
LP
1219 if (r < 0)
1220 goto finish;
e9642be2 1221 }
e9642be2 1222
7c66bae2 1223 } else {
e9642be2 1224 r = seccomp_add_secondary_archs(seccomp);
7c66bae2
LP
1225 if (r < 0)
1226 goto finish;
57183d11 1227 }
8351ceae 1228
57183d11 1229 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
17df7223
LP
1230 SET_FOREACH(id, c->syscall_filter, i) {
1231 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
7c66bae2
LP
1232 if (r < 0)
1233 goto finish;
c0467cf3 1234 }
8351ceae 1235
7c66bae2
LP
1236 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1237 if (r < 0)
1238 goto finish;
1239
17df7223 1240 r = seccomp_load(seccomp);
7c66bae2
LP
1241
1242finish:
17df7223 1243 seccomp_release(seccomp);
4298d0b5
LP
1244 return r;
1245}
1246
83f12b27 1247static int apply_address_families(const Unit* u, const ExecContext *c) {
8d7b0c8f 1248 scmp_filter_ctx seccomp;
4298d0b5
LP
1249 Iterator i;
1250 int r;
1251
1252 assert(c);
1253
83f12b27
FS
1254 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1255 return 0;
1256
8d7b0c8f 1257 r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
4298d0b5 1258 if (r < 0)
8d7b0c8f 1259 return r;
4298d0b5
LP
1260
1261 if (c->address_families_whitelist) {
1262 int af, first = 0, last = 0;
1263 void *afp;
1264
1265 /* If this is a whitelist, we first block the address
1266 * families that are out of range and then everything
1267 * that is not in the set. First, we find the lowest
1268 * and highest address family in the set. */
1269
1270 SET_FOREACH(afp, c->address_families, i) {
1271 af = PTR_TO_INT(afp);
17df7223 1272
4298d0b5
LP
1273 if (af <= 0 || af >= af_max())
1274 continue;
1275
1276 if (first == 0 || af < first)
1277 first = af;
1278
1279 if (last == 0 || af > last)
1280 last = af;
1281 }
1282
1283 assert((first == 0) == (last == 0));
1284
1285 if (first == 0) {
1286
1287 /* No entries in the valid range, block everything */
1288 r = seccomp_rule_add(
1289 seccomp,
1290 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1291 SCMP_SYS(socket),
1292 0);
1293 if (r < 0)
1294 goto finish;
1295
1296 } else {
1297
1298 /* Block everything below the first entry */
1299 r = seccomp_rule_add(
1300 seccomp,
1301 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1302 SCMP_SYS(socket),
1303 1,
1304 SCMP_A0(SCMP_CMP_LT, first));
1305 if (r < 0)
1306 goto finish;
1307
1308 /* Block everything above the last entry */
1309 r = seccomp_rule_add(
1310 seccomp,
1311 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1312 SCMP_SYS(socket),
1313 1,
1314 SCMP_A0(SCMP_CMP_GT, last));
1315 if (r < 0)
1316 goto finish;
1317
1318 /* Block everything between the first and last
1319 * entry */
1320 for (af = 1; af < af_max(); af++) {
1321
1322 if (set_contains(c->address_families, INT_TO_PTR(af)))
1323 continue;
1324
1325 r = seccomp_rule_add(
1326 seccomp,
1327 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1328 SCMP_SYS(socket),
1329 1,
1330 SCMP_A0(SCMP_CMP_EQ, af));
1331 if (r < 0)
1332 goto finish;
1333 }
1334 }
1335
1336 } else {
1337 void *af;
1338
1339 /* If this is a blacklist, then generate one rule for
1340 * each address family that are then combined in OR
1341 * checks. */
1342
1343 SET_FOREACH(af, c->address_families, i) {
1344
1345 r = seccomp_rule_add(
1346 seccomp,
1347 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1348 SCMP_SYS(socket),
1349 1,
1350 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1351 if (r < 0)
1352 goto finish;
1353 }
1354 }
1355
4298d0b5
LP
1356 r = seccomp_load(seccomp);
1357
1358finish:
1359 seccomp_release(seccomp);
17df7223 1360 return r;
8351ceae 1361}
4298d0b5 1362
83f12b27 1363static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
8d7b0c8f 1364 scmp_filter_ctx seccomp;
f3e43635
TM
1365 int r;
1366
1367 assert(c);
1368
83f12b27
FS
1369 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1370 return 0;
1371
8d7b0c8f 1372 r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
72246c2a 1373 if (r < 0)
8d7b0c8f 1374 return r;
72246c2a 1375
f3e43635
TM
1376 r = seccomp_rule_add(
1377 seccomp,
abd84d4d 1378 SCMP_ACT_ERRNO(EPERM),
f3e43635
TM
1379 SCMP_SYS(mmap),
1380 1,
1381 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
1382 if (r < 0)
1383 goto finish;
1384
1385 r = seccomp_rule_add(
1386 seccomp,
abd84d4d 1387 SCMP_ACT_ERRNO(EPERM),
f3e43635
TM
1388 SCMP_SYS(mprotect),
1389 1,
1390 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
1391 if (r < 0)
1392 goto finish;
1393
d2ffa389
TM
1394 r = seccomp_rule_add(
1395 seccomp,
1396 SCMP_ACT_ERRNO(EPERM),
1397 SCMP_SYS(shmat),
1398 1,
1399 SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
1400 if (r < 0)
1401 goto finish;
1402
f3e43635
TM
1403 r = seccomp_load(seccomp);
1404
1405finish:
1406 seccomp_release(seccomp);
1407 return r;
1408}
1409
83f12b27 1410static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
f4170c67
LP
1411 static const int permitted_policies[] = {
1412 SCHED_OTHER,
1413 SCHED_BATCH,
1414 SCHED_IDLE,
1415 };
1416
8d7b0c8f 1417 scmp_filter_ctx seccomp;
f4170c67
LP
1418 unsigned i;
1419 int r, p, max_policy = 0;
1420
1421 assert(c);
1422
83f12b27
FS
1423 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1424 return 0;
1425
8d7b0c8f 1426 r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
72246c2a 1427 if (r < 0)
8d7b0c8f 1428 return r;
72246c2a 1429
f4170c67
LP
1430 /* Determine the highest policy constant we want to allow */
1431 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1432 if (permitted_policies[i] > max_policy)
1433 max_policy = permitted_policies[i];
1434
1435 /* Go through all policies with lower values than that, and block them -- unless they appear in the
1436 * whitelist. */
1437 for (p = 0; p < max_policy; p++) {
1438 bool good = false;
1439
1440 /* Check if this is in the whitelist. */
1441 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1442 if (permitted_policies[i] == p) {
1443 good = true;
1444 break;
1445 }
1446
1447 if (good)
1448 continue;
1449
1450 /* Deny this policy */
1451 r = seccomp_rule_add(
1452 seccomp,
1453 SCMP_ACT_ERRNO(EPERM),
1454 SCMP_SYS(sched_setscheduler),
1455 1,
1456 SCMP_A1(SCMP_CMP_EQ, p));
1457 if (r < 0)
1458 goto finish;
1459 }
1460
1461 /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here,
1462 * hence no need no check for < 0 values. */
1463 r = seccomp_rule_add(
1464 seccomp,
1465 SCMP_ACT_ERRNO(EPERM),
1466 SCMP_SYS(sched_setscheduler),
1467 1,
1468 SCMP_A1(SCMP_CMP_GT, max_policy));
1469 if (r < 0)
1470 goto finish;
1471
f4170c67
LP
1472 r = seccomp_load(seccomp);
1473
1474finish:
1475 seccomp_release(seccomp);
1476 return r;
1477}
1478
59e856c7 1479static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
8d7b0c8f 1480 scmp_filter_ctx seccomp;
59eeb84b
LP
1481 int r;
1482
1483 assert(c);
1484
1485 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1486 * let's protect even those systems where this is left on in the kernel. */
1487
1488 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1489 return 0;
1490
8d7b0c8f 1491 r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
59eeb84b 1492 if (r < 0)
8d7b0c8f 1493 return r;
59eeb84b
LP
1494
1495 r = seccomp_rule_add(
1496 seccomp,
1497 SCMP_ACT_ERRNO(EPERM),
1498 SCMP_SYS(_sysctl),
1499 0);
1500 if (r < 0)
1501 goto finish;
1502
59eeb84b
LP
1503 r = seccomp_load(seccomp);
1504
1505finish:
1506 seccomp_release(seccomp);
1507 return r;
1508}
1509
59e856c7 1510static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
502d704e
DH
1511 assert(c);
1512
25a8d8a0 1513 /* Turn off module syscalls on ProtectKernelModules=yes */
502d704e
DH
1514
1515 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1516 return 0;
1517
a3be2849 1518 return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
502d704e
DH
1519}
1520
59e856c7 1521static int apply_private_devices(const Unit *u, const ExecContext *c) {
ba128bb8
LP
1522 assert(c);
1523
8f81a5f6 1524 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
ba128bb8
LP
1525
1526 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1527 return 0;
1528
a3be2849 1529 return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
ba128bb8
LP
1530}
1531
add00535
LP
1532static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1533 assert(c);
1534
1535 if (!exec_context_restrict_namespaces_set(c))
1536 return 0;
1537
1538 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1539 return 0;
1540
1541 return seccomp_restrict_namespaces(c->restrict_namespaces);
1542}
1543
c0467cf3 1544#endif
8351ceae 1545
31a7eb86
ZJS
1546static void do_idle_pipe_dance(int idle_pipe[4]) {
1547 assert(idle_pipe);
1548
54eb2300
LP
1549 idle_pipe[1] = safe_close(idle_pipe[1]);
1550 idle_pipe[2] = safe_close(idle_pipe[2]);
31a7eb86
ZJS
1551
1552 if (idle_pipe[0] >= 0) {
1553 int r;
1554
1555 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1556
1557 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
c7cc737f
LP
1558 ssize_t n;
1559
31a7eb86 1560 /* Signal systemd that we are bored and want to continue. */
c7cc737f
LP
1561 n = write(idle_pipe[3], "x", 1);
1562 if (n > 0)
cd972d69
ZJS
1563 /* Wait for systemd to react to the signal above. */
1564 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
31a7eb86
ZJS
1565 }
1566
54eb2300 1567 idle_pipe[0] = safe_close(idle_pipe[0]);
31a7eb86
ZJS
1568
1569 }
1570
54eb2300 1571 idle_pipe[3] = safe_close(idle_pipe[3]);
31a7eb86
ZJS
1572}
1573
7cae38c4 1574static int build_environment(
fd63e712 1575 Unit *u,
9fa95f85 1576 const ExecContext *c,
1e22b5cd 1577 const ExecParameters *p,
7cae38c4
LP
1578 unsigned n_fds,
1579 const char *home,
1580 const char *username,
1581 const char *shell,
7bce046b
LP
1582 dev_t journal_stream_dev,
1583 ino_t journal_stream_ino,
7cae38c4
LP
1584 char ***ret) {
1585
1586 _cleanup_strv_free_ char **our_env = NULL;
1587 unsigned n_env = 0;
1588 char *x;
1589
4b58153d 1590 assert(u);
7cae38c4
LP
1591 assert(c);
1592 assert(ret);
1593
4b58153d 1594 our_env = new0(char*, 14);
7cae38c4
LP
1595 if (!our_env)
1596 return -ENOMEM;
1597
1598 if (n_fds > 0) {
8dd4c05b
LP
1599 _cleanup_free_ char *joined = NULL;
1600
ccd06097 1601 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
7cae38c4
LP
1602 return -ENOMEM;
1603 our_env[n_env++] = x;
1604
1605 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1606 return -ENOMEM;
1607 our_env[n_env++] = x;
8dd4c05b 1608
1e22b5cd 1609 joined = strv_join(p->fd_names, ":");
8dd4c05b
LP
1610 if (!joined)
1611 return -ENOMEM;
1612
605405c6 1613 x = strjoin("LISTEN_FDNAMES=", joined);
8dd4c05b
LP
1614 if (!x)
1615 return -ENOMEM;
1616 our_env[n_env++] = x;
7cae38c4
LP
1617 }
1618
b08af3b1 1619 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
ccd06097 1620 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
09812eb7
LP
1621 return -ENOMEM;
1622 our_env[n_env++] = x;
1623
1e22b5cd 1624 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
09812eb7
LP
1625 return -ENOMEM;
1626 our_env[n_env++] = x;
1627 }
1628
fd63e712
LP
1629 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1630 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1631 * check the database directly. */
1632 if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
1633 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1634 if (!x)
1635 return -ENOMEM;
1636 our_env[n_env++] = x;
1637 }
1638
7cae38c4
LP
1639 if (home) {
1640 x = strappend("HOME=", home);
1641 if (!x)
1642 return -ENOMEM;
1643 our_env[n_env++] = x;
1644 }
1645
1646 if (username) {
1647 x = strappend("LOGNAME=", username);
1648 if (!x)
1649 return -ENOMEM;
1650 our_env[n_env++] = x;
1651
1652 x = strappend("USER=", username);
1653 if (!x)
1654 return -ENOMEM;
1655 our_env[n_env++] = x;
1656 }
1657
1658 if (shell) {
1659 x = strappend("SHELL=", shell);
1660 if (!x)
1661 return -ENOMEM;
1662 our_env[n_env++] = x;
1663 }
1664
4b58153d
LP
1665 if (!sd_id128_is_null(u->invocation_id)) {
1666 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1667 return -ENOMEM;
1668
1669 our_env[n_env++] = x;
1670 }
1671
6af760f3
LP
1672 if (exec_context_needs_term(c)) {
1673 const char *tty_path, *term = NULL;
1674
1675 tty_path = exec_context_tty_path(c);
1676
1677 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1678 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1679 * passes to PID 1 ends up all the way in the console login shown. */
1680
1681 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1682 term = getenv("TERM");
1683 if (!term)
1684 term = default_term_for_tty(tty_path);
7cae38c4 1685
6af760f3 1686 x = strappend("TERM=", term);
7cae38c4
LP
1687 if (!x)
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690 }
1691
7bce046b
LP
1692 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1693 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1694 return -ENOMEM;
1695
1696 our_env[n_env++] = x;
1697 }
1698
7cae38c4 1699 our_env[n_env++] = NULL;
7bce046b 1700 assert(n_env <= 12);
7cae38c4
LP
1701
1702 *ret = our_env;
1703 our_env = NULL;
1704
1705 return 0;
1706}
1707
b4c14404
FB
1708static int build_pass_environment(const ExecContext *c, char ***ret) {
1709 _cleanup_strv_free_ char **pass_env = NULL;
1710 size_t n_env = 0, n_bufsize = 0;
1711 char **i;
1712
1713 STRV_FOREACH(i, c->pass_environment) {
1714 _cleanup_free_ char *x = NULL;
1715 char *v;
1716
1717 v = getenv(*i);
1718 if (!v)
1719 continue;
605405c6 1720 x = strjoin(*i, "=", v);
b4c14404
FB
1721 if (!x)
1722 return -ENOMEM;
1723 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1724 return -ENOMEM;
1725 pass_env[n_env++] = x;
1726 pass_env[n_env] = NULL;
1727 x = NULL;
1728 }
1729
1730 *ret = pass_env;
1731 pass_env = NULL;
1732
1733 return 0;
1734}
1735
8b44a3d2
LP
1736static bool exec_needs_mount_namespace(
1737 const ExecContext *context,
1738 const ExecParameters *params,
1739 ExecRuntime *runtime) {
1740
1741 assert(context);
1742 assert(params);
1743
2a624c36
AP
1744 if (!strv_isempty(context->read_write_paths) ||
1745 !strv_isempty(context->read_only_paths) ||
1746 !strv_isempty(context->inaccessible_paths))
8b44a3d2
LP
1747 return true;
1748
1749 if (context->mount_flags != 0)
1750 return true;
1751
1752 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1753 return true;
1754
8b44a3d2
LP
1755 if (context->private_devices ||
1756 context->protect_system != PROTECT_SYSTEM_NO ||
59eeb84b
LP
1757 context->protect_home != PROTECT_HOME_NO ||
1758 context->protect_kernel_tunables ||
c575770b 1759 context->protect_kernel_modules ||
59eeb84b 1760 context->protect_control_groups)
8b44a3d2
LP
1761 return true;
1762
1763 return false;
1764}
1765
d251207d
LP
1766static int setup_private_users(uid_t uid, gid_t gid) {
1767 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1768 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1769 _cleanup_close_ int unshare_ready_fd = -1;
1770 _cleanup_(sigkill_waitp) pid_t pid = 0;
1771 uint64_t c = 1;
1772 siginfo_t si;
1773 ssize_t n;
1774 int r;
1775
1776 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1777 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1778 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1779 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1780 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1781 * continues execution normally. */
1782
1783 if (uid != 0 && uid_is_valid(uid))
1784 asprintf(&uid_map,
1785 "0 0 1\n" /* Map root → root */
1786 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
e0f3720e 1787 uid, uid);
d251207d 1788 else
e0f3720e 1789 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
d251207d
LP
1790 if (!uid_map)
1791 return -ENOMEM;
1792
1793 if (gid != 0 && gid_is_valid(gid))
1794 asprintf(&gid_map,
1795 "0 0 1\n" /* Map root → root */
1796 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1797 gid, gid);
1798 else
1799 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1800 if (!gid_map)
1801 return -ENOMEM;
1802
1803 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1804 * namespace. */
1805 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1806 if (unshare_ready_fd < 0)
1807 return -errno;
1808
1809 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1810 * failed. */
1811 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1812 return -errno;
1813
1814 pid = fork();
1815 if (pid < 0)
1816 return -errno;
1817
1818 if (pid == 0) {
1819 _cleanup_close_ int fd = -1;
1820 const char *a;
1821 pid_t ppid;
1822
1823 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1824 * here, after the parent opened its own user namespace. */
1825
1826 ppid = getppid();
1827 errno_pipe[0] = safe_close(errno_pipe[0]);
1828
1829 /* Wait until the parent unshared the user namespace */
1830 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1831 r = -errno;
1832 goto child_fail;
1833 }
1834
1835 /* Disable the setgroups() system call in the child user namespace, for good. */
1836 a = procfs_file_alloca(ppid, "setgroups");
1837 fd = open(a, O_WRONLY|O_CLOEXEC);
1838 if (fd < 0) {
1839 if (errno != ENOENT) {
1840 r = -errno;
1841 goto child_fail;
1842 }
1843
1844 /* If the file is missing the kernel is too old, let's continue anyway. */
1845 } else {
1846 if (write(fd, "deny\n", 5) < 0) {
1847 r = -errno;
1848 goto child_fail;
1849 }
1850
1851 fd = safe_close(fd);
1852 }
1853
1854 /* First write the GID map */
1855 a = procfs_file_alloca(ppid, "gid_map");
1856 fd = open(a, O_WRONLY|O_CLOEXEC);
1857 if (fd < 0) {
1858 r = -errno;
1859 goto child_fail;
1860 }
1861 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1862 r = -errno;
1863 goto child_fail;
1864 }
1865 fd = safe_close(fd);
1866
1867 /* The write the UID map */
1868 a = procfs_file_alloca(ppid, "uid_map");
1869 fd = open(a, O_WRONLY|O_CLOEXEC);
1870 if (fd < 0) {
1871 r = -errno;
1872 goto child_fail;
1873 }
1874 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1875 r = -errno;
1876 goto child_fail;
1877 }
1878
1879 _exit(EXIT_SUCCESS);
1880
1881 child_fail:
1882 (void) write(errno_pipe[1], &r, sizeof(r));
1883 _exit(EXIT_FAILURE);
1884 }
1885
1886 errno_pipe[1] = safe_close(errno_pipe[1]);
1887
1888 if (unshare(CLONE_NEWUSER) < 0)
1889 return -errno;
1890
1891 /* Let the child know that the namespace is ready now */
1892 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1893 return -errno;
1894
1895 /* Try to read an error code from the child */
1896 n = read(errno_pipe[0], &r, sizeof(r));
1897 if (n < 0)
1898 return -errno;
1899 if (n == sizeof(r)) { /* an error code was sent to us */
1900 if (r < 0)
1901 return r;
1902 return -EIO;
1903 }
1904 if (n != 0) /* on success we should have read 0 bytes */
1905 return -EIO;
1906
1907 r = wait_for_terminate(pid, &si);
1908 if (r < 0)
1909 return r;
1910 pid = 0;
1911
1912 /* If something strange happened with the child, let's consider this fatal, too */
1913 if (si.si_code != CLD_EXITED || si.si_status != 0)
1914 return -EIO;
1915
1916 return 0;
1917}
1918
07689d5d
LP
1919static int setup_runtime_directory(
1920 const ExecContext *context,
1921 const ExecParameters *params,
1922 uid_t uid,
1923 gid_t gid) {
1924
1925 char **rt;
1926 int r;
1927
1928 assert(context);
1929 assert(params);
1930
1931 STRV_FOREACH(rt, context->runtime_directory) {
1932 _cleanup_free_ char *p;
1933
605405c6 1934 p = strjoin(params->runtime_prefix, "/", *rt);
07689d5d
LP
1935 if (!p)
1936 return -ENOMEM;
1937
1938 r = mkdir_p_label(p, context->runtime_directory_mode);
1939 if (r < 0)
1940 return r;
1941
1942 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1943 if (r < 0)
1944 return r;
1945 }
1946
1947 return 0;
1948}
1949
cefc33ae
LP
1950static int setup_smack(
1951 const ExecContext *context,
1952 const ExecCommand *command) {
1953
1954#ifdef HAVE_SMACK
1955 int r;
1956
1957 assert(context);
1958 assert(command);
1959
1960 if (!mac_smack_use())
1961 return 0;
1962
1963 if (context->smack_process_label) {
1964 r = mac_smack_apply_pid(0, context->smack_process_label);
1965 if (r < 0)
1966 return r;
1967 }
1968#ifdef SMACK_DEFAULT_PROCESS_LABEL
1969 else {
1970 _cleanup_free_ char *exec_label = NULL;
1971
1972 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1973 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1974 return r;
1975
1976 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1977 if (r < 0)
1978 return r;
1979 }
1980#endif
1981#endif
1982
1983 return 0;
1984}
1985
3fbe8dbe
LP
1986static int compile_read_write_paths(
1987 const ExecContext *context,
1988 const ExecParameters *params,
1989 char ***ret) {
1990
1991 _cleanup_strv_free_ char **l = NULL;
1992 char **rt;
1993
1994 /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
1995 * runtime directories. */
1996
1997 if (strv_isempty(context->read_write_paths) &&
1998 strv_isempty(context->runtime_directory)) {
1999 *ret = NULL; /* NOP if neither is set */
2000 return 0;
2001 }
2002
2003 l = strv_copy(context->read_write_paths);
2004 if (!l)
2005 return -ENOMEM;
2006
2007 STRV_FOREACH(rt, context->runtime_directory) {
2008 char *s;
2009
605405c6 2010 s = strjoin(params->runtime_prefix, "/", *rt);
3fbe8dbe
LP
2011 if (!s)
2012 return -ENOMEM;
2013
2014 if (strv_consume(&l, s) < 0)
2015 return -ENOMEM;
2016 }
2017
2018 *ret = l;
2019 l = NULL;
2020
2021 return 0;
2022}
2023
93c6bb51
DH
2024static int apply_mount_namespace(Unit *u, const ExecContext *context,
2025 const ExecParameters *params,
2026 ExecRuntime *runtime) {
2027 int r;
2028 _cleanup_free_ char **rw = NULL;
2029 char *tmp = NULL, *var = NULL;
2030 const char *root_dir = NULL;
2031 NameSpaceInfo ns_info = {
af964954 2032 .ignore_protect_paths = false,
93c6bb51
DH
2033 .private_dev = context->private_devices,
2034 .protect_control_groups = context->protect_control_groups,
2035 .protect_kernel_tunables = context->protect_kernel_tunables,
2036 .protect_kernel_modules = context->protect_kernel_modules,
2037 };
2038
2b3c1b9e
DH
2039 assert(context);
2040
93c6bb51
DH
2041 /* The runtime struct only contains the parent of the private /tmp,
2042 * which is non-accessible to world users. Inside of it there's a /tmp
2043 * that is sticky, and that's the one we want to use here. */
2044
2045 if (context->private_tmp && runtime) {
2046 if (runtime->tmp_dir)
2047 tmp = strjoina(runtime->tmp_dir, "/tmp");
2048 if (runtime->var_tmp_dir)
2049 var = strjoina(runtime->var_tmp_dir, "/tmp");
2050 }
2051
2052 r = compile_read_write_paths(context, params, &rw);
2053 if (r < 0)
2054 return r;
2055
2056 if (params->flags & EXEC_APPLY_CHROOT)
2057 root_dir = context->root_directory;
2058
af964954
DH
2059 /*
2060 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2061 * sandbox info, otherwise enforce it, don't ignore protected paths and
2062 * fail if we are enable to apply the sandbox inside the mount namespace.
2063 */
2064 if (!context->dynamic_user && root_dir)
2065 ns_info.ignore_protect_paths = true;
2066
93c6bb51
DH
2067 r = setup_namespace(root_dir, &ns_info, rw,
2068 context->read_only_paths,
2069 context->inaccessible_paths,
2070 tmp,
2071 var,
2072 context->protect_home,
2073 context->protect_system,
2074 context->mount_flags);
2075
2076 /* If we couldn't set up the namespace this is probably due to a
2077 * missing capability. In this case, silently proceeed. */
2078 if (IN_SET(r, -EPERM, -EACCES)) {
2079 log_open();
2080 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2081 log_close();
2082 r = 0;
2083 }
2084
2085 return r;
2086}
2087
e7f1e7c6
DH
2088static int apply_working_directory(const ExecContext *context,
2089 const ExecParameters *params,
2b3c1b9e 2090 const char *home,
e7f1e7c6 2091 const bool needs_mount_ns) {
2b3c1b9e
DH
2092 const char *d;
2093 const char *wd;
2094
2095 assert(context);
2096
2097 if (context->working_directory_home)
2098 wd = home;
2099 else if (context->working_directory)
2100 wd = context->working_directory;
2101 else
2102 wd = "/";
e7f1e7c6
DH
2103
2104 if (params->flags & EXEC_APPLY_CHROOT) {
2105 if (!needs_mount_ns && context->root_directory)
2106 if (chroot(context->root_directory) < 0)
2107 return -errno;
2108
2b3c1b9e
DH
2109 d = wd;
2110 } else
2111 d = strjoina(strempty(context->root_directory), "/", strempty(wd));
e7f1e7c6 2112
2b3c1b9e
DH
2113 if (chdir(d) < 0 && !context->working_directory_missing_ok)
2114 return -errno;
e7f1e7c6
DH
2115
2116 return 0;
2117}
2118
29206d46
LP
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', advancing the
 * element counter *n for each one added. Does nothing if 'pair' is NULL. The caller has to make
 * sure that 'array' has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2131
a34ceba6
LP
2132static int close_remaining_fds(
2133 const ExecParameters *params,
2134 ExecRuntime *runtime,
29206d46 2135 DynamicCreds *dcreds,
00d9ef85 2136 int user_lookup_fd,
a34ceba6
LP
2137 int socket_fd,
2138 int *fds, unsigned n_fds) {
2139
2140 unsigned n_dont_close = 0;
00d9ef85 2141 int dont_close[n_fds + 12];
a34ceba6
LP
2142
2143 assert(params);
2144
2145 if (params->stdin_fd >= 0)
2146 dont_close[n_dont_close++] = params->stdin_fd;
2147 if (params->stdout_fd >= 0)
2148 dont_close[n_dont_close++] = params->stdout_fd;
2149 if (params->stderr_fd >= 0)
2150 dont_close[n_dont_close++] = params->stderr_fd;
2151
2152 if (socket_fd >= 0)
2153 dont_close[n_dont_close++] = socket_fd;
2154 if (n_fds > 0) {
2155 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2156 n_dont_close += n_fds;
2157 }
2158
29206d46
LP
2159 if (runtime)
2160 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2161
2162 if (dcreds) {
2163 if (dcreds->user)
2164 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2165 if (dcreds->group)
2166 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
a34ceba6
LP
2167 }
2168
00d9ef85
LP
2169 if (user_lookup_fd >= 0)
2170 dont_close[n_dont_close++] = user_lookup_fd;
2171
a34ceba6
LP
2172 return close_all_fds(dont_close, n_dont_close);
2173}
2174
59eeb84b
LP
2175static bool context_has_address_families(const ExecContext *c) {
2176 assert(c);
2177
2178 return c->address_families_whitelist ||
2179 !set_isempty(c->address_families);
2180}
2181
2182static bool context_has_syscall_filters(const ExecContext *c) {
2183 assert(c);
2184
2185 return c->syscall_whitelist ||
2186 !set_isempty(c->syscall_filter) ||
2187 !set_isempty(c->syscall_archs);
2188}
2189
2190static bool context_has_no_new_privileges(const ExecContext *c) {
2191 assert(c);
2192
2193 if (c->no_new_privileges)
2194 return true;
2195
2196 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
2197 return false;
2198
2199 return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
2200 c->memory_deny_write_execute ||
2201 c->restrict_realtime ||
add00535 2202 exec_context_restrict_namespaces_set(c) ||
59eeb84b 2203 c->protect_kernel_tunables ||
4084e8fc
DH
2204 c->protect_kernel_modules ||
2205 c->private_devices ||
59eeb84b
LP
2206 context_has_syscall_filters(c);
2207}
2208
00d9ef85
LP
2209static int send_user_lookup(
2210 Unit *unit,
2211 int user_lookup_fd,
2212 uid_t uid,
2213 gid_t gid) {
2214
2215 assert(unit);
2216
2217 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2218 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2219 * specified. */
2220
2221 if (user_lookup_fd < 0)
2222 return 0;
2223
2224 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2225 return 0;
2226
2227 if (writev(user_lookup_fd,
2228 (struct iovec[]) {
2229 { .iov_base = &uid, .iov_len = sizeof(uid) },
2230 { .iov_base = &gid, .iov_len = sizeof(gid) },
2231 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2232 return -errno;
2233
2234 return 0;
2235}
2236
ff0af2a1 2237static int exec_child(
f2341e0a 2238 Unit *unit,
ff0af2a1
LP
2239 ExecCommand *command,
2240 const ExecContext *context,
2241 const ExecParameters *params,
2242 ExecRuntime *runtime,
29206d46 2243 DynamicCreds *dcreds,
ff0af2a1
LP
2244 char **argv,
2245 int socket_fd,
52c239d7 2246 int named_iofds[3],
ff0af2a1
LP
2247 int *fds, unsigned n_fds,
2248 char **files_env,
00d9ef85 2249 int user_lookup_fd,
ff0af2a1 2250 int *exit_status) {
d35fbf6b 2251
2065ca69 2252 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
9008e1ac 2253 _cleanup_free_ char *mac_selinux_context_net = NULL;
4d885bd3
DH
2254 _cleanup_free_ gid_t *supplementary_gids = NULL;
2255 const char *username = NULL, *groupname = NULL;
2b3c1b9e 2256 const char *home = NULL, *shell = NULL;
7bce046b
LP
2257 dev_t journal_stream_dev = 0;
2258 ino_t journal_stream_ino = 0;
2259 bool needs_mount_namespace;
fed1e721
LP
2260 uid_t uid = UID_INVALID;
2261 gid_t gid = GID_INVALID;
4d885bd3 2262 int i, r, ngids = 0;
034c6ed7 2263
f2341e0a 2264 assert(unit);
5cb5a6ff
LP
2265 assert(command);
2266 assert(context);
d35fbf6b 2267 assert(params);
ff0af2a1 2268 assert(exit_status);
d35fbf6b
DM
2269
2270 rename_process_from_path(command->path);
2271
2272 /* We reset exactly these signals, since they are the
2273 * only ones we set to SIG_IGN in the main daemon. All
2274 * others we leave untouched because we set them to
2275 * SIG_DFL or a valid handler initially, both of which
2276 * will be demoted to SIG_DFL. */
ce30c8dc
LP
2277 (void) default_signals(SIGNALS_CRASH_HANDLER,
2278 SIGNALS_IGNORE, -1);
d35fbf6b
DM
2279
2280 if (context->ignore_sigpipe)
ce30c8dc 2281 (void) ignore_signals(SIGPIPE, -1);
d35fbf6b 2282
ff0af2a1
LP
2283 r = reset_signal_mask();
2284 if (r < 0) {
2285 *exit_status = EXIT_SIGNAL_MASK;
2286 return r;
d35fbf6b 2287 }
034c6ed7 2288
d35fbf6b
DM
2289 if (params->idle_pipe)
2290 do_idle_pipe_dance(params->idle_pipe);
4f2d528d 2291
d35fbf6b
DM
2292 /* Close sockets very early to make sure we don't
2293 * block init reexecution because it cannot bind its
2294 * sockets */
ff0af2a1 2295
d35fbf6b 2296 log_forget_fds();
4f2d528d 2297
00d9ef85 2298 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
ff0af2a1
LP
2299 if (r < 0) {
2300 *exit_status = EXIT_FDS;
2301 return r;
8c7be95e
LP
2302 }
2303
d35fbf6b
DM
2304 if (!context->same_pgrp)
2305 if (setsid() < 0) {
ff0af2a1 2306 *exit_status = EXIT_SETSID;
d35fbf6b
DM
2307 return -errno;
2308 }
9e2f7c11 2309
1e22b5cd 2310 exec_context_tty_reset(context, params);
d35fbf6b 2311
7d5ceb64
FB
2312 if (params->confirm_spawn) {
2313 const char *vc = params->confirm_spawn;
d35fbf6b
DM
2314 char response;
2315
7d5ceb64 2316 r = ask_for_confirmation(vc, &response, argv);
ff0af2a1 2317 if (r == -ETIMEDOUT)
7d5ceb64 2318 write_confirm_message(vc, "Confirmation question timed out, assuming positive response.\n");
ff0af2a1 2319 else if (r < 0)
7d5ceb64 2320 write_confirm_message(vc, "Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
d35fbf6b 2321 else if (response == 's') {
7d5ceb64 2322 write_confirm_message(vc, "Skipping execution.\n");
ff0af2a1 2323 *exit_status = EXIT_CONFIRM;
d35fbf6b
DM
2324 return -ECANCELED;
2325 } else if (response == 'n') {
7d5ceb64 2326 write_confirm_message(vc, "Failing execution.\n");
ff0af2a1 2327 *exit_status = 0;
d35fbf6b
DM
2328 return 0;
2329 }
2330 }
1a63a750 2331
29206d46
LP
2332 if (context->dynamic_user && dcreds) {
2333
409093fe
LP
2334 /* Make sure we bypass our own NSS module for any NSS checks */
2335 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2336 *exit_status = EXIT_USER;
2337 return -errno;
2338 }
2339
29206d46 2340 r = dynamic_creds_realize(dcreds, &uid, &gid);
ff0af2a1
LP
2341 if (r < 0) {
2342 *exit_status = EXIT_USER;
2343 return r;
524daa8c 2344 }
524daa8c 2345
92b25bca 2346 if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
29206d46
LP
2347 *exit_status = EXIT_USER;
2348 return -ESRCH;
2349 }
5bc7452b 2350
29206d46
LP
2351 if (dcreds->user)
2352 username = dcreds->user->name;
2353
2354 } else {
4d885bd3
DH
2355 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2356 if (r < 0) {
2357 *exit_status = EXIT_USER;
2358 return r;
5bc7452b 2359 }
5bc7452b 2360
4d885bd3
DH
2361 r = get_fixed_group(context, &groupname, &gid);
2362 if (r < 0) {
2363 *exit_status = EXIT_GROUP;
2364 return r;
2365 }
cdc5d5c5 2366 }
29206d46 2367
cdc5d5c5
DH
2368 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2369 r = get_supplementary_groups(context, username, groupname, gid,
2370 &supplementary_gids, &ngids);
2371 if (r < 0) {
2372 *exit_status = EXIT_GROUP;
2373 return r;
29206d46 2374 }
5bc7452b 2375
00d9ef85
LP
2376 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2377 if (r < 0) {
2378 *exit_status = EXIT_USER;
2379 return r;
2380 }
2381
2382 user_lookup_fd = safe_close(user_lookup_fd);
2383
d35fbf6b
DM
2384 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2385 * must sure to drop O_NONBLOCK */
2386 if (socket_fd >= 0)
a34ceba6 2387 (void) fd_nonblock(socket_fd, false);
acbb0225 2388
52c239d7 2389 r = setup_input(context, params, socket_fd, named_iofds);
ff0af2a1
LP
2390 if (r < 0) {
2391 *exit_status = EXIT_STDIN;
2392 return r;
d35fbf6b 2393 }
034c6ed7 2394
52c239d7 2395 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2396 if (r < 0) {
2397 *exit_status = EXIT_STDOUT;
2398 return r;
d35fbf6b
DM
2399 }
2400
52c239d7 2401 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
ff0af2a1
LP
2402 if (r < 0) {
2403 *exit_status = EXIT_STDERR;
2404 return r;
d35fbf6b
DM
2405 }
2406
2407 if (params->cgroup_path) {
ff0af2a1
LP
2408 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2409 if (r < 0) {
2410 *exit_status = EXIT_CGROUP;
2411 return r;
309bff19 2412 }
d35fbf6b 2413 }
309bff19 2414
d35fbf6b 2415 if (context->oom_score_adjust_set) {
d5243d62 2416 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
f2b68789 2417
d5243d62
LP
2418 /* When we can't make this change due to EPERM, then
2419 * let's silently skip over it. User namespaces
2420 * prohibit write access to this file, and we
2421 * shouldn't trip up over that. */
613b411c 2422
d5243d62 2423 sprintf(t, "%i", context->oom_score_adjust);
ad118bda 2424 r = write_string_file("/proc/self/oom_score_adj", t, 0);
6cb7fa17 2425 if (r == -EPERM || r == -EACCES) {
ff0af2a1 2426 log_open();
f2341e0a 2427 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
ff0af2a1
LP
2428 log_close();
2429 } else if (r < 0) {
2430 *exit_status = EXIT_OOM_ADJUST;
d35fbf6b 2431 return -errno;
613b411c 2432 }
d35fbf6b
DM
2433 }
2434
2435 if (context->nice_set)
2436 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
ff0af2a1 2437 *exit_status = EXIT_NICE;
d35fbf6b 2438 return -errno;
613b411c
LP
2439 }
2440
d35fbf6b
DM
2441 if (context->cpu_sched_set) {
2442 struct sched_param param = {
2443 .sched_priority = context->cpu_sched_priority,
2444 };
2445
ff0af2a1
LP
2446 r = sched_setscheduler(0,
2447 context->cpu_sched_policy |
2448 (context->cpu_sched_reset_on_fork ?
2449 SCHED_RESET_ON_FORK : 0),
2450 &param);
2451 if (r < 0) {
2452 *exit_status = EXIT_SETSCHEDULER;
d35fbf6b 2453 return -errno;
fc9b2a84 2454 }
d35fbf6b 2455 }
fc9b2a84 2456
d35fbf6b
DM
2457 if (context->cpuset)
2458 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
ff0af2a1 2459 *exit_status = EXIT_CPUAFFINITY;
d35fbf6b 2460 return -errno;
034c6ed7
LP
2461 }
2462
d35fbf6b
DM
2463 if (context->ioprio_set)
2464 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
ff0af2a1 2465 *exit_status = EXIT_IOPRIO;
d35fbf6b
DM
2466 return -errno;
2467 }
da726a4d 2468
d35fbf6b
DM
2469 if (context->timer_slack_nsec != NSEC_INFINITY)
2470 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
ff0af2a1 2471 *exit_status = EXIT_TIMERSLACK;
d35fbf6b 2472 return -errno;
4c2630eb 2473 }
9eba9da4 2474
050f7277 2475 if (context->personality != PERSONALITY_INVALID)
d35fbf6b 2476 if (personality(context->personality) < 0) {
ff0af2a1 2477 *exit_status = EXIT_PERSONALITY;
d35fbf6b 2478 return -errno;
4c2630eb 2479 }
94f04347 2480
d35fbf6b 2481 if (context->utmp_id)
023a4f67
LP
2482 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
2483 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2484 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2485 USER_PROCESS,
2486 username ? "root" : context->user);
d35fbf6b 2487
e0d2adfd 2488 if (context->user) {
ff0af2a1
LP
2489 r = chown_terminal(STDIN_FILENO, uid);
2490 if (r < 0) {
2491 *exit_status = EXIT_STDIN;
2492 return r;
071830ff 2493 }
d35fbf6b 2494 }
8e274523 2495
a931ad47
LP
2496 /* If delegation is enabled we'll pass ownership of the cgroup
2497 * (but only in systemd's own controller hierarchy!) to the
2498 * user of the new process. */
2499 if (params->cgroup_path && context->user && params->cgroup_delegate) {
ff0af2a1
LP
2500 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2501 if (r < 0) {
2502 *exit_status = EXIT_CGROUP;
2503 return r;
d35fbf6b 2504 }
034c6ed7 2505
034c6ed7 2506
ff0af2a1
LP
2507 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2508 if (r < 0) {
2509 *exit_status = EXIT_CGROUP;
2510 return r;
034c6ed7 2511 }
d35fbf6b 2512 }
034c6ed7 2513
d35fbf6b 2514 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
07689d5d
LP
2515 r = setup_runtime_directory(context, params, uid, gid);
2516 if (r < 0) {
2517 *exit_status = EXIT_RUNTIME_DIRECTORY;
2518 return r;
d35fbf6b
DM
2519 }
2520 }
94f04347 2521
7bce046b 2522 r = build_environment(
fd63e712 2523 unit,
7bce046b
LP
2524 context,
2525 params,
2526 n_fds,
2527 home,
2528 username,
2529 shell,
2530 journal_stream_dev,
2531 journal_stream_ino,
2532 &our_env);
2065ca69
JW
2533 if (r < 0) {
2534 *exit_status = EXIT_MEMORY;
2535 return r;
2536 }
2537
2538 r = build_pass_environment(context, &pass_env);
2539 if (r < 0) {
2540 *exit_status = EXIT_MEMORY;
2541 return r;
2542 }
2543
2544 accum_env = strv_env_merge(5,
2545 params->environment,
2546 our_env,
2547 pass_env,
2548 context->environment,
2549 files_env,
2550 NULL);
2551 if (!accum_env) {
2552 *exit_status = EXIT_MEMORY;
2553 return -ENOMEM;
2554 }
1280503b 2555 accum_env = strv_env_clean(accum_env);
2065ca69 2556
096424d1 2557 (void) umask(context->umask);
b213e1c1 2558
c39f1ce2 2559 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
b213e1c1 2560 if (context->pam_name && username) {
2d6fce8d 2561 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
b213e1c1
SW
2562 if (r < 0) {
2563 *exit_status = EXIT_PAM;
2564 return r;
2565 }
d35fbf6b 2566 }
b213e1c1 2567 }
ac45f971 2568
d35fbf6b 2569 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
ff0af2a1
LP
2570 r = setup_netns(runtime->netns_storage_socket);
2571 if (r < 0) {
2572 *exit_status = EXIT_NETWORK;
2573 return r;
d35fbf6b
DM
2574 }
2575 }
169c1bda 2576
ee818b89 2577 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
ee818b89 2578 if (needs_mount_namespace) {
93c6bb51 2579 r = apply_mount_namespace(unit, context, params, runtime);
3fbe8dbe
LP
2580 if (r < 0) {
2581 *exit_status = EXIT_NAMESPACE;
2582 return r;
2583 }
d35fbf6b 2584 }
81a2b7ce 2585
50b3dfb9
DH
2586 /* Apply just after mount namespace setup */
2587 r = apply_working_directory(context, params, home, needs_mount_namespace);
2588 if (r < 0) {
2589 *exit_status = EXIT_CHROOT;
2590 return r;
2591 }
2592
bbeea271 2593 /* Drop groups as early as possbile */
096424d1 2594 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
4d885bd3 2595 r = enforce_groups(context, gid, supplementary_gids, ngids);
096424d1
LP
2596 if (r < 0) {
2597 *exit_status = EXIT_GROUP;
2598 return r;
2599 }
2600 }
2601
9008e1ac 2602#ifdef HAVE_SELINUX
c39f1ce2
LP
2603 if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
2604 mac_selinux_use() &&
2605 params->selinux_context_net &&
2606 socket_fd >= 0 &&
2607 !command->privileged) {
2608
ff0af2a1
LP
2609 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2610 if (r < 0) {
2611 *exit_status = EXIT_SELINUX_CONTEXT;
2612 return r;
9008e1ac
MS
2613 }
2614 }
2615#endif
2616
d87a2ef7 2617 if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) {
d251207d
LP
2618 r = setup_private_users(uid, gid);
2619 if (r < 0) {
2620 *exit_status = EXIT_USER;
2621 return r;
2622 }
2623 }
2624
d35fbf6b
DM
2625 /* We repeat the fd closing here, to make sure that
2626 * nothing is leaked from the PAM modules. Note that
2627 * we are more aggressive this time since socket_fd
e44da745
DM
2628 * and the netns fds we don't need anymore. The custom
2629 * endpoint fd was needed to upload the policy and can
2630 * now be closed as well. */
ff0af2a1
LP
2631 r = close_all_fds(fds, n_fds);
2632 if (r >= 0)
2633 r = shift_fds(fds, n_fds);
2634 if (r >= 0)
2635 r = flags_fds(fds, n_fds, context->non_blocking);
2636 if (r < 0) {
2637 *exit_status = EXIT_FDS;
2638 return r;
d35fbf6b 2639 }
e66cf1a3 2640
c39f1ce2 2641 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
e66cf1a3 2642
755d4b67
IP
2643 int secure_bits = context->secure_bits;
2644
d35fbf6b 2645 for (i = 0; i < _RLIMIT_MAX; i++) {
03857c43 2646
d35fbf6b
DM
2647 if (!context->rlimit[i])
2648 continue;
2649
03857c43
LP
2650 r = setrlimit_closest(i, context->rlimit[i]);
2651 if (r < 0) {
ff0af2a1 2652 *exit_status = EXIT_LIMITS;
03857c43 2653 return r;
e66cf1a3
LP
2654 }
2655 }
2656
f4170c67
LP
2657 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2658 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2659 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2660 *exit_status = EXIT_LIMITS;
2661 return -errno;
2662 }
2663 }
2664
a103496c
IP
2665 if (!cap_test_all(context->capability_bounding_set)) {
2666 r = capability_bounding_set_drop(context->capability_bounding_set, false);
ff0af2a1
LP
2667 if (r < 0) {
2668 *exit_status = EXIT_CAPABILITIES;
2669 return r;
3b8bddde 2670 }
4c2630eb 2671 }
3b8bddde 2672
755d4b67
IP
2673 /* This is done before enforce_user, but ambient set
2674 * does not survive over setresuid() if keep_caps is not set. */
2675 if (context->capability_ambient_set != 0) {
2676 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2677 if (r < 0) {
2678 *exit_status = EXIT_CAPABILITIES;
2679 return r;
2680 }
755d4b67
IP
2681 }
2682
d35fbf6b 2683 if (context->user) {
ff0af2a1
LP
2684 r = enforce_user(context, uid);
2685 if (r < 0) {
2686 *exit_status = EXIT_USER;
2687 return r;
5b6319dc 2688 }
755d4b67
IP
2689 if (context->capability_ambient_set != 0) {
2690
2691 /* Fix the ambient capabilities after user change. */
2692 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2693 if (r < 0) {
2694 *exit_status = EXIT_CAPABILITIES;
2695 return r;
2696 }
2697
2698 /* If we were asked to change user and ambient capabilities
2699 * were requested, we had to add keep-caps to the securebits
2700 * so that we would maintain the inherited capability set
2701 * through the setresuid(). Make sure that the bit is added
2702 * also to the context secure_bits so that we don't try to
2703 * drop the bit away next. */
2704
7f508f2c 2705 secure_bits |= 1<<SECURE_KEEP_CAPS;
755d4b67 2706 }
5b6319dc 2707 }
d35fbf6b 2708
5cd9cd35
LP
2709 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2710 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2711 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2712 * are restricted. */
2713
2714#ifdef HAVE_SELINUX
2715 if (mac_selinux_use()) {
2716 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2717
2718 if (exec_context) {
2719 r = setexeccon(exec_context);
2720 if (r < 0) {
2721 *exit_status = EXIT_SELINUX_CONTEXT;
2722 return r;
2723 }
2724 }
2725 }
2726#endif
2727
2728 r = setup_smack(context, command);
2729 if (r < 0) {
2730 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2731 return r;
2732 }
2733
2734#ifdef HAVE_APPARMOR
2735 if (context->apparmor_profile && mac_apparmor_use()) {
2736 r = aa_change_onexec(context->apparmor_profile);
2737 if (r < 0 && !context->apparmor_profile_ignore) {
2738 *exit_status = EXIT_APPARMOR_PROFILE;
2739 return -errno;
2740 }
2741 }
2742#endif
2743
d35fbf6b
DM
2744 /* PR_GET_SECUREBITS is not privileged, while
2745 * PR_SET_SECUREBITS is. So to suppress
2746 * potential EPERMs we'll try not to call
2747 * PR_SET_SECUREBITS unless necessary. */
755d4b67
IP
2748 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2749 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
ff0af2a1 2750 *exit_status = EXIT_SECUREBITS;
d35fbf6b 2751 return -errno;
ff01d048 2752 }
5b6319dc 2753
59eeb84b 2754 if (context_has_no_new_privileges(context))
d35fbf6b 2755 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
ff0af2a1 2756 *exit_status = EXIT_NO_NEW_PRIVILEGES;
d35fbf6b
DM
2757 return -errno;
2758 }
2759
2760#ifdef HAVE_SECCOMP
59eeb84b 2761 if (context_has_address_families(context)) {
83f12b27 2762 r = apply_address_families(unit, context);
ff0af2a1
LP
2763 if (r < 0) {
2764 *exit_status = EXIT_ADDRESS_FAMILIES;
2765 return r;
4c2630eb
MS
2766 }
2767 }
04aa0cb9 2768
f3e43635 2769 if (context->memory_deny_write_execute) {
83f12b27 2770 r = apply_memory_deny_write_execute(unit, context);
f3e43635
TM
2771 if (r < 0) {
2772 *exit_status = EXIT_SECCOMP;
2773 return r;
2774 }
2775 }
f4170c67
LP
2776
2777 if (context->restrict_realtime) {
83f12b27 2778 r = apply_restrict_realtime(unit, context);
f4170c67
LP
2779 if (r < 0) {
2780 *exit_status = EXIT_SECCOMP;
2781 return r;
2782 }
2783 }
2784
add00535
LP
2785 r = apply_restrict_namespaces(unit, context);
2786 if (r < 0) {
2787 *exit_status = EXIT_SECCOMP;
2788 return r;
2789 }
2790
59eeb84b
LP
2791 if (context->protect_kernel_tunables) {
2792 r = apply_protect_sysctl(unit, context);
2793 if (r < 0) {
2794 *exit_status = EXIT_SECCOMP;
2795 return r;
502d704e
DH
2796 }
2797 }
2798
2799 if (context->protect_kernel_modules) {
2800 r = apply_protect_kernel_modules(unit, context);
2801 if (r < 0) {
2802 *exit_status = EXIT_SECCOMP;
2803 return r;
59eeb84b
LP
2804 }
2805 }
2806
ba128bb8
LP
2807 if (context->private_devices) {
2808 r = apply_private_devices(unit, context);
2809 if (r < 0) {
2810 *exit_status = EXIT_SECCOMP;
2811 return r;
2812 }
2813 }
2814
5cd9cd35
LP
2815 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
2816 * by the filter as little as possible. */
59eeb84b 2817 if (context_has_syscall_filters(context)) {
83f12b27 2818 r = apply_seccomp(unit, context);
ff0af2a1
LP
2819 if (r < 0) {
2820 *exit_status = EXIT_SECCOMP;
2821 return r;
81a2b7ce 2822 }
d35fbf6b
DM
2823 }
2824#endif
d35fbf6b 2825 }
034c6ed7 2826
2065ca69 2827 final_argv = replace_env_argv(argv, accum_env);
d35fbf6b 2828 if (!final_argv) {
ff0af2a1 2829 *exit_status = EXIT_MEMORY;
d35fbf6b
DM
2830 return -ENOMEM;
2831 }
034c6ed7 2832
553d2243 2833 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
d35fbf6b 2834 _cleanup_free_ char *line;
81a2b7ce 2835
d35fbf6b
DM
2836 line = exec_command_line(final_argv);
2837 if (line) {
2838 log_open();
f2341e0a
LP
2839 log_struct(LOG_DEBUG,
2840 LOG_UNIT_ID(unit),
2841 "EXECUTABLE=%s", command->path,
2842 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
2843 NULL);
d35fbf6b
DM
2844 log_close();
2845 }
2846 }
dd305ec9 2847
2065ca69 2848 execve(command->path, final_argv, accum_env);
ff0af2a1 2849 *exit_status = EXIT_EXEC;
d35fbf6b
DM
2850 return -errno;
2851}
81a2b7ce 2852
f2341e0a
LP
2853int exec_spawn(Unit *unit,
2854 ExecCommand *command,
d35fbf6b
DM
2855 const ExecContext *context,
2856 const ExecParameters *params,
2857 ExecRuntime *runtime,
29206d46 2858 DynamicCreds *dcreds,
d35fbf6b 2859 pid_t *ret) {
8351ceae 2860
d35fbf6b
DM
2861 _cleanup_strv_free_ char **files_env = NULL;
2862 int *fds = NULL; unsigned n_fds = 0;
ff0af2a1
LP
2863 _cleanup_free_ char *line = NULL;
2864 int socket_fd, r;
52c239d7 2865 int named_iofds[3] = { -1, -1, -1 };
ff0af2a1 2866 char **argv;
d35fbf6b 2867 pid_t pid;
8351ceae 2868
f2341e0a 2869 assert(unit);
d35fbf6b
DM
2870 assert(command);
2871 assert(context);
2872 assert(ret);
2873 assert(params);
2874 assert(params->fds || params->n_fds <= 0);
4298d0b5 2875
d35fbf6b
DM
2876 if (context->std_input == EXEC_INPUT_SOCKET ||
2877 context->std_output == EXEC_OUTPUT_SOCKET ||
2878 context->std_error == EXEC_OUTPUT_SOCKET) {
17df7223 2879
ff0af2a1 2880 if (params->n_fds != 1) {
f2341e0a 2881 log_unit_error(unit, "Got more than one socket.");
d35fbf6b 2882 return -EINVAL;
ff0af2a1 2883 }
eef65bf3 2884
d35fbf6b
DM
2885 socket_fd = params->fds[0];
2886 } else {
2887 socket_fd = -1;
2888 fds = params->fds;
2889 n_fds = params->n_fds;
2890 }
94f04347 2891
52c239d7
LB
2892 r = exec_context_named_iofds(unit, context, params, named_iofds);
2893 if (r < 0)
2894 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
2895
f2341e0a 2896 r = exec_context_load_environment(unit, context, &files_env);
ff0af2a1 2897 if (r < 0)
f2341e0a 2898 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
034c6ed7 2899
d35fbf6b 2900 argv = params->argv ?: command->argv;
d35fbf6b
DM
2901 line = exec_command_line(argv);
2902 if (!line)
2903 return log_oom();
fab56fc5 2904
f2341e0a
LP
2905 log_struct(LOG_DEBUG,
2906 LOG_UNIT_ID(unit),
2907 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
2908 "EXECUTABLE=%s", command->path,
2909 NULL);
d35fbf6b
DM
2910 pid = fork();
2911 if (pid < 0)
74129a12 2912 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
d35fbf6b
DM
2913
2914 if (pid == 0) {
ff0af2a1
LP
2915 int exit_status;
2916
f2341e0a
LP
2917 r = exec_child(unit,
2918 command,
ff0af2a1
LP
2919 context,
2920 params,
2921 runtime,
29206d46 2922 dcreds,
ff0af2a1
LP
2923 argv,
2924 socket_fd,
52c239d7 2925 named_iofds,
ff0af2a1
LP
2926 fds, n_fds,
2927 files_env,
00d9ef85 2928 unit->manager->user_lookup_fds[1],
ff0af2a1
LP
2929 &exit_status);
2930 if (r < 0) {
4c2630eb 2931 log_open();
f2341e0a
LP
2932 log_struct_errno(LOG_ERR, r,
2933 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
2934 LOG_UNIT_ID(unit),
2935 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
2936 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
2937 command->path),
2938 "EXECUTABLE=%s", command->path,
2939 NULL);
4c2630eb
MS
2940 }
2941
ff0af2a1 2942 _exit(exit_status);
034c6ed7
LP
2943 }
2944
f2341e0a 2945 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
23635a85 2946
80876c20
LP
2947 /* We add the new process to the cgroup both in the child (so
2948 * that we can be sure that no user code is ever executed
2949 * outside of the cgroup) and in the parent (so that we can be
2950 * sure that when we kill the cgroup the process will be
2951 * killed too). */
d35fbf6b 2952 if (params->cgroup_path)
dd305ec9 2953 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2da3263a 2954
b58b4116 2955 exec_status_start(&command->exec_status, pid);
9fb86720 2956
034c6ed7 2957 *ret = pid;
5cb5a6ff
LP
2958 return 0;
2959}
2960
034c6ed7
LP
2961void exec_context_init(ExecContext *c) {
2962 assert(c);
2963
4c12626c 2964 c->umask = 0022;
9eba9da4 2965 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
94f04347 2966 c->cpu_sched_policy = SCHED_OTHER;
071830ff 2967 c->syslog_priority = LOG_DAEMON|LOG_INFO;
74922904 2968 c->syslog_level_prefix = true;
353e12c2 2969 c->ignore_sigpipe = true;
3a43da28 2970 c->timer_slack_nsec = NSEC_INFINITY;
050f7277 2971 c->personality = PERSONALITY_INVALID;
e66cf1a3 2972 c->runtime_directory_mode = 0755;
a103496c 2973 c->capability_bounding_set = CAP_ALL;
add00535 2974 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
034c6ed7
LP
2975}
2976
613b411c 2977void exec_context_done(ExecContext *c) {
5cb5a6ff
LP
2978 unsigned l;
2979
2980 assert(c);
2981
6796073e
LP
2982 c->environment = strv_free(c->environment);
2983 c->environment_files = strv_free(c->environment_files);
b4c14404 2984 c->pass_environment = strv_free(c->pass_environment);
8c7be95e 2985
1f6b4113 2986 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
a1e58e8e 2987 c->rlimit[l] = mfree(c->rlimit[l]);
034c6ed7 2988
52c239d7
LB
2989 for (l = 0; l < 3; l++)
2990 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
2991
a1e58e8e
LP
2992 c->working_directory = mfree(c->working_directory);
2993 c->root_directory = mfree(c->root_directory);
2994 c->tty_path = mfree(c->tty_path);
2995 c->syslog_identifier = mfree(c->syslog_identifier);
2996 c->user = mfree(c->user);
2997 c->group = mfree(c->group);
034c6ed7 2998
6796073e 2999 c->supplementary_groups = strv_free(c->supplementary_groups);
94f04347 3000
a1e58e8e 3001 c->pam_name = mfree(c->pam_name);
5b6319dc 3002
2a624c36
AP
3003 c->read_only_paths = strv_free(c->read_only_paths);
3004 c->read_write_paths = strv_free(c->read_write_paths);
3005 c->inaccessible_paths = strv_free(c->inaccessible_paths);
82c121a4
LP
3006
3007 if (c->cpuset)
3008 CPU_FREE(c->cpuset);
86a3475b 3009
a1e58e8e
LP
3010 c->utmp_id = mfree(c->utmp_id);
3011 c->selinux_context = mfree(c->selinux_context);
3012 c->apparmor_profile = mfree(c->apparmor_profile);
eef65bf3 3013
525d3cc7
LP
3014 c->syscall_filter = set_free(c->syscall_filter);
3015 c->syscall_archs = set_free(c->syscall_archs);
3016 c->address_families = set_free(c->address_families);
e66cf1a3 3017
6796073e 3018 c->runtime_directory = strv_free(c->runtime_directory);
e66cf1a3
LP
3019}
3020
3021int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3022 char **i;
3023
3024 assert(c);
3025
3026 if (!runtime_prefix)
3027 return 0;
3028
3029 STRV_FOREACH(i, c->runtime_directory) {
3030 _cleanup_free_ char *p;
3031
605405c6 3032 p = strjoin(runtime_prefix, "/", *i);
e66cf1a3
LP
3033 if (!p)
3034 return -ENOMEM;
3035
3036 /* We execute this synchronously, since we need to be
3037 * sure this is gone when we start the service
3038 * next. */
c6878637 3039 (void) rm_rf(p, REMOVE_ROOT);
e66cf1a3
LP
3040 }
3041
3042 return 0;
5cb5a6ff
LP
3043}
3044
43d0fcbd
LP
3045void exec_command_done(ExecCommand *c) {
3046 assert(c);
3047
a1e58e8e 3048 c->path = mfree(c->path);
43d0fcbd 3049
6796073e 3050 c->argv = strv_free(c->argv);
43d0fcbd
LP
3051}
3052
3053void exec_command_done_array(ExecCommand *c, unsigned n) {
3054 unsigned i;
3055
3056 for (i = 0; i < n; i++)
3057 exec_command_done(c+i);
3058}
3059
f1acf85a 3060ExecCommand* exec_command_free_list(ExecCommand *c) {
5cb5a6ff
LP
3061 ExecCommand *i;
3062
3063 while ((i = c)) {
71fda00f 3064 LIST_REMOVE(command, c, i);
43d0fcbd 3065 exec_command_done(i);
5cb5a6ff
LP
3066 free(i);
3067 }
f1acf85a
ZJS
3068
3069 return NULL;
5cb5a6ff
LP
3070}
3071
034c6ed7
LP
3072void exec_command_free_array(ExecCommand **c, unsigned n) {
3073 unsigned i;
3074
f1acf85a
ZJS
3075 for (i = 0; i < n; i++)
3076 c[i] = exec_command_free_list(c[i]);
034c6ed7
LP
3077}
3078
039f0e70 3079typedef struct InvalidEnvInfo {
f2341e0a 3080 Unit *unit;
039f0e70
LP
3081 const char *path;
3082} InvalidEnvInfo;
3083
3084static void invalid_env(const char *p, void *userdata) {
3085 InvalidEnvInfo *info = userdata;
3086
f2341e0a 3087 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
039f0e70
LP
3088}
3089
52c239d7
LB
3090const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3091 assert(c);
3092
3093 switch (fd_index) {
3094 case STDIN_FILENO:
3095 if (c->std_input != EXEC_INPUT_NAMED_FD)
3096 return NULL;
3097 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3098 case STDOUT_FILENO:
3099 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3100 return NULL;
3101 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3102 case STDERR_FILENO:
3103 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3104 return NULL;
3105 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3106 default:
3107 return NULL;
3108 }
3109}
3110
3111int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3112 unsigned i, targets;
3113 const char *stdio_fdname[3];
3114
3115 assert(c);
3116 assert(p);
3117
3118 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3119 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3120 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3121
3122 for (i = 0; i < 3; i++)
3123 stdio_fdname[i] = exec_context_fdname(c, i);
3124
3125 for (i = 0; i < p->n_fds && targets > 0; i++)
3126 if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3127 named_iofds[STDIN_FILENO] = p->fds[i];
3128 targets--;
3129 } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3130 named_iofds[STDOUT_FILENO] = p->fds[i];
3131 targets--;
3132 } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3133 named_iofds[STDERR_FILENO] = p->fds[i];
3134 targets--;
3135 }
3136
3137 return (targets == 0 ? 0 : -ENOENT);
3138}
3139
f2341e0a 3140int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
8c7be95e
LP
3141 char **i, **r = NULL;
3142
3143 assert(c);
3144 assert(l);
3145
3146 STRV_FOREACH(i, c->environment_files) {
3147 char *fn;
3148 int k;
3149 bool ignore = false;
3150 char **p;
7fd1b19b 3151 _cleanup_globfree_ glob_t pglob = {};
2bef10ab 3152 int count, n;
8c7be95e
LP
3153
3154 fn = *i;
3155
3156 if (fn[0] == '-') {
3157 ignore = true;
313cefa1 3158 fn++;
8c7be95e
LP
3159 }
3160
3161 if (!path_is_absolute(fn)) {
8c7be95e
LP
3162 if (ignore)
3163 continue;
3164
3165 strv_free(r);
3166 return -EINVAL;
3167 }
3168
2bef10ab 3169 /* Filename supports globbing, take all matching files */
2bef10ab
PL
3170 errno = 0;
3171 if (glob(fn, 0, NULL, &pglob) != 0) {
2bef10ab
PL
3172 if (ignore)
3173 continue;
8c7be95e 3174
2bef10ab 3175 strv_free(r);
f5e5c28f 3176 return errno > 0 ? -errno : -EINVAL;
2bef10ab
PL
3177 }
3178 count = pglob.gl_pathc;
3179 if (count == 0) {
8c7be95e
LP
3180 if (ignore)
3181 continue;
3182
3183 strv_free(r);
2bef10ab 3184 return -EINVAL;
8c7be95e 3185 }
2bef10ab 3186 for (n = 0; n < count; n++) {
717603e3 3187 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2bef10ab
PL
3188 if (k < 0) {
3189 if (ignore)
3190 continue;
8c7be95e 3191
2bef10ab 3192 strv_free(r);
2bef10ab 3193 return k;
e9c1ea9d 3194 }
ebc05a09 3195 /* Log invalid environment variables with filename */
039f0e70
LP
3196 if (p) {
3197 InvalidEnvInfo info = {
f2341e0a 3198 .unit = unit,
039f0e70
LP
3199 .path = pglob.gl_pathv[n]
3200 };
3201
3202 p = strv_env_clean_with_callback(p, invalid_env, &info);
3203 }
8c7be95e 3204
2bef10ab
PL
3205 if (r == NULL)
3206 r = p;
3207 else {
3208 char **m;
8c7be95e 3209
2bef10ab
PL
3210 m = strv_env_merge(2, r, p);
3211 strv_free(r);
3212 strv_free(p);
c84a9488 3213 if (!m)
2bef10ab 3214 return -ENOMEM;
2bef10ab
PL
3215
3216 r = m;
3217 }
8c7be95e
LP
3218 }
3219 }
3220
3221 *l = r;
3222
3223 return 0;
3224}
3225
6ac8fdc9 3226static bool tty_may_match_dev_console(const char *tty) {
e1d75803 3227 _cleanup_free_ char *active = NULL;
7d6884b6 3228 char *console;
6ac8fdc9 3229
1e22b5cd
LP
3230 if (!tty)
3231 return true;
3232
6ac8fdc9
MS
3233 if (startswith(tty, "/dev/"))
3234 tty += 5;
3235
3236 /* trivial identity? */
3237 if (streq(tty, "console"))
3238 return true;
3239
3240 console = resolve_dev_console(&active);
3241 /* if we could not resolve, assume it may */
3242 if (!console)
3243 return true;
3244
3245 /* "tty0" means the active VC, so it may be the same sometimes */
e1d75803 3246 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
6ac8fdc9
MS
3247}
3248
3249bool exec_context_may_touch_console(ExecContext *ec) {
1e22b5cd
LP
3250
3251 return (ec->tty_reset ||
3252 ec->tty_vhangup ||
3253 ec->tty_vt_disallocate ||
6ac8fdc9
MS
3254 is_terminal_input(ec->std_input) ||
3255 is_terminal_output(ec->std_output) ||
3256 is_terminal_output(ec->std_error)) &&
1e22b5cd 3257 tty_may_match_dev_console(exec_context_tty_path(ec));
6ac8fdc9
MS
3258}
3259
15ae422b
LP
3260static void strv_fprintf(FILE *f, char **l) {
3261 char **g;
3262
3263 assert(f);
3264
3265 STRV_FOREACH(g, l)
3266 fprintf(f, " %s", *g);
3267}
3268
5cb5a6ff 3269void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
c2bbd90b 3270 char **e, **d;
94f04347 3271 unsigned i;
add00535 3272 int r;
9eba9da4 3273
5cb5a6ff
LP
3274 assert(c);
3275 assert(f);
3276
4ad49000 3277 prefix = strempty(prefix);
5cb5a6ff
LP
3278
3279 fprintf(f,
94f04347
LP
3280 "%sUMask: %04o\n"
3281 "%sWorkingDirectory: %s\n"
451a074f 3282 "%sRootDirectory: %s\n"
15ae422b 3283 "%sNonBlocking: %s\n"
64747e2d 3284 "%sPrivateTmp: %s\n"
7f112f50 3285 "%sPrivateDevices: %s\n"
59eeb84b 3286 "%sProtectKernelTunables: %s\n"
e66a2f65 3287 "%sProtectKernelModules: %s\n"
59eeb84b 3288 "%sProtectControlGroups: %s\n"
d251207d
LP
3289 "%sPrivateNetwork: %s\n"
3290 "%sPrivateUsers: %s\n"
1b8689f9
LP
3291 "%sProtectHome: %s\n"
3292 "%sProtectSystem: %s\n"
f3e43635 3293 "%sIgnoreSIGPIPE: %s\n"
f4170c67
LP
3294 "%sMemoryDenyWriteExecute: %s\n"
3295 "%sRestrictRealtime: %s\n",
5cb5a6ff 3296 prefix, c->umask,
9eba9da4 3297 prefix, c->working_directory ? c->working_directory : "/",
451a074f 3298 prefix, c->root_directory ? c->root_directory : "/",
15ae422b 3299 prefix, yes_no(c->non_blocking),
64747e2d 3300 prefix, yes_no(c->private_tmp),
7f112f50 3301 prefix, yes_no(c->private_devices),
59eeb84b 3302 prefix, yes_no(c->protect_kernel_tunables),
e66a2f65 3303 prefix, yes_no(c->protect_kernel_modules),
59eeb84b 3304 prefix, yes_no(c->protect_control_groups),
d251207d
LP
3305 prefix, yes_no(c->private_network),
3306 prefix, yes_no(c->private_users),
1b8689f9
LP
3307 prefix, protect_home_to_string(c->protect_home),
3308 prefix, protect_system_to_string(c->protect_system),
f3e43635 3309 prefix, yes_no(c->ignore_sigpipe),
f4170c67
LP
3310 prefix, yes_no(c->memory_deny_write_execute),
3311 prefix, yes_no(c->restrict_realtime));
fb33a393 3312
8c7be95e
LP
3313 STRV_FOREACH(e, c->environment)
3314 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3315
3316 STRV_FOREACH(e, c->environment_files)
3317 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
94f04347 3318
b4c14404
FB
3319 STRV_FOREACH(e, c->pass_environment)
3320 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3321
c2bbd90b
EV
3322 fprintf(f, "%sRuntimeDirectoryMode: %04o\n", prefix, c->runtime_directory_mode);
3323
3324 STRV_FOREACH(d, c->runtime_directory)
3325 fprintf(f, "%sRuntimeDirectory: %s\n", prefix, *d);
3326
fb33a393
LP
3327 if (c->nice_set)
3328 fprintf(f,
3329 "%sNice: %i\n",
3330 prefix, c->nice);
3331
dd6c17b1 3332 if (c->oom_score_adjust_set)
fb33a393 3333 fprintf(f,
dd6c17b1
LP
3334 "%sOOMScoreAdjust: %i\n",
3335 prefix, c->oom_score_adjust);
9eba9da4 3336
94f04347 3337 for (i = 0; i < RLIM_NLIMITS; i++)
3c11da9d
EV
3338 if (c->rlimit[i]) {
3339 fprintf(f, "%s%s: " RLIM_FMT "\n",
3340 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3341 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3342 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3343 }
94f04347 3344
f8b69d1d 3345 if (c->ioprio_set) {
1756a011 3346 _cleanup_free_ char *class_str = NULL;
f8b69d1d 3347
1756a011 3348 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
9eba9da4
LP
3349 fprintf(f,
3350 "%sIOSchedulingClass: %s\n"
3351 "%sIOPriority: %i\n",
f8b69d1d 3352 prefix, strna(class_str),
9eba9da4 3353 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
f8b69d1d 3354 }
94f04347 3355
f8b69d1d 3356 if (c->cpu_sched_set) {
1756a011 3357 _cleanup_free_ char *policy_str = NULL;
f8b69d1d 3358
1756a011 3359 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
94f04347
LP
3360 fprintf(f,
3361 "%sCPUSchedulingPolicy: %s\n"
38b48754
LP
3362 "%sCPUSchedulingPriority: %i\n"
3363 "%sCPUSchedulingResetOnFork: %s\n",
f8b69d1d 3364 prefix, strna(policy_str),
38b48754
LP
3365 prefix, c->cpu_sched_priority,
3366 prefix, yes_no(c->cpu_sched_reset_on_fork));
b929bf04 3367 }
94f04347 3368
82c121a4 3369 if (c->cpuset) {
94f04347 3370 fprintf(f, "%sCPUAffinity:", prefix);
82c121a4
LP
3371 for (i = 0; i < c->cpuset_ncpus; i++)
3372 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
43a99a7a 3373 fprintf(f, " %u", i);
94f04347
LP
3374 fputs("\n", f);
3375 }
3376
3a43da28 3377 if (c->timer_slack_nsec != NSEC_INFINITY)
ccd06097 3378 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
94f04347
LP
3379
3380 fprintf(f,
80876c20
LP
3381 "%sStandardInput: %s\n"
3382 "%sStandardOutput: %s\n"
3383 "%sStandardError: %s\n",
3384 prefix, exec_input_to_string(c->std_input),
3385 prefix, exec_output_to_string(c->std_output),
3386 prefix, exec_output_to_string(c->std_error));
3387
3388 if (c->tty_path)
3389 fprintf(f,
6ea832a2
LP
3390 "%sTTYPath: %s\n"
3391 "%sTTYReset: %s\n"
3392 "%sTTYVHangup: %s\n"
3393 "%sTTYVTDisallocate: %s\n",
3394 prefix, c->tty_path,
3395 prefix, yes_no(c->tty_reset),
3396 prefix, yes_no(c->tty_vhangup),
3397 prefix, yes_no(c->tty_vt_disallocate));
94f04347 3398
5ce70e5b
ZJS
3399 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
3400 c->std_output == EXEC_OUTPUT_KMSG ||
3401 c->std_output == EXEC_OUTPUT_JOURNAL ||
3402 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3403 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3404 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
3405 c->std_error == EXEC_OUTPUT_SYSLOG ||
3406 c->std_error == EXEC_OUTPUT_KMSG ||
3407 c->std_error == EXEC_OUTPUT_JOURNAL ||
3408 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3409 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3410 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
f8b69d1d 3411
5ce70e5b 3412 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
f8b69d1d 3413
5ce70e5b
ZJS
3414 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3415 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
f8b69d1d 3416
94f04347
LP
3417 fprintf(f,
3418 "%sSyslogFacility: %s\n"
3419 "%sSyslogLevel: %s\n",
f8b69d1d
MS
3420 prefix, strna(fac_str),
3421 prefix, strna(lvl_str));
f8b69d1d 3422 }
94f04347 3423
94f04347
LP
3424 if (c->secure_bits)
3425 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
3426 prefix,
cbb21cca
ZJS
3427 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
3428 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
3429 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
3430 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
3431 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
3432 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
94f04347 3433
a103496c 3434 if (c->capability_bounding_set != CAP_ALL) {
ae556c21 3435 unsigned long l;
260abb78 3436 fprintf(f, "%sCapabilityBoundingSet:", prefix);
94f04347 3437
64685e0c 3438 for (l = 0; l <= cap_last_cap(); l++)
a103496c 3439 if (c->capability_bounding_set & (UINT64_C(1) << l))
2822da4f 3440 fprintf(f, " %s", strna(capability_to_name(l)));
94f04347
LP
3441
3442 fputs("\n", f);
755d4b67
IP
3443 }
3444
3445 if (c->capability_ambient_set != 0) {
3446 unsigned long l;
3447 fprintf(f, "%sAmbientCapabilities:", prefix);
3448
3449 for (l = 0; l <= cap_last_cap(); l++)
3450 if (c->capability_ambient_set & (UINT64_C(1) << l))
3451 fprintf(f, " %s", strna(capability_to_name(l)));
3452
3453 fputs("\n", f);
94f04347
LP
3454 }
3455
3456 if (c->user)
f2d3769a 3457 fprintf(f, "%sUser: %s\n", prefix, c->user);
94f04347 3458 if (c->group)
f2d3769a 3459 fprintf(f, "%sGroup: %s\n", prefix, c->group);
94f04347 3460
29206d46
LP
3461 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3462
15ae422b 3463 if (strv_length(c->supplementary_groups) > 0) {
94f04347 3464 fprintf(f, "%sSupplementaryGroups:", prefix);
15ae422b
LP
3465 strv_fprintf(f, c->supplementary_groups);
3466 fputs("\n", f);
3467 }
94f04347 3468
5b6319dc 3469 if (c->pam_name)
f2d3769a 3470 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
5b6319dc 3471
2a624c36
AP
3472 if (strv_length(c->read_write_paths) > 0) {
3473 fprintf(f, "%sReadWritePaths:", prefix);
3474 strv_fprintf(f, c->read_write_paths);
15ae422b
LP
3475 fputs("\n", f);
3476 }
3477
2a624c36
AP
3478 if (strv_length(c->read_only_paths) > 0) {
3479 fprintf(f, "%sReadOnlyPaths:", prefix);
3480 strv_fprintf(f, c->read_only_paths);
15ae422b
LP
3481 fputs("\n", f);
3482 }
94f04347 3483
2a624c36
AP
3484 if (strv_length(c->inaccessible_paths) > 0) {
3485 fprintf(f, "%sInaccessiblePaths:", prefix);
3486 strv_fprintf(f, c->inaccessible_paths);
94f04347
LP
3487 fputs("\n", f);
3488 }
2e22afe9 3489
169c1bda
LP
3490 if (c->utmp_id)
3491 fprintf(f,
3492 "%sUtmpIdentifier: %s\n",
3493 prefix, c->utmp_id);
7b52a628
MS
3494
3495 if (c->selinux_context)
3496 fprintf(f,
5f8640fb
LP
3497 "%sSELinuxContext: %s%s\n",
3498 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
17df7223 3499
050f7277 3500 if (c->personality != PERSONALITY_INVALID)
ac45f971
LP
3501 fprintf(f,
3502 "%sPersonality: %s\n",
3503 prefix, strna(personality_to_string(c->personality)));
3504
17df7223 3505 if (c->syscall_filter) {
351a19b1 3506#ifdef HAVE_SECCOMP
17df7223
LP
3507 Iterator j;
3508 void *id;
3509 bool first = true;
351a19b1 3510#endif
17df7223
LP
3511
3512 fprintf(f,
57183d11 3513 "%sSystemCallFilter: ",
17df7223
LP
3514 prefix);
3515
3516 if (!c->syscall_whitelist)
3517 fputc('~', f);
3518
351a19b1 3519#ifdef HAVE_SECCOMP
17df7223
LP
3520 SET_FOREACH(id, c->syscall_filter, j) {
3521 _cleanup_free_ char *name = NULL;
3522
3523 if (first)
3524 first = false;
3525 else
3526 fputc(' ', f);
3527
57183d11 3528 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
17df7223
LP
3529 fputs(strna(name), f);
3530 }
351a19b1 3531#endif
17df7223
LP
3532
3533 fputc('\n', f);
3534 }
3535
57183d11
LP
3536 if (c->syscall_archs) {
3537#ifdef HAVE_SECCOMP
3538 Iterator j;
3539 void *id;
3540#endif
3541
3542 fprintf(f,
3543 "%sSystemCallArchitectures:",
3544 prefix);
3545
3546#ifdef HAVE_SECCOMP
3547 SET_FOREACH(id, c->syscall_archs, j)
3548 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3549#endif
3550 fputc('\n', f);
3551 }
3552
add00535
LP
3553 if (exec_context_restrict_namespaces_set(c)) {
3554 _cleanup_free_ char *s = NULL;
3555
3556 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3557 if (r >= 0)
3558 fprintf(f, "%sRestrictNamespaces: %s\n",
3559 prefix, s);
3560 }
3561
b3267152 3562 if (c->syscall_errno > 0)
17df7223
LP
3563 fprintf(f,
3564 "%sSystemCallErrorNumber: %s\n",
3565 prefix, strna(errno_to_name(c->syscall_errno)));
eef65bf3
MS
3566
3567 if (c->apparmor_profile)
3568 fprintf(f,
3569 "%sAppArmorProfile: %s%s\n",
3570 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
5cb5a6ff
LP
3571}
3572
a931ad47
LP
3573bool exec_context_maintains_privileges(ExecContext *c) {
3574 assert(c);
3575
61233823 3576 /* Returns true if the process forked off would run under
a931ad47
LP
3577 * an unchanged UID or as root. */
3578
3579 if (!c->user)
3580 return true;
3581
3582 if (streq(c->user, "root") || streq(c->user, "0"))
3583 return true;
3584
3585 return false;
3586}
3587
b58b4116 3588void exec_status_start(ExecStatus *s, pid_t pid) {
034c6ed7 3589 assert(s);
5cb5a6ff 3590
b58b4116
LP
3591 zero(*s);
3592 s->pid = pid;
3593 dual_timestamp_get(&s->start_timestamp);
3594}
3595
6ea832a2 3596void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
b58b4116
LP
3597 assert(s);
3598
0b1f4ae6 3599 if (s->pid && s->pid != pid)
b58b4116
LP
3600 zero(*s);
3601
034c6ed7 3602 s->pid = pid;
63983207 3603 dual_timestamp_get(&s->exit_timestamp);
9fb86720 3604
034c6ed7
LP
3605 s->code = code;
3606 s->status = status;
169c1bda 3607
6ea832a2
LP
3608 if (context) {
3609 if (context->utmp_id)
3610 utmp_put_dead_process(context->utmp_id, pid, code, status);
3611
1e22b5cd 3612 exec_context_tty_reset(context, NULL);
6ea832a2 3613 }
9fb86720
LP
3614}
3615
3616void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3617 char buf[FORMAT_TIMESTAMP_MAX];
3618
3619 assert(s);
3620 assert(f);
3621
9fb86720
LP
3622 if (s->pid <= 0)
3623 return;
3624
4c940960
LP
3625 prefix = strempty(prefix);
3626
9fb86720 3627 fprintf(f,
ccd06097
ZJS
3628 "%sPID: "PID_FMT"\n",
3629 prefix, s->pid);
9fb86720 3630
af9d16e1 3631 if (dual_timestamp_is_set(&s->start_timestamp))
9fb86720
LP
3632 fprintf(f,
3633 "%sStart Timestamp: %s\n",
63983207 3634 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
9fb86720 3635
af9d16e1 3636 if (dual_timestamp_is_set(&s->exit_timestamp))
9fb86720
LP
3637 fprintf(f,
3638 "%sExit Timestamp: %s\n"
3639 "%sExit Code: %s\n"
3640 "%sExit Status: %i\n",
63983207 3641 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
9fb86720
LP
3642 prefix, sigchld_code_to_string(s->code),
3643 prefix, s->status);
5cb5a6ff 3644}
44d8db9e 3645
9e2f7c11 3646char *exec_command_line(char **argv) {
44d8db9e
LP
3647 size_t k;
3648 char *n, *p, **a;
3649 bool first = true;
3650
9e2f7c11 3651 assert(argv);
44d8db9e 3652
9164977d 3653 k = 1;
9e2f7c11 3654 STRV_FOREACH(a, argv)
44d8db9e
LP
3655 k += strlen(*a)+3;
3656
5cd9cd35
LP
3657 n = new(char, k);
3658 if (!n)
44d8db9e
LP
3659 return NULL;
3660
3661 p = n;
9e2f7c11 3662 STRV_FOREACH(a, argv) {
44d8db9e
LP
3663
3664 if (!first)
3665 *(p++) = ' ';
3666 else
3667 first = false;
3668
3669 if (strpbrk(*a, WHITESPACE)) {
3670 *(p++) = '\'';
3671 p = stpcpy(p, *a);
3672 *(p++) = '\'';
3673 } else
3674 p = stpcpy(p, *a);
3675
3676 }
3677
9164977d
LP
3678 *p = 0;
3679
44d8db9e
LP
3680 /* FIXME: this doesn't really handle arguments that have
3681 * spaces and ticks in them */
3682
3683 return n;
3684}
3685
3686void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e1d75803 3687 _cleanup_free_ char *cmd = NULL;
4c940960 3688 const char *prefix2;
44d8db9e
LP
3689
3690 assert(c);
3691 assert(f);
3692
4c940960 3693 prefix = strempty(prefix);
63c372cb 3694 prefix2 = strjoina(prefix, "\t");
44d8db9e 3695
9e2f7c11 3696 cmd = exec_command_line(c->argv);
44d8db9e
LP
3697 fprintf(f,
3698 "%sCommand Line: %s\n",
3699 prefix, cmd ? cmd : strerror(ENOMEM));
3700
9fb86720 3701 exec_status_dump(&c->exec_status, f, prefix2);
44d8db9e
LP
3702}
3703
3704void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
3705 assert(f);
3706
4c940960 3707 prefix = strempty(prefix);
44d8db9e
LP
3708
3709 LIST_FOREACH(command, c, c)
3710 exec_command_dump(c, f, prefix);
3711}
94f04347 3712
a6a80b4f
LP
3713void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
3714 ExecCommand *end;
3715
3716 assert(l);
3717 assert(e);
3718
3719 if (*l) {
35b8ca3a 3720 /* It's kind of important, that we keep the order here */
71fda00f
LP
3721 LIST_FIND_TAIL(command, *l, end);
3722 LIST_INSERT_AFTER(command, *l, end, e);
a6a80b4f
LP
3723 } else
3724 *l = e;
3725}
3726
26fd040d
LP
3727int exec_command_set(ExecCommand *c, const char *path, ...) {
3728 va_list ap;
3729 char **l, *p;
3730
3731 assert(c);
3732 assert(path);
3733
3734 va_start(ap, path);
3735 l = strv_new_ap(path, ap);
3736 va_end(ap);
3737
3738 if (!l)
3739 return -ENOMEM;
3740
250a918d
LP
3741 p = strdup(path);
3742 if (!p) {
26fd040d
LP
3743 strv_free(l);
3744 return -ENOMEM;
3745 }
3746
3747 free(c->path);
3748 c->path = p;
3749
3750 strv_free(c->argv);
3751 c->argv = l;
3752
3753 return 0;
3754}
3755
86b23b07 3756int exec_command_append(ExecCommand *c, const char *path, ...) {
e63ff941 3757 _cleanup_strv_free_ char **l = NULL;
86b23b07 3758 va_list ap;
86b23b07
JS
3759 int r;
3760
3761 assert(c);
3762 assert(path);
3763
3764 va_start(ap, path);
3765 l = strv_new_ap(path, ap);
3766 va_end(ap);
3767
3768 if (!l)
3769 return -ENOMEM;
3770
e287086b 3771 r = strv_extend_strv(&c->argv, l, false);
e63ff941 3772 if (r < 0)
86b23b07 3773 return r;
86b23b07
JS
3774
3775 return 0;
3776}
3777
3778
613b411c
LP
3779static int exec_runtime_allocate(ExecRuntime **rt) {
3780
3781 if (*rt)
3782 return 0;
3783
3784 *rt = new0(ExecRuntime, 1);
f146f5e1 3785 if (!*rt)
613b411c
LP
3786 return -ENOMEM;
3787
3788 (*rt)->n_ref = 1;
3789 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
3790
3791 return 0;
3792}
3793
3794int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
3795 int r;
3796
3797 assert(rt);
3798 assert(c);
3799 assert(id);
3800
3801 if (*rt)
3802 return 1;
3803
3804 if (!c->private_network && !c->private_tmp)
3805 return 0;
3806
3807 r = exec_runtime_allocate(rt);
3808 if (r < 0)
3809 return r;
3810
3811 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
33df919d 3812 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
613b411c
LP
3813 return -errno;
3814 }
3815
3816 if (c->private_tmp && !(*rt)->tmp_dir) {
3817 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
3818 if (r < 0)
3819 return r;
3820 }
3821
3822 return 1;
3823}
3824
3825ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
3826 assert(r);
3827 assert(r->n_ref > 0);
3828
3829 r->n_ref++;
3830 return r;
3831}
3832
3833ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
3834
3835 if (!r)
3836 return NULL;
3837
3838 assert(r->n_ref > 0);
3839
3840 r->n_ref--;
f2341e0a
LP
3841 if (r->n_ref > 0)
3842 return NULL;
3843
3844 free(r->tmp_dir);
3845 free(r->var_tmp_dir);
3846 safe_close_pair(r->netns_storage_socket);
6b430fdb 3847 return mfree(r);
613b411c
LP
3848}
3849
f2341e0a 3850int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
613b411c
LP
3851 assert(u);
3852 assert(f);
3853 assert(fds);
3854
3855 if (!rt)
3856 return 0;
3857
3858 if (rt->tmp_dir)
3859 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
3860
3861 if (rt->var_tmp_dir)
3862 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
3863
3864 if (rt->netns_storage_socket[0] >= 0) {
3865 int copy;
3866
3867 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
3868 if (copy < 0)
3869 return copy;
3870
3871 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
3872 }
3873
3874 if (rt->netns_storage_socket[1] >= 0) {
3875 int copy;
3876
3877 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
3878 if (copy < 0)
3879 return copy;
3880
3881 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
3882 }
3883
3884 return 0;
3885}
3886
f2341e0a 3887int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
613b411c
LP
3888 int r;
3889
3890 assert(rt);
3891 assert(key);
3892 assert(value);
3893
3894 if (streq(key, "tmp-dir")) {
3895 char *copy;
3896
3897 r = exec_runtime_allocate(rt);
3898 if (r < 0)
f2341e0a 3899 return log_oom();
613b411c
LP
3900
3901 copy = strdup(value);
3902 if (!copy)
3903 return log_oom();
3904
3905 free((*rt)->tmp_dir);
3906 (*rt)->tmp_dir = copy;
3907
3908 } else if (streq(key, "var-tmp-dir")) {
3909 char *copy;
3910
3911 r = exec_runtime_allocate(rt);
3912 if (r < 0)
f2341e0a 3913 return log_oom();
613b411c
LP
3914
3915 copy = strdup(value);
3916 if (!copy)
3917 return log_oom();
3918
3919 free((*rt)->var_tmp_dir);
3920 (*rt)->var_tmp_dir = copy;
3921
3922 } else if (streq(key, "netns-socket-0")) {
3923 int fd;
3924
3925 r = exec_runtime_allocate(rt);
3926 if (r < 0)
f2341e0a 3927 return log_oom();
613b411c
LP
3928
3929 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 3930 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 3931 else {
03e334a1 3932 safe_close((*rt)->netns_storage_socket[0]);
613b411c
LP
3933 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
3934 }
3935 } else if (streq(key, "netns-socket-1")) {
3936 int fd;
3937
3938 r = exec_runtime_allocate(rt);
3939 if (r < 0)
f2341e0a 3940 return log_oom();
613b411c
LP
3941
3942 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
f2341e0a 3943 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
613b411c 3944 else {
03e334a1 3945 safe_close((*rt)->netns_storage_socket[1]);
613b411c
LP
3946 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
3947 }
3948 } else
3949 return 0;
3950
3951 return 1;
3952}
3953
3954static void *remove_tmpdir_thread(void *p) {
3955 _cleanup_free_ char *path = p;
3956
c6878637 3957 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
613b411c
LP
3958 return NULL;
3959}
3960
3961void exec_runtime_destroy(ExecRuntime *rt) {
98b47d54
LP
3962 int r;
3963
613b411c
LP
3964 if (!rt)
3965 return;
3966
3967 /* If there are multiple users of this, let's leave the stuff around */
3968 if (rt->n_ref > 1)
3969 return;
3970
3971 if (rt->tmp_dir) {
3972 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
98b47d54
LP
3973
3974 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
3975 if (r < 0) {
da927ba9 3976 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
98b47d54
LP
3977 free(rt->tmp_dir);
3978 }
3979
613b411c
LP
3980 rt->tmp_dir = NULL;
3981 }
3982
3983 if (rt->var_tmp_dir) {
3984 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
98b47d54
LP
3985
3986 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
3987 if (r < 0) {
da927ba9 3988 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
98b47d54
LP
3989 free(rt->var_tmp_dir);
3990 }
3991
613b411c
LP
3992 rt->var_tmp_dir = NULL;
3993 }
3994
3d94f76c 3995 safe_close_pair(rt->netns_storage_socket);
613b411c
LP
3996}
3997
80876c20
LP
3998static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
3999 [EXEC_INPUT_NULL] = "null",
4000 [EXEC_INPUT_TTY] = "tty",
4001 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4f2d528d 4002 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
52c239d7
LB
4003 [EXEC_INPUT_SOCKET] = "socket",
4004 [EXEC_INPUT_NAMED_FD] = "fd",
80876c20
LP
4005};
4006
8a0867d6
LP
4007DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4008
94f04347 4009static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
80876c20 4010 [EXEC_OUTPUT_INHERIT] = "inherit",
94f04347 4011 [EXEC_OUTPUT_NULL] = "null",
80876c20 4012 [EXEC_OUTPUT_TTY] = "tty",
94f04347 4013 [EXEC_OUTPUT_SYSLOG] = "syslog",
28dbc1e8 4014 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
9a6bca7a 4015 [EXEC_OUTPUT_KMSG] = "kmsg",
28dbc1e8 4016 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
706343f4
LP
4017 [EXEC_OUTPUT_JOURNAL] = "journal",
4018 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
52c239d7
LB
4019 [EXEC_OUTPUT_SOCKET] = "socket",
4020 [EXEC_OUTPUT_NAMED_FD] = "fd",
94f04347
LP
4021};
4022
4023DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
023a4f67
LP
4024
4025static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4026 [EXEC_UTMP_INIT] = "init",
4027 [EXEC_UTMP_LOGIN] = "login",
4028 [EXEC_UTMP_USER] = "user",
4029};
4030
4031DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);