]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #12508 from keszybz/no-root-checks
[thirdparty/systemd.git] / src / core / execute.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <glob.h>
6 #include <grp.h>
7 #include <poll.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <sys/capability.h>
11 #include <sys/eventfd.h>
12 #include <sys/mman.h>
13 #include <sys/personality.h>
14 #include <sys/prctl.h>
15 #include <sys/shm.h>
16 #include <sys/socket.h>
17 #include <sys/stat.h>
18 #include <sys/types.h>
19 #include <sys/un.h>
20 #include <unistd.h>
21 #include <utmpx.h>
22
23 #if HAVE_PAM
24 #include <security/pam_appl.h>
25 #endif
26
27 #if HAVE_SELINUX
28 #include <selinux/selinux.h>
29 #endif
30
31 #if HAVE_SECCOMP
32 #include <seccomp.h>
33 #endif
34
35 #if HAVE_APPARMOR
36 #include <sys/apparmor.h>
37 #endif
38
39 #include "sd-messages.h"
40
41 #include "af-list.h"
42 #include "alloc-util.h"
43 #if HAVE_APPARMOR
44 #include "apparmor-util.h"
45 #endif
46 #include "async.h"
47 #include "barrier.h"
48 #include "cap-list.h"
49 #include "capability-util.h"
50 #include "chown-recursive.h"
51 #include "cpu-set-util.h"
52 #include "def.h"
53 #include "env-file.h"
54 #include "env-util.h"
55 #include "errno-list.h"
56 #include "execute.h"
57 #include "exit-status.h"
58 #include "fd-util.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "glob-util.h"
62 #include "io-util.h"
63 #include "ioprio.h"
64 #include "label.h"
65 #include "log.h"
66 #include "macro.h"
67 #include "manager.h"
68 #include "memory-util.h"
69 #include "missing.h"
70 #include "mkdir.h"
71 #include "namespace.h"
72 #include "parse-util.h"
73 #include "path-util.h"
74 #include "process-util.h"
75 #include "rlimit-util.h"
76 #include "rm-rf.h"
77 #if HAVE_SECCOMP
78 #include "seccomp-util.h"
79 #endif
80 #include "securebits-util.h"
81 #include "selinux-util.h"
82 #include "signal-util.h"
83 #include "smack-util.h"
84 #include "socket-util.h"
85 #include "special.h"
86 #include "stat-util.h"
87 #include "string-table.h"
88 #include "string-util.h"
89 #include "strv.h"
90 #include "syslog-util.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "unit.h"
94 #include "user-util.h"
95 #include "utmp-wtmp.h"
96
/* Two idle timeouts, in microseconds: 5s and 1s. Their users lie outside this part of the file. */
#define IDLE_TIMEOUT_USEC  (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Send buffer size (8 MiB) requested via fd_inc_sndbuf() for the stream connected to the journal. */
#define SNDBUF_SIZE (8 * 1024 * 1024)
101
/* Renumbers the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the array
 * occupies the range directly after stdin/stdout/stderr.
 *
 * The array is modified in place: each entry that had to be moved is replaced by its duplicate and
 * the old descriptor is closed.
 *
 * Returns 0 on success, or a negative errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep sweeping until one full pass places every fd on its target number. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits where it belongs. */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free number >= target. If that wasn't the
                         * target itself, the slot is still occupied; remember the first such index
                         * so that the next sweep starts there. */
                        if (retry_at < 0 && dup_fd != target)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
146
/* Adjusts the file flags of the fds that are passed on to the child.
 *
 * The first n_socket_fds entries of the array get O_NONBLOCK set or cleared according to
 * 'nonblock'; the remaining n_storage_fds entries are left alone in that respect. FD_CLOEXEC is
 * cleared on every entry.
 *
 * Returns 0 on success, or the first negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, idx;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int q;

                /* O_NONBLOCK is only relevant for the socket activation fds at the front. */
                if (idx < n_socket_fds) {
                        q = fd_nonblock(fds[idx], nonblock);
                        if (q < 0)
                                return q;
                }

                /* The whole point is to hand these fds to the child, hence always drop
                 * FD_CLOEXEC. */
                q = fd_cloexec(fds[idx], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
179
180 static const char *exec_context_tty_path(const ExecContext *context) {
181 assert(context);
182
183 if (context->stdio_as_fds)
184 return NULL;
185
186 if (context->tty_path)
187 return context->tty_path;
188
189 return "/dev/console";
190 }
191
192 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
193 const char *path;
194
195 assert(context);
196
197 path = exec_context_tty_path(context);
198
199 if (context->tty_vhangup) {
200 if (p && p->stdin_fd >= 0)
201 (void) terminal_vhangup_fd(p->stdin_fd);
202 else if (path)
203 (void) terminal_vhangup(path);
204 }
205
206 if (context->tty_reset) {
207 if (p && p->stdin_fd >= 0)
208 (void) reset_terminal_fd(p->stdin_fd, true);
209 else if (path)
210 (void) reset_terminal(path);
211 }
212
213 if (context->tty_vt_disallocate && path)
214 (void) vt_disallocate(path);
215 }
216
217 static bool is_terminal_input(ExecInput i) {
218 return IN_SET(i,
219 EXEC_INPUT_TTY,
220 EXEC_INPUT_TTY_FORCE,
221 EXEC_INPUT_TTY_FAIL);
222 }
223
224 static bool is_terminal_output(ExecOutput o) {
225 return IN_SET(o,
226 EXEC_OUTPUT_TTY,
227 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
228 EXEC_OUTPUT_KMSG_AND_CONSOLE,
229 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230 }
231
232 static bool is_syslog_output(ExecOutput o) {
233 return IN_SET(o,
234 EXEC_OUTPUT_SYSLOG,
235 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
236 }
237
238 static bool is_kmsg_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_KMSG,
241 EXEC_OUTPUT_KMSG_AND_CONSOLE);
242 }
243
244 static bool exec_context_needs_term(const ExecContext *c) {
245 assert(c);
246
247 /* Return true if the execution context suggests we should set $TERM to something useful. */
248
249 if (is_terminal_input(c->std_input))
250 return true;
251
252 if (is_terminal_output(c->std_output))
253 return true;
254
255 if (is_terminal_output(c->std_error))
256 return true;
257
258 return !!c->tty_path;
259 }
260
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and moves the result to fd number
 * 'nfd' via move_fd(). Returns move_fd()'s result, or a negative errno if the open fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
272
273 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
274 static const union sockaddr_union sa = {
275 .un.sun_family = AF_UNIX,
276 .un.sun_path = "/run/systemd/journal/stdout",
277 };
278 uid_t olduid = UID_INVALID;
279 gid_t oldgid = GID_INVALID;
280 int r;
281
282 if (gid_is_valid(gid)) {
283 oldgid = getgid();
284
285 if (setegid(gid) < 0)
286 return -errno;
287 }
288
289 if (uid_is_valid(uid)) {
290 olduid = getuid();
291
292 if (seteuid(uid) < 0) {
293 r = -errno;
294 goto restore_gid;
295 }
296 }
297
298 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
299
300 /* If we fail to restore the uid or gid, things will likely
301 fail later on. This should only happen if an LSM interferes. */
302
303 if (uid_is_valid(uid))
304 (void) seteuid(olduid);
305
306 restore_gid:
307 if (gid_is_valid(gid))
308 (void) setegid(oldgid);
309
310 return r;
311 }
312
313 static int connect_logger_as(
314 const Unit *unit,
315 const ExecContext *context,
316 const ExecParameters *params,
317 ExecOutput output,
318 const char *ident,
319 int nfd,
320 uid_t uid,
321 gid_t gid) {
322
323 _cleanup_close_ int fd = -1;
324 int r;
325
326 assert(context);
327 assert(params);
328 assert(output < _EXEC_OUTPUT_MAX);
329 assert(ident);
330 assert(nfd >= 0);
331
332 fd = socket(AF_UNIX, SOCK_STREAM, 0);
333 if (fd < 0)
334 return -errno;
335
336 r = connect_journal_socket(fd, uid, gid);
337 if (r < 0)
338 return r;
339
340 if (shutdown(fd, SHUT_RD) < 0)
341 return -errno;
342
343 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
344
345 if (dprintf(fd,
346 "%s\n"
347 "%s\n"
348 "%i\n"
349 "%i\n"
350 "%i\n"
351 "%i\n"
352 "%i\n",
353 context->syslog_identifier ?: ident,
354 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
355 context->syslog_priority,
356 !!context->syslog_level_prefix,
357 is_syslog_output(output),
358 is_kmsg_output(output),
359 is_terminal_output(output)) < 0)
360 return -errno;
361
362 return move_fd(TAKE_FD(fd), nfd, false);
363 }
364
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves it to fd number
 * 'nfd'. Returns move_fd()'s result, or the negative error from open_terminal(). */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
377
378 static int acquire_path(const char *path, int flags, mode_t mode) {
379 union sockaddr_union sa = {};
380 _cleanup_close_ int fd = -1;
381 int r, salen;
382
383 assert(path);
384
385 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
386 flags |= O_CREAT;
387
388 fd = open(path, flags|O_NOCTTY, mode);
389 if (fd >= 0)
390 return TAKE_FD(fd);
391
392 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
393 return -errno;
394 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
395 return -ENXIO;
396
397 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
398
399 fd = socket(AF_UNIX, SOCK_STREAM, 0);
400 if (fd < 0)
401 return -errno;
402
403 salen = sockaddr_un_set_path(&sa.un, path);
404 if (salen < 0)
405 return salen;
406
407 if (connect(fd, &sa.sa, salen) < 0)
408 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
409 * indication that his wasn't an AF_UNIX socket after all */
410
411 if ((flags & O_ACCMODE) == O_RDONLY)
412 r = shutdown(fd, SHUT_WR);
413 else if ((flags & O_ACCMODE) == O_WRONLY)
414 r = shutdown(fd, SHUT_RD);
415 else
416 return TAKE_FD(fd);
417 if (r < 0)
418 return -errno;
419
420 return TAKE_FD(fd);
421 }
422
423 static int fixup_input(
424 const ExecContext *context,
425 int socket_fd,
426 bool apply_tty_stdin) {
427
428 ExecInput std_input;
429
430 assert(context);
431
432 std_input = context->std_input;
433
434 if (is_terminal_input(std_input) && !apply_tty_stdin)
435 return EXEC_INPUT_NULL;
436
437 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
438 return EXEC_INPUT_NULL;
439
440 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
441 return EXEC_INPUT_NULL;
442
443 return std_input;
444 }
445
446 static int fixup_output(ExecOutput std_output, int socket_fd) {
447
448 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
449 return EXEC_OUTPUT_INHERIT;
450
451 return std_output;
452 }
453
454 static int setup_input(
455 const ExecContext *context,
456 const ExecParameters *params,
457 int socket_fd,
458 int named_iofds[3]) {
459
460 ExecInput i;
461
462 assert(context);
463 assert(params);
464
465 if (params->stdin_fd >= 0) {
466 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
467 return -errno;
468
469 /* Try to make this the controlling tty, if it is a tty, and reset it */
470 if (isatty(STDIN_FILENO)) {
471 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
472 (void) reset_terminal_fd(STDIN_FILENO, true);
473 }
474
475 return STDIN_FILENO;
476 }
477
478 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
479
480 switch (i) {
481
482 case EXEC_INPUT_NULL:
483 return open_null_as(O_RDONLY, STDIN_FILENO);
484
485 case EXEC_INPUT_TTY:
486 case EXEC_INPUT_TTY_FORCE:
487 case EXEC_INPUT_TTY_FAIL: {
488 int fd;
489
490 fd = acquire_terminal(exec_context_tty_path(context),
491 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
492 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
493 ACQUIRE_TERMINAL_WAIT,
494 USEC_INFINITY);
495 if (fd < 0)
496 return fd;
497
498 return move_fd(fd, STDIN_FILENO, false);
499 }
500
501 case EXEC_INPUT_SOCKET:
502 assert(socket_fd >= 0);
503
504 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
505
506 case EXEC_INPUT_NAMED_FD:
507 assert(named_iofds[STDIN_FILENO] >= 0);
508
509 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
510 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
511
512 case EXEC_INPUT_DATA: {
513 int fd;
514
515 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
516 if (fd < 0)
517 return fd;
518
519 return move_fd(fd, STDIN_FILENO, false);
520 }
521
522 case EXEC_INPUT_FILE: {
523 bool rw;
524 int fd;
525
526 assert(context->stdio_file[STDIN_FILENO]);
527
528 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
529 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
530
531 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
532 if (fd < 0)
533 return fd;
534
535 return move_fd(fd, STDIN_FILENO, false);
536 }
537
538 default:
539 assert_not_reached("Unknown input type");
540 }
541 }
542
543 static bool can_inherit_stderr_from_stdout(
544 const ExecContext *context,
545 ExecOutput o,
546 ExecOutput e) {
547
548 assert(context);
549
550 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
551 * stderr fd */
552
553 if (e == EXEC_OUTPUT_INHERIT)
554 return true;
555 if (e != o)
556 return false;
557
558 if (e == EXEC_OUTPUT_NAMED_FD)
559 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
560
561 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
562 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
563
564 return true;
565 }
566
567 static int setup_output(
568 const Unit *unit,
569 const ExecContext *context,
570 const ExecParameters *params,
571 int fileno,
572 int socket_fd,
573 int named_iofds[3],
574 const char *ident,
575 uid_t uid,
576 gid_t gid,
577 dev_t *journal_stream_dev,
578 ino_t *journal_stream_ino) {
579
580 ExecOutput o;
581 ExecInput i;
582 int r;
583
584 assert(unit);
585 assert(context);
586 assert(params);
587 assert(ident);
588 assert(journal_stream_dev);
589 assert(journal_stream_ino);
590
591 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
592
593 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
594 return -errno;
595
596 return STDOUT_FILENO;
597 }
598
599 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
600 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
601 return -errno;
602
603 return STDERR_FILENO;
604 }
605
606 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
607 o = fixup_output(context->std_output, socket_fd);
608
609 if (fileno == STDERR_FILENO) {
610 ExecOutput e;
611 e = fixup_output(context->std_error, socket_fd);
612
613 /* This expects the input and output are already set up */
614
615 /* Don't change the stderr file descriptor if we inherit all
616 * the way and are not on a tty */
617 if (e == EXEC_OUTPUT_INHERIT &&
618 o == EXEC_OUTPUT_INHERIT &&
619 i == EXEC_INPUT_NULL &&
620 !is_terminal_input(context->std_input) &&
621 getppid () != 1)
622 return fileno;
623
624 /* Duplicate from stdout if possible */
625 if (can_inherit_stderr_from_stdout(context, o, e))
626 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
627
628 o = e;
629
630 } else if (o == EXEC_OUTPUT_INHERIT) {
631 /* If input got downgraded, inherit the original value */
632 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
633 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
634
635 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
636 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
637 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
638
639 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
640 if (getppid() != 1)
641 return fileno;
642
643 /* We need to open /dev/null here anew, to get the right access mode. */
644 return open_null_as(O_WRONLY, fileno);
645 }
646
647 switch (o) {
648
649 case EXEC_OUTPUT_NULL:
650 return open_null_as(O_WRONLY, fileno);
651
652 case EXEC_OUTPUT_TTY:
653 if (is_terminal_input(i))
654 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
655
656 /* We don't reset the terminal if this is just about output */
657 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
658
659 case EXEC_OUTPUT_SYSLOG:
660 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
661 case EXEC_OUTPUT_KMSG:
662 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
663 case EXEC_OUTPUT_JOURNAL:
664 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
665 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
666 if (r < 0) {
667 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
668 r = open_null_as(O_WRONLY, fileno);
669 } else {
670 struct stat st;
671
672 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
673 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
674 * services to detect whether they are connected to the journal or not.
675 *
676 * If both stdout and stderr are connected to a stream then let's make sure to store the data
677 * about STDERR as that's usually the best way to do logging. */
678
679 if (fstat(fileno, &st) >= 0 &&
680 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
681 *journal_stream_dev = st.st_dev;
682 *journal_stream_ino = st.st_ino;
683 }
684 }
685 return r;
686
687 case EXEC_OUTPUT_SOCKET:
688 assert(socket_fd >= 0);
689
690 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
691
692 case EXEC_OUTPUT_NAMED_FD:
693 assert(named_iofds[fileno] >= 0);
694
695 (void) fd_nonblock(named_iofds[fileno], false);
696 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
697
698 case EXEC_OUTPUT_FILE:
699 case EXEC_OUTPUT_FILE_APPEND: {
700 bool rw;
701 int fd, flags;
702
703 assert(context->stdio_file[fileno]);
704
705 rw = context->std_input == EXEC_INPUT_FILE &&
706 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
707
708 if (rw)
709 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
710
711 flags = O_WRONLY;
712 if (o == EXEC_OUTPUT_FILE_APPEND)
713 flags |= O_APPEND;
714
715 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
716 if (fd < 0)
717 return fd;
718
719 return move_fd(fd, fileno, 0);
720 }
721
722 default:
723 assert_not_reached("Unknown error type");
724 }
725 }
726
727 static int chown_terminal(int fd, uid_t uid) {
728 int r;
729
730 assert(fd >= 0);
731
732 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
733 if (isatty(fd) < 1) {
734 if (IN_SET(errno, EINVAL, ENOTTY))
735 return 0; /* not a tty */
736
737 return -errno;
738 }
739
740 /* This might fail. What matters are the results. */
741 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
742 if (r < 0)
743 return r;
744
745 return 1;
746 }
747
748 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
749 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
750 int r;
751
752 assert(_saved_stdin);
753 assert(_saved_stdout);
754
755 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
756 if (saved_stdin < 0)
757 return -errno;
758
759 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
760 if (saved_stdout < 0)
761 return -errno;
762
763 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
764 if (fd < 0)
765 return fd;
766
767 r = chown_terminal(fd, getuid());
768 if (r < 0)
769 return r;
770
771 r = reset_terminal_fd(fd, true);
772 if (r < 0)
773 return r;
774
775 r = rearrange_stdio(fd, fd, STDERR_FILENO);
776 fd = -1;
777 if (r < 0)
778 return r;
779
780 *_saved_stdin = saved_stdin;
781 *_saved_stdout = saved_stdout;
782
783 saved_stdin = saved_stdout = -1;
784
785 return 0;
786 }
787
788 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
789 assert(err < 0);
790
791 if (err == -ETIMEDOUT)
792 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
793 else {
794 errno = -err;
795 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
796 }
797 }
798
799 static void write_confirm_error(int err, const char *vc, const Unit *u) {
800 _cleanup_close_ int fd = -1;
801
802 assert(vc);
803
804 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
805 if (fd < 0)
806 return;
807
808 write_confirm_error_fd(err, fd, u);
809 }
810
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout copies
 * back in place (where valid) and closes the copies.
 *
 * Both restores are always attempted. Returns 0 on success, otherwise the negative errno of the
 * last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* The copies are no longer needed; store safe_close()'s return value back so the caller's
         * variables no longer hold the closed fds. */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
832
/* Possible outcomes of ask_for_confirmation(). */
enum {
        CONFIRM_PRETEND_FAILURE = -1,   /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,   /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,   /* go ahead and execute the command */
};
838
839 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
840 int saved_stdout = -1, saved_stdin = -1, r;
841 _cleanup_free_ char *e = NULL;
842 char c;
843
844 /* For any internal errors, assume a positive response. */
845 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
846 if (r < 0) {
847 write_confirm_error(r, vc, u);
848 return CONFIRM_EXECUTE;
849 }
850
851 /* confirm_spawn might have been disabled while we were sleeping. */
852 if (manager_is_confirm_spawn_disabled(u->manager)) {
853 r = 1;
854 goto restore_stdio;
855 }
856
857 e = ellipsize(cmdline, 60, 100);
858 if (!e) {
859 log_oom();
860 r = CONFIRM_EXECUTE;
861 goto restore_stdio;
862 }
863
864 for (;;) {
865 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
866 if (r < 0) {
867 write_confirm_error_fd(r, STDOUT_FILENO, u);
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
871
872 switch (c) {
873 case 'c':
874 printf("Resuming normal execution.\n");
875 manager_disable_confirm_spawn();
876 r = 1;
877 break;
878 case 'D':
879 unit_dump(u, stdout, " ");
880 continue; /* ask again */
881 case 'f':
882 printf("Failing execution.\n");
883 r = CONFIRM_PRETEND_FAILURE;
884 break;
885 case 'h':
886 printf(" c - continue, proceed without asking anymore\n"
887 " D - dump, show the state of the unit\n"
888 " f - fail, don't execute the command and pretend it failed\n"
889 " h - help\n"
890 " i - info, show a short summary of the unit\n"
891 " j - jobs, show jobs that are in progress\n"
892 " s - skip, don't execute the command and pretend it succeeded\n"
893 " y - yes, execute the command\n");
894 continue; /* ask again */
895 case 'i':
896 printf(" Description: %s\n"
897 " Unit: %s\n"
898 " Command: %s\n",
899 u->id, u->description, cmdline);
900 continue; /* ask again */
901 case 'j':
902 manager_dump_jobs(u->manager, stdout, " ");
903 continue; /* ask again */
904 case 'n':
905 /* 'n' was removed in favor of 'f'. */
906 printf("Didn't understand 'n', did you mean 'f'?\n");
907 continue; /* ask again */
908 case 's':
909 printf("Skipping execution.\n");
910 r = CONFIRM_PRETEND_SUCCESS;
911 break;
912 case 'y':
913 r = CONFIRM_EXECUTE;
914 break;
915 default:
916 assert_not_reached("Unhandled choice");
917 }
918 break;
919 }
920
921 restore_stdio:
922 restore_confirm_stdio(&saved_stdin, &saved_stdout);
923 return r;
924 }
925
926 static int get_fixed_user(const ExecContext *c, const char **user,
927 uid_t *uid, gid_t *gid,
928 const char **home, const char **shell) {
929 int r;
930 const char *name;
931
932 assert(c);
933
934 if (!c->user)
935 return 0;
936
937 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
938 * (i.e. are "/" or "/bin/nologin"). */
939
940 name = c->user;
941 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
942 if (r < 0)
943 return r;
944
945 *user = name;
946 return 0;
947 }
948
949 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
950 int r;
951 const char *name;
952
953 assert(c);
954
955 if (!c->group)
956 return 0;
957
958 name = c->group;
959 r = get_group_creds(&name, gid, 0);
960 if (r < 0)
961 return r;
962
963 *group = name;
964 return 0;
965 }
966
967 static int get_supplementary_groups(const ExecContext *c, const char *user,
968 const char *group, gid_t gid,
969 gid_t **supplementary_gids, int *ngids) {
970 char **i;
971 int r, k = 0;
972 int ngroups_max;
973 bool keep_groups = false;
974 gid_t *groups = NULL;
975 _cleanup_free_ gid_t *l_gids = NULL;
976
977 assert(c);
978
979 /*
980 * If user is given, then lookup GID and supplementary groups list.
981 * We avoid NSS lookups for gid=0. Also we have to initialize groups
982 * here and as early as possible so we keep the list of supplementary
983 * groups of the caller.
984 */
985 if (user && gid_is_valid(gid) && gid != 0) {
986 /* First step, initialize groups from /etc/groups */
987 if (initgroups(user, gid) < 0)
988 return -errno;
989
990 keep_groups = true;
991 }
992
993 if (strv_isempty(c->supplementary_groups))
994 return 0;
995
996 /*
997 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
998 * be positive, otherwise fail.
999 */
1000 errno = 0;
1001 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1002 if (ngroups_max <= 0) {
1003 if (errno > 0)
1004 return -errno;
1005 else
1006 return -EOPNOTSUPP; /* For all other values */
1007 }
1008
1009 l_gids = new(gid_t, ngroups_max);
1010 if (!l_gids)
1011 return -ENOMEM;
1012
1013 if (keep_groups) {
1014 /*
1015 * Lookup the list of groups that the user belongs to, we
1016 * avoid NSS lookups here too for gid=0.
1017 */
1018 k = ngroups_max;
1019 if (getgrouplist(user, gid, l_gids, &k) < 0)
1020 return -EINVAL;
1021 } else
1022 k = 0;
1023
1024 STRV_FOREACH(i, c->supplementary_groups) {
1025 const char *g;
1026
1027 if (k >= ngroups_max)
1028 return -E2BIG;
1029
1030 g = *i;
1031 r = get_group_creds(&g, l_gids+k, 0);
1032 if (r < 0)
1033 return r;
1034
1035 k++;
1036 }
1037
1038 /*
1039 * Sets ngids to zero to drop all supplementary groups, happens
1040 * when we are under root and SupplementaryGroups= is empty.
1041 */
1042 if (k == 0) {
1043 *ngids = 0;
1044 return 0;
1045 }
1046
1047 /* Otherwise get the final list of supplementary groups */
1048 groups = memdup(l_gids, sizeof(gid_t) * k);
1049 if (!groups)
1050 return -ENOMEM;
1051
1052 *supplementary_gids = groups;
1053 *ngids = k;
1054
1055 groups = NULL;
1056
1057 return 0;
1058 }
1059
/* Applies the group credentials to the calling process: the supplementary groups if 'ngids' is
 * positive, followed by the real, effective and saved GID if 'gid' is valid.
 *
 * Returns 0 on success, a negative error otherwise. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* SupplementaryGroups= handling, only if there is something to set. */
        if (ngids > 0) {
                int q;

                q = maybe_setgroups(ngids, supplementary_gids);
                if (q < 0)
                        return q;
        }

        /* Then switch our GIDs. */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1078
1079 static int enforce_user(const ExecContext *context, uid_t uid) {
1080 assert(context);
1081
1082 if (!uid_is_valid(uid))
1083 return 0;
1084
1085 /* Sets (but doesn't look up) the uid and make sure we keep the
1086 * capabilities while doing so. */
1087
1088 if (context->capability_ambient_set != 0) {
1089
1090 /* First step: If we need to keep capabilities but
1091 * drop privileges we need to make sure we keep our
1092 * caps, while we drop privileges. */
1093 if (uid != 0) {
1094 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1095
1096 if (prctl(PR_GET_SECUREBITS) != sb)
1097 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1098 return -errno;
1099 }
1100 }
1101
1102 /* Second step: actually set the uids */
1103 if (setresuid(uid, uid, uid) < 0)
1104 return -errno;
1105
1106 /* At this point we should have all necessary capabilities but
1107 are otherwise a normal user. However, the caps might got
1108 corrupted due to the setresuid() so we need clean them up
1109 later. This is done outside of this call. */
1110
1111 return 0;
1112 }
1113
1114 #if HAVE_PAM
1115
1116 static int null_conv(
1117 int num_msg,
1118 const struct pam_message **msg,
1119 struct pam_response **resp,
1120 void *appdata_ptr) {
1121
1122 /* We don't support conversations */
1123
1124 return PAM_CONV_ERR;
1125 }
1126
1127 #endif
1128
1129 static int setup_pam(
1130 const char *name,
1131 const char *user,
1132 uid_t uid,
1133 gid_t gid,
1134 const char *tty,
1135 char ***env,
1136 int fds[], size_t n_fds) {
1137
1138 #if HAVE_PAM
1139
1140 static const struct pam_conv conv = {
1141 .conv = null_conv,
1142 .appdata_ptr = NULL
1143 };
1144
1145 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
1146 pam_handle_t *handle = NULL;
1147 sigset_t old_ss;
1148 int pam_code = PAM_SUCCESS, r;
1149 char **nv, **e = NULL;
1150 bool close_session = false;
1151 pid_t pam_pid = 0, parent_pid;
1152 int flags = 0;
1153
1154 assert(name);
1155 assert(user);
1156 assert(env);
1157
1158 /* We set up PAM in the parent process, then fork. The child
1159 * will then stay around until killed via PR_GET_PDEATHSIG or
1160 * systemd via the cgroup logic. It will then remove the PAM
1161 * session again. The parent process will exec() the actual
1162 * daemon. We do things this way to ensure that the main PID
1163 * of the daemon is the one we initially fork()ed. */
1164
1165 r = barrier_create(&barrier);
1166 if (r < 0)
1167 goto fail;
1168
1169 if (log_get_max_level() < LOG_DEBUG)
1170 flags |= PAM_SILENT;
1171
1172 pam_code = pam_start(name, user, &conv, &handle);
1173 if (pam_code != PAM_SUCCESS) {
1174 handle = NULL;
1175 goto fail;
1176 }
1177
1178 if (!tty) {
1179 _cleanup_free_ char *q = NULL;
1180
1181 /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
1182 * out if that's the case, and read the TTY off it. */
1183
1184 if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
1185 tty = strjoina("/dev/", q);
1186 }
1187
1188 if (tty) {
1189 pam_code = pam_set_item(handle, PAM_TTY, tty);
1190 if (pam_code != PAM_SUCCESS)
1191 goto fail;
1192 }
1193
1194 STRV_FOREACH(nv, *env) {
1195 pam_code = pam_putenv(handle, *nv);
1196 if (pam_code != PAM_SUCCESS)
1197 goto fail;
1198 }
1199
1200 pam_code = pam_acct_mgmt(handle, flags);
1201 if (pam_code != PAM_SUCCESS)
1202 goto fail;
1203
1204 pam_code = pam_open_session(handle, flags);
1205 if (pam_code != PAM_SUCCESS)
1206 goto fail;
1207
1208 close_session = true;
1209
1210 e = pam_getenvlist(handle);
1211 if (!e) {
1212 pam_code = PAM_BUF_ERR;
1213 goto fail;
1214 }
1215
1216 /* Block SIGTERM, so that we know that it won't get lost in
1217 * the child */
1218
1219 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
1220
1221 parent_pid = getpid_cached();
1222
1223 r = safe_fork("(sd-pam)", 0, &pam_pid);
1224 if (r < 0)
1225 goto fail;
1226 if (r == 0) {
1227 int sig, ret = EXIT_PAM;
1228
1229 /* The child's job is to reset the PAM session on
1230 * termination */
1231 barrier_set_role(&barrier, BARRIER_CHILD);
1232
1233 /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
1234 * are open here that have been opened by PAM. */
1235 (void) close_many(fds, n_fds);
1236
1237 /* Drop privileges - we don't need any to pam_close_session
1238 * and this will make PR_SET_PDEATHSIG work in most cases.
1239 * If this fails, ignore the error - but expect sd-pam threads
1240 * to fail to exit normally */
1241
1242 r = maybe_setgroups(0, NULL);
1243 if (r < 0)
1244 log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
1245 if (setresgid(gid, gid, gid) < 0)
1246 log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
1247 if (setresuid(uid, uid, uid) < 0)
1248 log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
1249
1250 (void) ignore_signals(SIGPIPE, -1);
1251
1252 /* Wait until our parent died. This will only work if
1253 * the above setresuid() succeeds, otherwise the kernel
1254 * will not allow unprivileged parents kill their privileged
1255 * children this way. We rely on the control groups kill logic
1256 * to do the rest for us. */
1257 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
1258 goto child_finish;
1259
1260 /* Tell the parent that our setup is done. This is especially
1261 * important regarding dropping privileges. Otherwise, unit
1262 * setup might race against our setresuid(2) call.
1263 *
1264 * If the parent aborted, we'll detect this below, hence ignore
1265 * return failure here. */
1266 (void) barrier_place(&barrier);
1267
1268 /* Check if our parent process might already have died? */
1269 if (getppid() == parent_pid) {
1270 sigset_t ss;
1271
1272 assert_se(sigemptyset(&ss) >= 0);
1273 assert_se(sigaddset(&ss, SIGTERM) >= 0);
1274
1275 for (;;) {
1276 if (sigwait(&ss, &sig) < 0) {
1277 if (errno == EINTR)
1278 continue;
1279
1280 goto child_finish;
1281 }
1282
1283 assert(sig == SIGTERM);
1284 break;
1285 }
1286 }
1287
1288 /* If our parent died we'll end the session */
1289 if (getppid() != parent_pid) {
1290 pam_code = pam_close_session(handle, flags);
1291 if (pam_code != PAM_SUCCESS)
1292 goto child_finish;
1293 }
1294
1295 ret = 0;
1296
1297 child_finish:
1298 pam_end(handle, pam_code | flags);
1299 _exit(ret);
1300 }
1301
1302 barrier_set_role(&barrier, BARRIER_PARENT);
1303
1304 /* If the child was forked off successfully it will do all the
1305 * cleanups, so forget about the handle here. */
1306 handle = NULL;
1307
1308 /* Unblock SIGTERM again in the parent */
1309 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
1310
1311 /* We close the log explicitly here, since the PAM modules
1312 * might have opened it, but we don't want this fd around. */
1313 closelog();
1314
1315 /* Synchronously wait for the child to initialize. We don't care for
1316 * errors as we cannot recover. However, warn loudly if it happens. */
1317 if (!barrier_place_and_sync(&barrier))
1318 log_error("PAM initialization failed");
1319
1320 return strv_free_and_replace(*env, e);
1321
1322 fail:
1323 if (pam_code != PAM_SUCCESS) {
1324 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
1325 r = -EPERM; /* PAM errors do not map to errno */
1326 } else
1327 log_error_errno(r, "PAM failed: %m");
1328
1329 if (handle) {
1330 if (close_session)
1331 pam_code = pam_close_session(handle, flags);
1332
1333 pam_end(handle, pam_code | flags);
1334 }
1335
1336 strv_free(e);
1337 closelog();
1338
1339 return r;
1340 #else
1341 return 0;
1342 #endif
1343 }
1344
/* Renames the current process to a short, parenthesized form of the last component of 'path'. The result
 * is kept within 10 characters (i.e. the length of "/sbin/init") so that it looks pretty in /bin/ps. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters of the name + ')' + NUL */
        const char *name;
        size_t len;
        char *w;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        /* Keep only the trailing 8 characters: the tail is usually the more interesting part, since the
         * beginning might just be "systemd-". */
        len = strlen(name);
        if (len > 8) {
                name += len - 8;
                len = 8;
        }

        w = buf;
        *w++ = '(';
        memcpy(w, name, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
1375
1376 static bool context_has_address_families(const ExecContext *c) {
1377 assert(c);
1378
1379 return c->address_families_whitelist ||
1380 !set_isempty(c->address_families);
1381 }
1382
1383 static bool context_has_syscall_filters(const ExecContext *c) {
1384 assert(c);
1385
1386 return c->syscall_whitelist ||
1387 !hashmap_isempty(c->syscall_filter);
1388 }
1389
/* Decides whether "no new privileges" is needed for this context: always when explicitly requested, never
 * when we hold CAP_SYS_ADMIN, and otherwise whenever any of the settings checked below is enabled. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We are unprivileged: NNP is needed as soon as any form of seccomp is in use. The checks are kept
         * in their original order. */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute ||
            c->restrict_realtime ||
            c->restrict_suid_sgid)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables ||
            c->protect_kernel_modules ||
            c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality || c->protect_hostname;
}
1413
1414 #if HAVE_SECCOMP
1415
1416 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1417
1418 if (is_seccomp_available())
1419 return false;
1420
1421 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1422 return true;
1423 }
1424
1425 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1426 uint32_t negative_action, default_action, action;
1427 int r;
1428
1429 assert(u);
1430 assert(c);
1431
1432 if (!context_has_syscall_filters(c))
1433 return 0;
1434
1435 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1436 return 0;
1437
1438 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
1439
1440 if (c->syscall_whitelist) {
1441 default_action = negative_action;
1442 action = SCMP_ACT_ALLOW;
1443 } else {
1444 default_action = SCMP_ACT_ALLOW;
1445 action = negative_action;
1446 }
1447
1448 if (needs_ambient_hack) {
1449 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1450 if (r < 0)
1451 return r;
1452 }
1453
1454 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
1455 }
1456
1457 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1458 assert(u);
1459 assert(c);
1460
1461 if (set_isempty(c->syscall_archs))
1462 return 0;
1463
1464 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1465 return 0;
1466
1467 return seccomp_restrict_archs(c->syscall_archs);
1468 }
1469
1470 static int apply_address_families(const Unit* u, const ExecContext *c) {
1471 assert(u);
1472 assert(c);
1473
1474 if (!context_has_address_families(c))
1475 return 0;
1476
1477 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1478 return 0;
1479
1480 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1481 }
1482
1483 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1484 assert(u);
1485 assert(c);
1486
1487 if (!c->memory_deny_write_execute)
1488 return 0;
1489
1490 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1491 return 0;
1492
1493 return seccomp_memory_deny_write_execute();
1494 }
1495
1496 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1497 assert(u);
1498 assert(c);
1499
1500 if (!c->restrict_realtime)
1501 return 0;
1502
1503 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1504 return 0;
1505
1506 return seccomp_restrict_realtime();
1507 }
1508
1509 static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1510 assert(u);
1511 assert(c);
1512
1513 if (!c->restrict_suid_sgid)
1514 return 0;
1515
1516 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1517 return 0;
1518
1519 return seccomp_restrict_suid_sgid();
1520 }
1521
1522 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1523 assert(u);
1524 assert(c);
1525
1526 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1527 * let's protect even those systems where this is left on in the kernel. */
1528
1529 if (!c->protect_kernel_tunables)
1530 return 0;
1531
1532 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1533 return 0;
1534
1535 return seccomp_protect_sysctl();
1536 }
1537
1538 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1539 assert(u);
1540 assert(c);
1541
1542 /* Turn off module syscalls on ProtectKernelModules=yes */
1543
1544 if (!c->protect_kernel_modules)
1545 return 0;
1546
1547 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1548 return 0;
1549
1550 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
1551 }
1552
1553 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1554 assert(u);
1555 assert(c);
1556
1557 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1558
1559 if (!c->private_devices)
1560 return 0;
1561
1562 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1563 return 0;
1564
1565 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
1566 }
1567
1568 static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
1569 assert(u);
1570 assert(c);
1571
1572 if (!exec_context_restrict_namespaces_set(c))
1573 return 0;
1574
1575 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1576 return 0;
1577
1578 return seccomp_restrict_namespaces(c->restrict_namespaces);
1579 }
1580
1581 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1582 unsigned long personality;
1583 int r;
1584
1585 assert(u);
1586 assert(c);
1587
1588 if (!c->lock_personality)
1589 return 0;
1590
1591 if (skip_seccomp_unavailable(u, "LockPersonality="))
1592 return 0;
1593
1594 personality = c->personality;
1595
1596 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1597 if (personality == PERSONALITY_INVALID) {
1598
1599 r = opinionated_personality(&personality);
1600 if (r < 0)
1601 return r;
1602 }
1603
1604 return seccomp_lock_personality(personality);
1605 }
1606
1607 #endif
1608
1609 static void do_idle_pipe_dance(int idle_pipe[static 4]) {
1610 assert(idle_pipe);
1611
1612 idle_pipe[1] = safe_close(idle_pipe[1]);
1613 idle_pipe[2] = safe_close(idle_pipe[2]);
1614
1615 if (idle_pipe[0] >= 0) {
1616 int r;
1617
1618 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1619
1620 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1621 ssize_t n;
1622
1623 /* Signal systemd that we are bored and want to continue. */
1624 n = write(idle_pipe[3], "x", 1);
1625 if (n > 0)
1626 /* Wait for systemd to react to the signal above. */
1627 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1628 }
1629
1630 idle_pipe[0] = safe_close(idle_pipe[0]);
1631
1632 }
1633
1634 idle_pipe[3] = safe_close(idle_pipe[3]);
1635 }
1636
1637 static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1638
1639 static int build_environment(
1640 const Unit *u,
1641 const ExecContext *c,
1642 const ExecParameters *p,
1643 size_t n_fds,
1644 const char *home,
1645 const char *username,
1646 const char *shell,
1647 dev_t journal_stream_dev,
1648 ino_t journal_stream_ino,
1649 char ***ret) {
1650
1651 _cleanup_strv_free_ char **our_env = NULL;
1652 ExecDirectoryType t;
1653 size_t n_env = 0;
1654 char *x;
1655
1656 assert(u);
1657 assert(c);
1658 assert(p);
1659 assert(ret);
1660
1661 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
1662 if (!our_env)
1663 return -ENOMEM;
1664
1665 if (n_fds > 0) {
1666 _cleanup_free_ char *joined = NULL;
1667
1668 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1669 return -ENOMEM;
1670 our_env[n_env++] = x;
1671
1672 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
1673 return -ENOMEM;
1674 our_env[n_env++] = x;
1675
1676 joined = strv_join(p->fd_names, ":");
1677 if (!joined)
1678 return -ENOMEM;
1679
1680 x = strjoin("LISTEN_FDNAMES=", joined);
1681 if (!x)
1682 return -ENOMEM;
1683 our_env[n_env++] = x;
1684 }
1685
1686 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1687 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1688 return -ENOMEM;
1689 our_env[n_env++] = x;
1690
1691 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1692 return -ENOMEM;
1693 our_env[n_env++] = x;
1694 }
1695
1696 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1697 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1698 * check the database directly. */
1699 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1700 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1701 if (!x)
1702 return -ENOMEM;
1703 our_env[n_env++] = x;
1704 }
1705
1706 if (home) {
1707 x = strappend("HOME=", home);
1708 if (!x)
1709 return -ENOMEM;
1710
1711 path_simplify(x + 5, true);
1712 our_env[n_env++] = x;
1713 }
1714
1715 if (username) {
1716 x = strappend("LOGNAME=", username);
1717 if (!x)
1718 return -ENOMEM;
1719 our_env[n_env++] = x;
1720
1721 x = strappend("USER=", username);
1722 if (!x)
1723 return -ENOMEM;
1724 our_env[n_env++] = x;
1725 }
1726
1727 if (shell) {
1728 x = strappend("SHELL=", shell);
1729 if (!x)
1730 return -ENOMEM;
1731
1732 path_simplify(x + 6, true);
1733 our_env[n_env++] = x;
1734 }
1735
1736 if (!sd_id128_is_null(u->invocation_id)) {
1737 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1738 return -ENOMEM;
1739
1740 our_env[n_env++] = x;
1741 }
1742
1743 if (exec_context_needs_term(c)) {
1744 const char *tty_path, *term = NULL;
1745
1746 tty_path = exec_context_tty_path(c);
1747
1748 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1749 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1750 * passes to PID 1 ends up all the way in the console login shown. */
1751
1752 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1753 term = getenv("TERM");
1754 if (!term)
1755 term = default_term_for_tty(tty_path);
1756
1757 x = strappend("TERM=", term);
1758 if (!x)
1759 return -ENOMEM;
1760 our_env[n_env++] = x;
1761 }
1762
1763 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1764 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1765 return -ENOMEM;
1766
1767 our_env[n_env++] = x;
1768 }
1769
1770 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1771 _cleanup_free_ char *pre = NULL, *joined = NULL;
1772 const char *n;
1773
1774 if (!p->prefix[t])
1775 continue;
1776
1777 if (strv_isempty(c->directories[t].paths))
1778 continue;
1779
1780 n = exec_directory_env_name_to_string(t);
1781 if (!n)
1782 continue;
1783
1784 pre = strjoin(p->prefix[t], "/");
1785 if (!pre)
1786 return -ENOMEM;
1787
1788 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1789 if (!joined)
1790 return -ENOMEM;
1791
1792 x = strjoin(n, "=", joined);
1793 if (!x)
1794 return -ENOMEM;
1795
1796 our_env[n_env++] = x;
1797 }
1798
1799 our_env[n_env++] = NULL;
1800 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
1801
1802 *ret = TAKE_PTR(our_env);
1803
1804 return 0;
1805 }
1806
1807 static int build_pass_environment(const ExecContext *c, char ***ret) {
1808 _cleanup_strv_free_ char **pass_env = NULL;
1809 size_t n_env = 0, n_bufsize = 0;
1810 char **i;
1811
1812 STRV_FOREACH(i, c->pass_environment) {
1813 _cleanup_free_ char *x = NULL;
1814 char *v;
1815
1816 v = getenv(*i);
1817 if (!v)
1818 continue;
1819 x = strjoin(*i, "=", v);
1820 if (!x)
1821 return -ENOMEM;
1822
1823 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1824 return -ENOMEM;
1825
1826 pass_env[n_env++] = TAKE_PTR(x);
1827 pass_env[n_env] = NULL;
1828 }
1829
1830 *ret = TAKE_PTR(pass_env);
1831
1832 return 0;
1833 }
1834
1835 static bool exec_needs_mount_namespace(
1836 const ExecContext *context,
1837 const ExecParameters *params,
1838 const ExecRuntime *runtime) {
1839
1840 assert(context);
1841 assert(params);
1842
1843 if (context->root_image)
1844 return true;
1845
1846 if (!strv_isempty(context->read_write_paths) ||
1847 !strv_isempty(context->read_only_paths) ||
1848 !strv_isempty(context->inaccessible_paths))
1849 return true;
1850
1851 if (context->n_bind_mounts > 0)
1852 return true;
1853
1854 if (context->n_temporary_filesystems > 0)
1855 return true;
1856
1857 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
1858 return true;
1859
1860 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1861 return true;
1862
1863 if (context->private_devices ||
1864 context->private_mounts ||
1865 context->protect_system != PROTECT_SYSTEM_NO ||
1866 context->protect_home != PROTECT_HOME_NO ||
1867 context->protect_kernel_tunables ||
1868 context->protect_kernel_modules ||
1869 context->protect_control_groups)
1870 return true;
1871
1872 if (context->root_directory) {
1873 ExecDirectoryType t;
1874
1875 if (context->mount_apivfs)
1876 return true;
1877
1878 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1879 if (!params->prefix[t])
1880 continue;
1881
1882 if (!strv_isempty(context->directories[t].paths))
1883 return true;
1884 }
1885 }
1886
1887 if (context->dynamic_user &&
1888 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1889 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1890 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1891 return true;
1892
1893 return false;
1894 }
1895
1896 static int setup_private_users(uid_t uid, gid_t gid) {
1897 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1898 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1899 _cleanup_close_ int unshare_ready_fd = -1;
1900 _cleanup_(sigkill_waitp) pid_t pid = 0;
1901 uint64_t c = 1;
1902 ssize_t n;
1903 int r;
1904
1905 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1906 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1907 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1908 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1909 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1910 * continues execution normally. */
1911
1912 if (uid != 0 && uid_is_valid(uid)) {
1913 r = asprintf(&uid_map,
1914 "0 0 1\n" /* Map root → root */
1915 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1916 uid, uid);
1917 if (r < 0)
1918 return -ENOMEM;
1919 } else {
1920 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1921 if (!uid_map)
1922 return -ENOMEM;
1923 }
1924
1925 if (gid != 0 && gid_is_valid(gid)) {
1926 r = asprintf(&gid_map,
1927 "0 0 1\n" /* Map root → root */
1928 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1929 gid, gid);
1930 if (r < 0)
1931 return -ENOMEM;
1932 } else {
1933 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1934 if (!gid_map)
1935 return -ENOMEM;
1936 }
1937
1938 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1939 * namespace. */
1940 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1941 if (unshare_ready_fd < 0)
1942 return -errno;
1943
1944 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1945 * failed. */
1946 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1947 return -errno;
1948
1949 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1950 if (r < 0)
1951 return r;
1952 if (r == 0) {
1953 _cleanup_close_ int fd = -1;
1954 const char *a;
1955 pid_t ppid;
1956
1957 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1958 * here, after the parent opened its own user namespace. */
1959
1960 ppid = getppid();
1961 errno_pipe[0] = safe_close(errno_pipe[0]);
1962
1963 /* Wait until the parent unshared the user namespace */
1964 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1965 r = -errno;
1966 goto child_fail;
1967 }
1968
1969 /* Disable the setgroups() system call in the child user namespace, for good. */
1970 a = procfs_file_alloca(ppid, "setgroups");
1971 fd = open(a, O_WRONLY|O_CLOEXEC);
1972 if (fd < 0) {
1973 if (errno != ENOENT) {
1974 r = -errno;
1975 goto child_fail;
1976 }
1977
1978 /* If the file is missing the kernel is too old, let's continue anyway. */
1979 } else {
1980 if (write(fd, "deny\n", 5) < 0) {
1981 r = -errno;
1982 goto child_fail;
1983 }
1984
1985 fd = safe_close(fd);
1986 }
1987
1988 /* First write the GID map */
1989 a = procfs_file_alloca(ppid, "gid_map");
1990 fd = open(a, O_WRONLY|O_CLOEXEC);
1991 if (fd < 0) {
1992 r = -errno;
1993 goto child_fail;
1994 }
1995 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1996 r = -errno;
1997 goto child_fail;
1998 }
1999 fd = safe_close(fd);
2000
2001 /* The write the UID map */
2002 a = procfs_file_alloca(ppid, "uid_map");
2003 fd = open(a, O_WRONLY|O_CLOEXEC);
2004 if (fd < 0) {
2005 r = -errno;
2006 goto child_fail;
2007 }
2008 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2009 r = -errno;
2010 goto child_fail;
2011 }
2012
2013 _exit(EXIT_SUCCESS);
2014
2015 child_fail:
2016 (void) write(errno_pipe[1], &r, sizeof(r));
2017 _exit(EXIT_FAILURE);
2018 }
2019
2020 errno_pipe[1] = safe_close(errno_pipe[1]);
2021
2022 if (unshare(CLONE_NEWUSER) < 0)
2023 return -errno;
2024
2025 /* Let the child know that the namespace is ready now */
2026 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2027 return -errno;
2028
2029 /* Try to read an error code from the child */
2030 n = read(errno_pipe[0], &r, sizeof(r));
2031 if (n < 0)
2032 return -errno;
2033 if (n == sizeof(r)) { /* an error code was sent to us */
2034 if (r < 0)
2035 return r;
2036 return -EIO;
2037 }
2038 if (n != 0) /* on success we should have read 0 bytes */
2039 return -EIO;
2040
2041 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2042 pid = 0;
2043 if (r < 0)
2044 return r;
2045 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
2046 return -EIO;
2047
2048 return 0;
2049 }
2050
2051 static int setup_exec_directory(
2052 const ExecContext *context,
2053 const ExecParameters *params,
2054 uid_t uid,
2055 gid_t gid,
2056 ExecDirectoryType type,
2057 int *exit_status) {
2058
2059 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
2060 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2061 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2062 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2063 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2064 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2065 };
2066 char **rt;
2067 int r;
2068
2069 assert(context);
2070 assert(params);
2071 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
2072 assert(exit_status);
2073
2074 if (!params->prefix[type])
2075 return 0;
2076
2077 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
2078 if (!uid_is_valid(uid))
2079 uid = 0;
2080 if (!gid_is_valid(gid))
2081 gid = 0;
2082 }
2083
2084 STRV_FOREACH(rt, context->directories[type].paths) {
2085 _cleanup_free_ char *p = NULL, *pp = NULL;
2086
2087 p = path_join(params->prefix[type], *rt);
2088 if (!p) {
2089 r = -ENOMEM;
2090 goto fail;
2091 }
2092
2093 r = mkdir_parents_label(p, 0755);
2094 if (r < 0)
2095 goto fail;
2096
2097 if (context->dynamic_user &&
2098 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2099 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
2100 _cleanup_free_ char *private_root = NULL;
2101
2102 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2103 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2104 * whose UID is later on reused. To lock this down we use the same trick used by container
2105 * managers to prohibit host users to get access to files of the same UID in containers: we
2106 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2107 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2108 * to make this directory permeable for the service itself.
2109 *
2110 * Specifically: for a service which wants a special directory "foo/" we first create a
2111 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2112 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2113 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2114 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2115 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2116 * disabling the access boundary for the service and making sure it only gets access to the
2117 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2118 *
2119 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
2120 * owned by the service itself.
2121 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2122 * files or sockets with other services. */
2123
2124 private_root = path_join(params->prefix[type], "private");
2125 if (!private_root) {
2126 r = -ENOMEM;
2127 goto fail;
2128 }
2129
2130 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
2131 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
2132 if (r < 0)
2133 goto fail;
2134
2135 pp = path_join(private_root, *rt);
2136 if (!pp) {
2137 r = -ENOMEM;
2138 goto fail;
2139 }
2140
2141 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2142 r = mkdir_parents_label(pp, 0755);
2143 if (r < 0)
2144 goto fail;
2145
2146 if (is_dir(p, false) > 0 &&
2147 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2148
2149 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2150 * it over. Most likely the service has been upgraded from one that didn't use
2151 * DynamicUser=1, to one that does. */
2152
2153 if (rename(p, pp) < 0) {
2154 r = -errno;
2155 goto fail;
2156 }
2157 } else {
2158 /* Otherwise, create the actual directory for the service */
2159
2160 r = mkdir_label(pp, context->directories[type].mode);
2161 if (r < 0 && r != -EEXIST)
2162 goto fail;
2163 }
2164
2165 /* And link it up from the original place */
2166 r = symlink_idempotent(pp, p, true);
2167 if (r < 0)
2168 goto fail;
2169
2170 } else {
2171 r = mkdir_label(p, context->directories[type].mode);
2172 if (r < 0) {
2173 if (r != -EEXIST)
2174 goto fail;
2175
2176 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2177 struct stat st;
2178
2179 /* Don't change the owner/access mode of the configuration directory,
2180 * as in the common case it is not written to by a service, and shall
2181 * not be writable. */
2182
2183 if (stat(p, &st) < 0) {
2184 r = -errno;
2185 goto fail;
2186 }
2187
2188 /* Still complain if the access mode doesn't match */
2189 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2190 log_warning("%s \'%s\' already exists but the mode is different. "
2191 "(File system: %o %sMode: %o)",
2192 exec_directory_type_to_string(type), *rt,
2193 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2194
2195 continue;
2196 }
2197 }
2198 }
2199
2200 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
2201 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
2202 * current UID/GID ownership.) */
2203 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2204 if (r < 0)
2205 goto fail;
2206
2207 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2208 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2209 * assignments to exist.*/
2210 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
2211 if (r < 0)
2212 goto fail;
2213 }
2214
2215 return 0;
2216
2217 fail:
2218 *exit_status = exit_status_table[type];
2219 return r;
2220 }
2221
2222 #if ENABLE_SMACK
2223 static int setup_smack(
2224 const ExecContext *context,
2225 const ExecCommand *command) {
2226
2227 int r;
2228
2229 assert(context);
2230 assert(command);
2231
2232 if (context->smack_process_label) {
2233 r = mac_smack_apply_pid(0, context->smack_process_label);
2234 if (r < 0)
2235 return r;
2236 }
2237 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2238 else {
2239 _cleanup_free_ char *exec_label = NULL;
2240
2241 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
2242 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
2243 return r;
2244
2245 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2246 if (r < 0)
2247 return r;
2248 }
2249 #endif
2250
2251 return 0;
2252 }
2253 #endif
2254
2255 static int compile_bind_mounts(
2256 const ExecContext *context,
2257 const ExecParameters *params,
2258 BindMount **ret_bind_mounts,
2259 size_t *ret_n_bind_mounts,
2260 char ***ret_empty_directories) {
2261
2262 _cleanup_strv_free_ char **empty_directories = NULL;
2263 BindMount *bind_mounts;
2264 size_t n, h = 0, i;
2265 ExecDirectoryType t;
2266 int r;
2267
2268 assert(context);
2269 assert(params);
2270 assert(ret_bind_mounts);
2271 assert(ret_n_bind_mounts);
2272 assert(ret_empty_directories);
2273
2274 n = context->n_bind_mounts;
2275 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2276 if (!params->prefix[t])
2277 continue;
2278
2279 n += strv_length(context->directories[t].paths);
2280 }
2281
2282 if (n <= 0) {
2283 *ret_bind_mounts = NULL;
2284 *ret_n_bind_mounts = 0;
2285 *ret_empty_directories = NULL;
2286 return 0;
2287 }
2288
2289 bind_mounts = new(BindMount, n);
2290 if (!bind_mounts)
2291 return -ENOMEM;
2292
2293 for (i = 0; i < context->n_bind_mounts; i++) {
2294 BindMount *item = context->bind_mounts + i;
2295 char *s, *d;
2296
2297 s = strdup(item->source);
2298 if (!s) {
2299 r = -ENOMEM;
2300 goto finish;
2301 }
2302
2303 d = strdup(item->destination);
2304 if (!d) {
2305 free(s);
2306 r = -ENOMEM;
2307 goto finish;
2308 }
2309
2310 bind_mounts[h++] = (BindMount) {
2311 .source = s,
2312 .destination = d,
2313 .read_only = item->read_only,
2314 .recursive = item->recursive,
2315 .ignore_enoent = item->ignore_enoent,
2316 };
2317 }
2318
2319 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2320 char **suffix;
2321
2322 if (!params->prefix[t])
2323 continue;
2324
2325 if (strv_isempty(context->directories[t].paths))
2326 continue;
2327
2328 if (context->dynamic_user &&
2329 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2330 !(context->root_directory || context->root_image)) {
2331 char *private_root;
2332
2333 /* So this is for a dynamic user, and we need to make sure the process can access its own
2334 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2335 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2336
2337 private_root = strjoin(params->prefix[t], "/private");
2338 if (!private_root) {
2339 r = -ENOMEM;
2340 goto finish;
2341 }
2342
2343 r = strv_consume(&empty_directories, private_root);
2344 if (r < 0)
2345 goto finish;
2346 }
2347
2348 STRV_FOREACH(suffix, context->directories[t].paths) {
2349 char *s, *d;
2350
2351 if (context->dynamic_user &&
2352 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
2353 s = strjoin(params->prefix[t], "/private/", *suffix);
2354 else
2355 s = strjoin(params->prefix[t], "/", *suffix);
2356 if (!s) {
2357 r = -ENOMEM;
2358 goto finish;
2359 }
2360
2361 if (context->dynamic_user &&
2362 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2363 (context->root_directory || context->root_image))
2364 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2365 * directory is not created on the root directory. So, let's bind-mount the directory
2366 * on the 'non-private' place. */
2367 d = strjoin(params->prefix[t], "/", *suffix);
2368 else
2369 d = strdup(s);
2370 if (!d) {
2371 free(s);
2372 r = -ENOMEM;
2373 goto finish;
2374 }
2375
2376 bind_mounts[h++] = (BindMount) {
2377 .source = s,
2378 .destination = d,
2379 .read_only = false,
2380 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
2381 .recursive = true,
2382 .ignore_enoent = false,
2383 };
2384 }
2385 }
2386
2387 assert(h == n);
2388
2389 *ret_bind_mounts = bind_mounts;
2390 *ret_n_bind_mounts = n;
2391 *ret_empty_directories = TAKE_PTR(empty_directories);
2392
2393 return (int) n;
2394
2395 finish:
2396 bind_mount_free_many(bind_mounts, h);
2397 return r;
2398 }
2399
2400 static int apply_mount_namespace(
2401 const Unit *u,
2402 const ExecCommand *command,
2403 const ExecContext *context,
2404 const ExecParameters *params,
2405 const ExecRuntime *runtime,
2406 char **error_path) {
2407
2408 _cleanup_strv_free_ char **empty_directories = NULL;
2409 char *tmp = NULL, *var = NULL;
2410 const char *root_dir = NULL, *root_image = NULL;
2411 NamespaceInfo ns_info;
2412 bool needs_sandboxing;
2413 BindMount *bind_mounts = NULL;
2414 size_t n_bind_mounts = 0;
2415 int r;
2416
2417 assert(context);
2418
2419 /* The runtime struct only contains the parent of the private /tmp,
2420 * which is non-accessible to world users. Inside of it there's a /tmp
2421 * that is sticky, and that's the one we want to use here. */
2422
2423 if (context->private_tmp && runtime) {
2424 if (runtime->tmp_dir)
2425 tmp = strjoina(runtime->tmp_dir, "/tmp");
2426 if (runtime->var_tmp_dir)
2427 var = strjoina(runtime->var_tmp_dir, "/tmp");
2428 }
2429
2430 if (params->flags & EXEC_APPLY_CHROOT) {
2431 root_image = context->root_image;
2432
2433 if (!root_image)
2434 root_dir = context->root_directory;
2435 }
2436
2437 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2438 if (r < 0)
2439 return r;
2440
2441 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2442 if (needs_sandboxing)
2443 ns_info = (NamespaceInfo) {
2444 .ignore_protect_paths = false,
2445 .private_dev = context->private_devices,
2446 .protect_control_groups = context->protect_control_groups,
2447 .protect_kernel_tunables = context->protect_kernel_tunables,
2448 .protect_kernel_modules = context->protect_kernel_modules,
2449 .protect_hostname = context->protect_hostname,
2450 .mount_apivfs = context->mount_apivfs,
2451 .private_mounts = context->private_mounts,
2452 };
2453 else if (!context->dynamic_user && root_dir)
2454 /*
2455 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2456 * sandbox info, otherwise enforce it, don't ignore protected paths and
2457 * fail if we are enable to apply the sandbox inside the mount namespace.
2458 */
2459 ns_info = (NamespaceInfo) {
2460 .ignore_protect_paths = true,
2461 };
2462 else
2463 ns_info = (NamespaceInfo) {};
2464
2465 if (context->mount_flags == MS_SHARED)
2466 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2467
2468 r = setup_namespace(root_dir, root_image,
2469 &ns_info, context->read_write_paths,
2470 needs_sandboxing ? context->read_only_paths : NULL,
2471 needs_sandboxing ? context->inaccessible_paths : NULL,
2472 empty_directories,
2473 bind_mounts,
2474 n_bind_mounts,
2475 context->temporary_filesystems,
2476 context->n_temporary_filesystems,
2477 tmp,
2478 var,
2479 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2480 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2481 context->mount_flags,
2482 DISSECT_IMAGE_DISCARD_ON_LOOP,
2483 error_path);
2484
2485 bind_mount_free_many(bind_mounts, n_bind_mounts);
2486
2487 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2488 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
2489 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2490 * completely different execution environment. */
2491 if (r == -ENOANO) {
2492 if (n_bind_mounts == 0 &&
2493 context->n_temporary_filesystems == 0 &&
2494 !root_dir && !root_image &&
2495 !context->dynamic_user) {
2496 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2497 return 0;
2498 }
2499
2500 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2501 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2502 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2503
2504 return -EOPNOTSUPP;
2505 }
2506
2507 return r;
2508 }
2509
2510 static int apply_working_directory(
2511 const ExecContext *context,
2512 const ExecParameters *params,
2513 const char *home,
2514 const bool needs_mount_ns,
2515 int *exit_status) {
2516
2517 const char *d, *wd;
2518
2519 assert(context);
2520 assert(exit_status);
2521
2522 if (context->working_directory_home) {
2523
2524 if (!home) {
2525 *exit_status = EXIT_CHDIR;
2526 return -ENXIO;
2527 }
2528
2529 wd = home;
2530
2531 } else if (context->working_directory)
2532 wd = context->working_directory;
2533 else
2534 wd = "/";
2535
2536 if (params->flags & EXEC_APPLY_CHROOT) {
2537 if (!needs_mount_ns && context->root_directory)
2538 if (chroot(context->root_directory) < 0) {
2539 *exit_status = EXIT_CHROOT;
2540 return -errno;
2541 }
2542
2543 d = wd;
2544 } else
2545 d = prefix_roota(context->root_directory, wd);
2546
2547 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2548 *exit_status = EXIT_CHDIR;
2549 return -errno;
2550 }
2551
2552 return 0;
2553 }
2554
2555 static int setup_keyring(
2556 const Unit *u,
2557 const ExecContext *context,
2558 const ExecParameters *p,
2559 uid_t uid, gid_t gid) {
2560
2561 key_serial_t keyring;
2562 int r = 0;
2563 uid_t saved_uid;
2564 gid_t saved_gid;
2565
2566 assert(u);
2567 assert(context);
2568 assert(p);
2569
2570 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2571 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2572 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2573 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2574 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2575 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2576
2577 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2578 return 0;
2579
2580 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2581 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2582 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2583 * & group is just as nasty as acquiring a reference to the user keyring. */
2584
2585 saved_uid = getuid();
2586 saved_gid = getgid();
2587
2588 if (gid_is_valid(gid) && gid != saved_gid) {
2589 if (setregid(gid, -1) < 0)
2590 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2591 }
2592
2593 if (uid_is_valid(uid) && uid != saved_uid) {
2594 if (setreuid(uid, -1) < 0) {
2595 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2596 goto out;
2597 }
2598 }
2599
2600 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2601 if (keyring == -1) {
2602 if (errno == ENOSYS)
2603 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2604 else if (IN_SET(errno, EACCES, EPERM))
2605 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2606 else if (errno == EDQUOT)
2607 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2608 else
2609 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2610
2611 goto out;
2612 }
2613
2614 /* When requested link the user keyring into the session keyring. */
2615 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2616
2617 if (keyctl(KEYCTL_LINK,
2618 KEY_SPEC_USER_KEYRING,
2619 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2620 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2621 goto out;
2622 }
2623 }
2624
2625 /* Restore uid/gid back */
2626 if (uid_is_valid(uid) && uid != saved_uid) {
2627 if (setreuid(saved_uid, -1) < 0) {
2628 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2629 goto out;
2630 }
2631 }
2632
2633 if (gid_is_valid(gid) && gid != saved_gid) {
2634 if (setregid(saved_gid, -1) < 0)
2635 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2636 }
2637
2638 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
2639 if (!sd_id128_is_null(u->invocation_id)) {
2640 key_serial_t key;
2641
2642 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2643 if (key == -1)
2644 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2645 else {
2646 if (keyctl(KEYCTL_SETPERM, key,
2647 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2648 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2649 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2650 }
2651 }
2652
2653 out:
2654 /* Revert back uid & gid for the the last time, and exit */
2655 /* no extra logging, as only the first already reported error matters */
2656 if (getuid() != saved_uid)
2657 (void) setreuid(saved_uid, -1);
2658
2659 if (getgid() != saved_gid)
2660 (void) setregid(saved_gid, -1);
2661
2662 return r;
2663 }
2664
/* Appends the non-negative fds of a socket pair to 'array', bumping *n once per fd stored. Entries are
 * stored in pair order. No bounds checking is done here: the caller has to provide room for up to two
 * additional entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);

        /* The [static 2] declarator already promises a valid array; the guard is kept as is. */
        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2677
2678 static int close_remaining_fds(
2679 const ExecParameters *params,
2680 const ExecRuntime *runtime,
2681 const DynamicCreds *dcreds,
2682 int user_lookup_fd,
2683 int socket_fd,
2684 int exec_fd,
2685 int *fds, size_t n_fds) {
2686
2687 size_t n_dont_close = 0;
2688 int dont_close[n_fds + 12];
2689
2690 assert(params);
2691
2692 if (params->stdin_fd >= 0)
2693 dont_close[n_dont_close++] = params->stdin_fd;
2694 if (params->stdout_fd >= 0)
2695 dont_close[n_dont_close++] = params->stdout_fd;
2696 if (params->stderr_fd >= 0)
2697 dont_close[n_dont_close++] = params->stderr_fd;
2698
2699 if (socket_fd >= 0)
2700 dont_close[n_dont_close++] = socket_fd;
2701 if (exec_fd >= 0)
2702 dont_close[n_dont_close++] = exec_fd;
2703 if (n_fds > 0) {
2704 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2705 n_dont_close += n_fds;
2706 }
2707
2708 if (runtime)
2709 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2710
2711 if (dcreds) {
2712 if (dcreds->user)
2713 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2714 if (dcreds->group)
2715 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2716 }
2717
2718 if (user_lookup_fd >= 0)
2719 dont_close[n_dont_close++] = user_lookup_fd;
2720
2721 return close_all_fds(dont_close, n_dont_close);
2722 }
2723
2724 static int send_user_lookup(
2725 Unit *unit,
2726 int user_lookup_fd,
2727 uid_t uid,
2728 gid_t gid) {
2729
2730 assert(unit);
2731
2732 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2733 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2734 * specified. */
2735
2736 if (user_lookup_fd < 0)
2737 return 0;
2738
2739 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2740 return 0;
2741
2742 if (writev(user_lookup_fd,
2743 (struct iovec[]) {
2744 IOVEC_INIT(&uid, sizeof(uid)),
2745 IOVEC_INIT(&gid, sizeof(gid)),
2746 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2747 return -errno;
2748
2749 return 0;
2750 }
2751
2752 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2753 int r;
2754
2755 assert(c);
2756 assert(home);
2757 assert(buf);
2758
2759 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2760
2761 if (*home)
2762 return 0;
2763
2764 if (!c->working_directory_home)
2765 return 0;
2766
2767 r = get_home_dir(buf);
2768 if (r < 0)
2769 return r;
2770
2771 *home = *buf;
2772 return 1;
2773 }
2774
2775 static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2776 _cleanup_strv_free_ char ** list = NULL;
2777 ExecDirectoryType t;
2778 int r;
2779
2780 assert(c);
2781 assert(p);
2782 assert(ret);
2783
2784 assert(c->dynamic_user);
2785
2786 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2787 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2788 * directories. */
2789
2790 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2791 char **i;
2792
2793 if (t == EXEC_DIRECTORY_CONFIGURATION)
2794 continue;
2795
2796 if (!p->prefix[t])
2797 continue;
2798
2799 STRV_FOREACH(i, c->directories[t].paths) {
2800 char *e;
2801
2802 if (t == EXEC_DIRECTORY_RUNTIME)
2803 e = strjoin(p->prefix[t], "/", *i);
2804 else
2805 e = strjoin(p->prefix[t], "/private/", *i);
2806 if (!e)
2807 return -ENOMEM;
2808
2809 r = strv_consume(&list, e);
2810 if (r < 0)
2811 return r;
2812 }
2813 }
2814
2815 *ret = TAKE_PTR(list);
2816
2817 return 0;
2818 }
2819
2820 static char *exec_command_line(char **argv);
2821
2822 static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2823 bool using_subcgroup;
2824 char *p;
2825
2826 assert(params);
2827 assert(ret);
2828
2829 if (!params->cgroup_path)
2830 return -EINVAL;
2831
2832 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2833 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2834 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2835 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2836 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2837 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2838 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2839 * flag, which is only passed for the former statements, not for the latter. */
2840
2841 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2842 if (using_subcgroup)
2843 p = strjoin(params->cgroup_path, "/.control");
2844 else
2845 p = strdup(params->cgroup_path);
2846 if (!p)
2847 return -ENOMEM;
2848
2849 *ret = p;
2850 return using_subcgroup;
2851 }
2852
2853 static int exec_child(
2854 Unit *unit,
2855 const ExecCommand *command,
2856 const ExecContext *context,
2857 const ExecParameters *params,
2858 ExecRuntime *runtime,
2859 DynamicCreds *dcreds,
2860 int socket_fd,
2861 int named_iofds[3],
2862 int *fds,
2863 size_t n_socket_fds,
2864 size_t n_storage_fds,
2865 char **files_env,
2866 int user_lookup_fd,
2867 int *exit_status) {
2868
2869 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
2870 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
2871 _cleanup_free_ gid_t *supplementary_gids = NULL;
2872 const char *username = NULL, *groupname = NULL;
2873 _cleanup_free_ char *home_buffer = NULL;
2874 const char *home = NULL, *shell = NULL;
2875 char **final_argv = NULL;
2876 dev_t journal_stream_dev = 0;
2877 ino_t journal_stream_ino = 0;
2878 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2879 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2880 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2881 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2882 #if HAVE_SELINUX
2883 _cleanup_free_ char *mac_selinux_context_net = NULL;
2884 bool use_selinux = false;
2885 #endif
2886 #if ENABLE_SMACK
2887 bool use_smack = false;
2888 #endif
2889 #if HAVE_APPARMOR
2890 bool use_apparmor = false;
2891 #endif
2892 uid_t uid = UID_INVALID;
2893 gid_t gid = GID_INVALID;
2894 size_t n_fds;
2895 ExecDirectoryType dt;
2896 int secure_bits;
2897
2898 assert(unit);
2899 assert(command);
2900 assert(context);
2901 assert(params);
2902 assert(exit_status);
2903
2904 rename_process_from_path(command->path);
2905
2906 /* We reset exactly these signals, since they are the
2907 * only ones we set to SIG_IGN in the main daemon. All
2908 * others we leave untouched because we set them to
2909 * SIG_DFL or a valid handler initially, both of which
2910 * will be demoted to SIG_DFL. */
2911 (void) default_signals(SIGNALS_CRASH_HANDLER,
2912 SIGNALS_IGNORE, -1);
2913
2914 if (context->ignore_sigpipe)
2915 (void) ignore_signals(SIGPIPE, -1);
2916
2917 r = reset_signal_mask();
2918 if (r < 0) {
2919 *exit_status = EXIT_SIGNAL_MASK;
2920 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2921 }
2922
2923 if (params->idle_pipe)
2924 do_idle_pipe_dance(params->idle_pipe);
2925
2926 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2927 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2928 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2929 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2930
2931 log_forget_fds();
2932 log_set_open_when_needed(true);
2933
2934 /* In case anything used libc syslog(), close this here, too */
2935 closelog();
2936
2937 n_fds = n_socket_fds + n_storage_fds;
2938 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
2939 if (r < 0) {
2940 *exit_status = EXIT_FDS;
2941 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2942 }
2943
2944 if (!context->same_pgrp)
2945 if (setsid() < 0) {
2946 *exit_status = EXIT_SETSID;
2947 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2948 }
2949
2950 exec_context_tty_reset(context, params);
2951
2952 if (unit_shall_confirm_spawn(unit)) {
2953 const char *vc = params->confirm_spawn;
2954 _cleanup_free_ char *cmdline = NULL;
2955
2956 cmdline = exec_command_line(command->argv);
2957 if (!cmdline) {
2958 *exit_status = EXIT_MEMORY;
2959 return log_oom();
2960 }
2961
2962 r = ask_for_confirmation(vc, unit, cmdline);
2963 if (r != CONFIRM_EXECUTE) {
2964 if (r == CONFIRM_PRETEND_SUCCESS) {
2965 *exit_status = EXIT_SUCCESS;
2966 return 0;
2967 }
2968 *exit_status = EXIT_CONFIRM;
2969 log_unit_error(unit, "Execution cancelled by the user");
2970 return -ECANCELED;
2971 }
2972 }
2973
2974 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2975 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2976 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2977 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2978 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2979 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2980 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2981 *exit_status = EXIT_MEMORY;
2982 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2983 }
2984
2985 if (context->dynamic_user && dcreds) {
2986 _cleanup_strv_free_ char **suggested_paths = NULL;
2987
2988 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2989 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
2990 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2991 *exit_status = EXIT_USER;
2992 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2993 }
2994
2995 r = compile_suggested_paths(context, params, &suggested_paths);
2996 if (r < 0) {
2997 *exit_status = EXIT_MEMORY;
2998 return log_oom();
2999 }
3000
3001 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
3002 if (r < 0) {
3003 *exit_status = EXIT_USER;
3004 if (r == -EILSEQ) {
3005 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3006 return -EOPNOTSUPP;
3007 }
3008 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
3009 }
3010
3011 if (!uid_is_valid(uid)) {
3012 *exit_status = EXIT_USER;
3013 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
3014 return -ESRCH;
3015 }
3016
3017 if (!gid_is_valid(gid)) {
3018 *exit_status = EXIT_USER;
3019 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
3020 return -ESRCH;
3021 }
3022
3023 if (dcreds->user)
3024 username = dcreds->user->name;
3025
3026 } else {
3027 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3028 if (r < 0) {
3029 *exit_status = EXIT_USER;
3030 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
3031 }
3032
3033 r = get_fixed_group(context, &groupname, &gid);
3034 if (r < 0) {
3035 *exit_status = EXIT_GROUP;
3036 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
3037 }
3038 }
3039
3040 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3041 r = get_supplementary_groups(context, username, groupname, gid,
3042 &supplementary_gids, &ngids);
3043 if (r < 0) {
3044 *exit_status = EXIT_GROUP;
3045 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
3046 }
3047
3048 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3049 if (r < 0) {
3050 *exit_status = EXIT_USER;
3051 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
3052 }
3053
3054 user_lookup_fd = safe_close(user_lookup_fd);
3055
3056 r = acquire_home(context, uid, &home, &home_buffer);
3057 if (r < 0) {
3058 *exit_status = EXIT_CHDIR;
3059 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
3060 }
3061
3062 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3063 * must be sure to drop O_NONBLOCK */
3064 if (socket_fd >= 0)
3065 (void) fd_nonblock(socket_fd, false);
3066
3067 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3068 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3069 if (params->cgroup_path) {
3070 _cleanup_free_ char *p = NULL;
3071
3072 r = exec_parameters_get_cgroup_path(params, &p);
3073 if (r < 0) {
3074 *exit_status = EXIT_CGROUP;
3075 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3076 }
3077
3078 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3079 if (r < 0) {
3080 *exit_status = EXIT_CGROUP;
3081 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3082 }
3083 }
3084
3085 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3086 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3087 if (r < 0) {
3088 *exit_status = EXIT_NETWORK;
3089 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3090 }
3091 }
3092
3093 r = setup_input(context, params, socket_fd, named_iofds);
3094 if (r < 0) {
3095 *exit_status = EXIT_STDIN;
3096 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
3097 }
3098
3099 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
3100 if (r < 0) {
3101 *exit_status = EXIT_STDOUT;
3102 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
3103 }
3104
3105 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
3106 if (r < 0) {
3107 *exit_status = EXIT_STDERR;
3108 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
3109 }
3110
3111 if (context->oom_score_adjust_set) {
3112 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3113 * prohibit write access to this file, and we shouldn't trip up over that. */
3114 r = set_oom_score_adjust(context->oom_score_adjust);
3115 if (IN_SET(r, -EPERM, -EACCES))
3116 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
3117 else if (r < 0) {
3118 *exit_status = EXIT_OOM_ADJUST;
3119 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
3120 }
3121 }
3122
3123 if (context->nice_set)
3124 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
3125 *exit_status = EXIT_NICE;
3126 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
3127 }
3128
3129 if (context->cpu_sched_set) {
3130 struct sched_param param = {
3131 .sched_priority = context->cpu_sched_priority,
3132 };
3133
3134 r = sched_setscheduler(0,
3135 context->cpu_sched_policy |
3136 (context->cpu_sched_reset_on_fork ?
3137 SCHED_RESET_ON_FORK : 0),
3138 &param);
3139 if (r < 0) {
3140 *exit_status = EXIT_SETSCHEDULER;
3141 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
3142 }
3143 }
3144
3145 if (context->cpu_set.set)
3146 if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
3147 *exit_status = EXIT_CPUAFFINITY;
3148 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
3149 }
3150
3151 if (context->ioprio_set)
3152 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
3153 *exit_status = EXIT_IOPRIO;
3154 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
3155 }
3156
3157 if (context->timer_slack_nsec != NSEC_INFINITY)
3158 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
3159 *exit_status = EXIT_TIMERSLACK;
3160 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
3161 }
3162
3163 if (context->personality != PERSONALITY_INVALID) {
3164 r = safe_personality(context->personality);
3165 if (r < 0) {
3166 *exit_status = EXIT_PERSONALITY;
3167 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
3168 }
3169 }
3170
3171 if (context->utmp_id)
3172 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
3173 context->tty_path,
3174 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3175 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3176 USER_PROCESS,
3177 username);
3178
3179 if (uid_is_valid(uid)) {
3180 r = chown_terminal(STDIN_FILENO, uid);
3181 if (r < 0) {
3182 *exit_status = EXIT_STDIN;
3183 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
3184 }
3185 }
3186
3187 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
3188 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3189 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3190 * touch a single hierarchy too. */
3191 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
3192 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
3193 if (r < 0) {
3194 *exit_status = EXIT_CGROUP;
3195 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
3196 }
3197 }
3198
3199 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3200 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3201 if (r < 0)
3202 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
3203 }
3204
3205 r = build_environment(
3206 unit,
3207 context,
3208 params,
3209 n_fds,
3210 home,
3211 username,
3212 shell,
3213 journal_stream_dev,
3214 journal_stream_ino,
3215 &our_env);
3216 if (r < 0) {
3217 *exit_status = EXIT_MEMORY;
3218 return log_oom();
3219 }
3220
3221 r = build_pass_environment(context, &pass_env);
3222 if (r < 0) {
3223 *exit_status = EXIT_MEMORY;
3224 return log_oom();
3225 }
3226
3227 accum_env = strv_env_merge(5,
3228 params->environment,
3229 our_env,
3230 pass_env,
3231 context->environment,
3232 files_env,
3233 NULL);
3234 if (!accum_env) {
3235 *exit_status = EXIT_MEMORY;
3236 return log_oom();
3237 }
3238 accum_env = strv_env_clean(accum_env);
3239
3240 (void) umask(context->umask);
3241
3242 r = setup_keyring(unit, context, params, uid, gid);
3243 if (r < 0) {
3244 *exit_status = EXIT_KEYRING;
3245 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
3246 }
3247
3248 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3249 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
3250
3251 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3252 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
3253
3254 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3255 if (needs_ambient_hack)
3256 needs_setuid = false;
3257 else
3258 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3259
3260 if (needs_sandboxing) {
3261 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3262 * present. The actual MAC context application will happen later, as late as possible, to avoid
3263 * impacting our own code paths. */
3264
3265 #if HAVE_SELINUX
3266 use_selinux = mac_selinux_use();
3267 #endif
3268 #if ENABLE_SMACK
3269 use_smack = mac_smack_use();
3270 #endif
3271 #if HAVE_APPARMOR
3272 use_apparmor = mac_apparmor_use();
3273 #endif
3274 }
3275
3276 if (needs_sandboxing) {
3277 int which_failed;
3278
3279 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3280 * is set here. (See below.) */
3281
3282 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3283 if (r < 0) {
3284 *exit_status = EXIT_LIMITS;
3285 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3286 }
3287 }
3288
3289 if (needs_setuid) {
3290
3291 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3292 * wins here. (See above.) */
3293
3294 if (context->pam_name && username) {
3295 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3296 if (r < 0) {
3297 *exit_status = EXIT_PAM;
3298 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
3299 }
3300 }
3301 }
3302
3303 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3304
3305 if (ns_type_supported(NAMESPACE_NET)) {
3306 r = setup_netns(runtime->netns_storage_socket);
3307 if (r < 0) {
3308 *exit_status = EXIT_NETWORK;
3309 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3310 }
3311 } else if (context->network_namespace_path) {
3312 *exit_status = EXIT_NETWORK;
3313 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
3314 } else
3315 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3316 }
3317
3318 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
3319 if (needs_mount_namespace) {
3320 _cleanup_free_ char *error_path = NULL;
3321
3322 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3323 if (r < 0) {
3324 *exit_status = EXIT_NAMESPACE;
3325 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3326 error_path ? ": " : "", strempty(error_path));
3327 }
3328 }
3329
3330 if (context->protect_hostname) {
3331 if (ns_type_supported(NAMESPACE_UTS)) {
3332 if (unshare(CLONE_NEWUTS) < 0) {
3333 *exit_status = EXIT_NAMESPACE;
3334 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3335 }
3336 } else
3337 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3338 #if HAVE_SECCOMP
3339 r = seccomp_protect_hostname();
3340 if (r < 0) {
3341 *exit_status = EXIT_SECCOMP;
3342 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3343 }
3344 #endif
3345 }
3346
3347 /* Drop groups as early as possible */
3348 if (needs_setuid) {
3349 r = enforce_groups(gid, supplementary_gids, ngids);
3350 if (r < 0) {
3351 *exit_status = EXIT_GROUP;
3352 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
3353 }
3354 }
3355
3356 if (needs_sandboxing) {
3357 #if HAVE_SELINUX
3358 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3359 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3360 if (r < 0) {
3361 *exit_status = EXIT_SELINUX_CONTEXT;
3362 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3363 }
3364 }
3365 #endif
3366
3367 if (context->private_users) {
3368 r = setup_private_users(uid, gid);
3369 if (r < 0) {
3370 *exit_status = EXIT_USER;
3371 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
3372 }
3373 }
3374 }
3375
3376 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3377 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3378 * however if we have it as we want to keep it open until the final execve(). */
3379
3380 if (params->exec_fd >= 0) {
3381 exec_fd = params->exec_fd;
3382
3383 if (exec_fd < 3 + (int) n_fds) {
3384 int moved_fd;
3385
3386 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3387 * process we are about to execute. */
3388
3389 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3390 if (moved_fd < 0) {
3391 *exit_status = EXIT_FDS;
3392 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3393 }
3394
3395 safe_close(exec_fd);
3396 exec_fd = moved_fd;
3397 } else {
3398 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3399 r = fd_cloexec(exec_fd, true);
3400 if (r < 0) {
3401 *exit_status = EXIT_FDS;
3402 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3403 }
3404 }
3405
3406 fds_with_exec_fd = newa(int, n_fds + 1);
3407 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
3408 fds_with_exec_fd[n_fds] = exec_fd;
3409 n_fds_with_exec_fd = n_fds + 1;
3410 } else {
3411 fds_with_exec_fd = fds;
3412 n_fds_with_exec_fd = n_fds;
3413 }
3414
3415 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
3416 if (r >= 0)
3417 r = shift_fds(fds, n_fds);
3418 if (r >= 0)
3419 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
3420 if (r < 0) {
3421 *exit_status = EXIT_FDS;
3422 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
3423 }
3424
3425 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3426 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3427 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3428 * came this far. */
3429
3430 secure_bits = context->secure_bits;
3431
3432 if (needs_sandboxing) {
3433 uint64_t bset;
3434
3435 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3436 * requested. (Note this is placed after the general resource limit initialization, see
3437 * above, in order to take precedence.) */
3438 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3439 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3440 *exit_status = EXIT_LIMITS;
3441 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3442 }
3443 }
3444
3445 #if ENABLE_SMACK
3446 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3447 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3448 if (use_smack) {
3449 r = setup_smack(context, command);
3450 if (r < 0) {
3451 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3452 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3453 }
3454 }
3455 #endif
3456
3457 bset = context->capability_bounding_set;
3458 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3459 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3460 * instead of us doing that */
3461 if (needs_ambient_hack)
3462 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3463 (UINT64_C(1) << CAP_SETUID) |
3464 (UINT64_C(1) << CAP_SETGID);
3465
3466 if (!cap_test_all(bset)) {
3467 r = capability_bounding_set_drop(bset, false);
3468 if (r < 0) {
3469 *exit_status = EXIT_CAPABILITIES;
3470 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3471 }
3472 }
3473
3474 /* This is done before enforce_user, but ambient set
3475 * does not survive over setresuid() if keep_caps is not set. */
3476 if (!needs_ambient_hack &&
3477 context->capability_ambient_set != 0) {
3478 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3479 if (r < 0) {
3480 *exit_status = EXIT_CAPABILITIES;
3481 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
3482 }
3483 }
3484 }
3485
3486 if (needs_setuid) {
3487 if (uid_is_valid(uid)) {
3488 r = enforce_user(context, uid);
3489 if (r < 0) {
3490 *exit_status = EXIT_USER;
3491 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
3492 }
3493
3494 if (!needs_ambient_hack &&
3495 context->capability_ambient_set != 0) {
3496
3497 /* Fix the ambient capabilities after user change. */
3498 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3499 if (r < 0) {
3500 *exit_status = EXIT_CAPABILITIES;
3501 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
3502 }
3503
3504 /* If we were asked to change user and ambient capabilities
3505 * were requested, we had to add keep-caps to the securebits
3506 * so that we would maintain the inherited capability set
3507 * through the setresuid(). Make sure that the bit is added
3508 * also to the context secure_bits so that we don't try to
3509 * drop the bit away next. */
3510
3511 secure_bits |= 1<<SECURE_KEEP_CAPS;
3512 }
3513 }
3514 }
3515
3516 /* Apply working directory here, because the working directory might be on NFS and only the user running
3517 * this service might have the correct privilege to change to the working directory */
3518 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3519 if (r < 0)
3520 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3521
3522 if (needs_sandboxing) {
3523 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3524 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3525 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3526 * are restricted. */
3527
3528 #if HAVE_SELINUX
3529 if (use_selinux) {
3530 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3531
3532 if (exec_context) {
3533 r = setexeccon(exec_context);
3534 if (r < 0) {
3535 *exit_status = EXIT_SELINUX_CONTEXT;
3536 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
3537 }
3538 }
3539 }
3540 #endif
3541
3542 #if HAVE_APPARMOR
3543 if (use_apparmor && context->apparmor_profile) {
3544 r = aa_change_onexec(context->apparmor_profile);
3545 if (r < 0 && !context->apparmor_profile_ignore) {
3546 *exit_status = EXIT_APPARMOR_PROFILE;
3547 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
3548 }
3549 }
3550 #endif
3551
3552 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3553 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3554 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3555 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3556 *exit_status = EXIT_SECUREBITS;
3557 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
3558 }
3559
3560 if (context_has_no_new_privileges(context))
3561 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3562 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3563 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
3564 }
3565
3566 #if HAVE_SECCOMP
3567 r = apply_address_families(unit, context);
3568 if (r < 0) {
3569 *exit_status = EXIT_ADDRESS_FAMILIES;
3570 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
3571 }
3572
3573 r = apply_memory_deny_write_execute(unit, context);
3574 if (r < 0) {
3575 *exit_status = EXIT_SECCOMP;
3576 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3577 }
3578
3579 r = apply_restrict_realtime(unit, context);
3580 if (r < 0) {
3581 *exit_status = EXIT_SECCOMP;
3582 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3583 }
3584
3585 r = apply_restrict_suid_sgid(unit, context);
3586 if (r < 0) {
3587 *exit_status = EXIT_SECCOMP;
3588 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3589 }
3590
3591 r = apply_restrict_namespaces(unit, context);
3592 if (r < 0) {
3593 *exit_status = EXIT_SECCOMP;
3594 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3595 }
3596
3597 r = apply_protect_sysctl(unit, context);
3598 if (r < 0) {
3599 *exit_status = EXIT_SECCOMP;
3600 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3601 }
3602
3603 r = apply_protect_kernel_modules(unit, context);
3604 if (r < 0) {
3605 *exit_status = EXIT_SECCOMP;
3606 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3607 }
3608
3609 r = apply_private_devices(unit, context);
3610 if (r < 0) {
3611 *exit_status = EXIT_SECCOMP;
3612 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3613 }
3614
3615 r = apply_syscall_archs(unit, context);
3616 if (r < 0) {
3617 *exit_status = EXIT_SECCOMP;
3618 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3619 }
3620
3621 r = apply_lock_personality(unit, context);
3622 if (r < 0) {
3623 *exit_status = EXIT_SECCOMP;
3624 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3625 }
3626
3627 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3628 * by the filter as little as possible. */
3629 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3630 if (r < 0) {
3631 *exit_status = EXIT_SECCOMP;
3632 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3633 }
3634 #endif
3635 }
3636
3637 if (!strv_isempty(context->unset_environment)) {
3638 char **ee = NULL;
3639
3640 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3641 if (!ee) {
3642 *exit_status = EXIT_MEMORY;
3643 return log_oom();
3644 }
3645
3646 strv_free_and_replace(accum_env, ee);
3647 }
3648
3649 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3650 replaced_argv = replace_env_argv(command->argv, accum_env);
3651 if (!replaced_argv) {
3652 *exit_status = EXIT_MEMORY;
3653 return log_oom();
3654 }
3655 final_argv = replaced_argv;
3656 } else
3657 final_argv = command->argv;
3658
3659 if (DEBUG_LOGGING) {
3660 _cleanup_free_ char *line;
3661
3662 line = exec_command_line(final_argv);
3663 if (line)
3664 log_struct(LOG_DEBUG,
3665 "EXECUTABLE=%s", command->path,
3666 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3667 LOG_UNIT_ID(unit),
3668 LOG_UNIT_INVOCATION_ID(unit));
3669 }
3670
3671 if (exec_fd >= 0) {
3672 uint8_t hot = 1;
3673
3674 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3675 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3676
3677 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3678 *exit_status = EXIT_EXEC;
3679 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3680 }
3681 }
3682
3683 execve(command->path, final_argv, accum_env);
3684 r = -errno;
3685
3686 if (exec_fd >= 0) {
3687 uint8_t hot = 0;
3688
3689 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3690 * that POLLHUP on it no longer means execve() succeeded. */
3691
3692 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3693 *exit_status = EXIT_EXEC;
3694 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3695 }
3696 }
3697
3698 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3699 log_struct_errno(LOG_INFO, r,
3700 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3701 LOG_UNIT_ID(unit),
3702 LOG_UNIT_INVOCATION_ID(unit),
3703 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3704 command->path),
3705 "EXECUTABLE=%s", command->path);
3706 return 0;
3707 }
3708
3709 *exit_status = EXIT_EXEC;
3710 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
3711 }
3712
3713 static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3714 static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3715
3716 int exec_spawn(Unit *unit,
3717 ExecCommand *command,
3718 const ExecContext *context,
3719 const ExecParameters *params,
3720 ExecRuntime *runtime,
3721 DynamicCreds *dcreds,
3722 pid_t *ret) {
3723
3724 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
3725 _cleanup_free_ char *subcgroup_path = NULL;
3726 _cleanup_strv_free_ char **files_env = NULL;
3727 size_t n_storage_fds = 0, n_socket_fds = 0;
3728 _cleanup_free_ char *line = NULL;
3729 pid_t pid;
3730
3731 assert(unit);
3732 assert(command);
3733 assert(context);
3734 assert(ret);
3735 assert(params);
3736 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
3737
3738 if (context->std_input == EXEC_INPUT_SOCKET ||
3739 context->std_output == EXEC_OUTPUT_SOCKET ||
3740 context->std_error == EXEC_OUTPUT_SOCKET) {
3741
3742 if (params->n_socket_fds > 1) {
3743 log_unit_error(unit, "Got more than one socket.");
3744 return -EINVAL;
3745 }
3746
3747 if (params->n_socket_fds == 0) {
3748 log_unit_error(unit, "Got no socket.");
3749 return -EINVAL;
3750 }
3751
3752 socket_fd = params->fds[0];
3753 } else {
3754 socket_fd = -1;
3755 fds = params->fds;
3756 n_socket_fds = params->n_socket_fds;
3757 n_storage_fds = params->n_storage_fds;
3758 }
3759
3760 r = exec_context_named_iofds(context, params, named_iofds);
3761 if (r < 0)
3762 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3763
3764 r = exec_context_load_environment(unit, context, &files_env);
3765 if (r < 0)
3766 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3767
3768 line = exec_command_line(command->argv);
3769 if (!line)
3770 return log_oom();
3771
3772 log_struct(LOG_DEBUG,
3773 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3774 "EXECUTABLE=%s", command->path,
3775 LOG_UNIT_ID(unit),
3776 LOG_UNIT_INVOCATION_ID(unit));
3777
3778 if (params->cgroup_path) {
3779 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3780 if (r < 0)
3781 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3782 if (r > 0) { /* We are using a child cgroup */
3783 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3784 if (r < 0)
3785 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3786 }
3787 }
3788
3789 pid = fork();
3790 if (pid < 0)
3791 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3792
3793 if (pid == 0) {
3794 int exit_status = EXIT_SUCCESS;
3795
3796 r = exec_child(unit,
3797 command,
3798 context,
3799 params,
3800 runtime,
3801 dcreds,
3802 socket_fd,
3803 named_iofds,
3804 fds,
3805 n_socket_fds,
3806 n_storage_fds,
3807 files_env,
3808 unit->manager->user_lookup_fds[1],
3809 &exit_status);
3810
3811 if (r < 0)
3812 log_struct_errno(LOG_ERR, r,
3813 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3814 LOG_UNIT_ID(unit),
3815 LOG_UNIT_INVOCATION_ID(unit),
3816 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3817 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3818 command->path),
3819 "EXECUTABLE=%s", command->path);
3820
3821 _exit(exit_status);
3822 }
3823
3824 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3825
3826 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3827 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3828 * process will be killed too). */
3829 if (subcgroup_path)
3830 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
3831
3832 exec_status_start(&command->exec_status, pid);
3833
3834 *ret = pid;
3835 return 0;
3836 }
3837
3838 void exec_context_init(ExecContext *c) {
3839 ExecDirectoryType i;
3840
3841 assert(c);
3842
3843 c->umask = 0022;
3844 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3845 c->cpu_sched_policy = SCHED_OTHER;
3846 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3847 c->syslog_level_prefix = true;
3848 c->ignore_sigpipe = true;
3849 c->timer_slack_nsec = NSEC_INFINITY;
3850 c->personality = PERSONALITY_INVALID;
3851 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3852 c->directories[i].mode = 0755;
3853 c->capability_bounding_set = CAP_ALL;
3854 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3855 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
3856 c->log_level_max = -1;
3857 }
3858
3859 void exec_context_done(ExecContext *c) {
3860 ExecDirectoryType i;
3861 size_t l;
3862
3863 assert(c);
3864
3865 c->environment = strv_free(c->environment);
3866 c->environment_files = strv_free(c->environment_files);
3867 c->pass_environment = strv_free(c->pass_environment);
3868 c->unset_environment = strv_free(c->unset_environment);
3869
3870 rlimit_free_all(c->rlimit);
3871
3872 for (l = 0; l < 3; l++) {
3873 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3874 c->stdio_file[l] = mfree(c->stdio_file[l]);
3875 }
3876
3877 c->working_directory = mfree(c->working_directory);
3878 c->root_directory = mfree(c->root_directory);
3879 c->root_image = mfree(c->root_image);
3880 c->tty_path = mfree(c->tty_path);
3881 c->syslog_identifier = mfree(c->syslog_identifier);
3882 c->user = mfree(c->user);
3883 c->group = mfree(c->group);
3884
3885 c->supplementary_groups = strv_free(c->supplementary_groups);
3886
3887 c->pam_name = mfree(c->pam_name);
3888
3889 c->read_only_paths = strv_free(c->read_only_paths);
3890 c->read_write_paths = strv_free(c->read_write_paths);
3891 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3892
3893 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3894 c->bind_mounts = NULL;
3895 c->n_bind_mounts = 0;
3896 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3897 c->temporary_filesystems = NULL;
3898 c->n_temporary_filesystems = 0;
3899
3900 cpu_set_reset(&c->cpu_set);
3901
3902 c->utmp_id = mfree(c->utmp_id);
3903 c->selinux_context = mfree(c->selinux_context);
3904 c->apparmor_profile = mfree(c->apparmor_profile);
3905 c->smack_process_label = mfree(c->smack_process_label);
3906
3907 c->syscall_filter = hashmap_free(c->syscall_filter);
3908 c->syscall_archs = set_free(c->syscall_archs);
3909 c->address_families = set_free(c->address_families);
3910
3911 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3912 c->directories[i].paths = strv_free(c->directories[i].paths);
3913
3914 c->log_level_max = -1;
3915
3916 exec_context_free_log_extra_fields(c);
3917
3918 c->log_rate_limit_interval_usec = 0;
3919 c->log_rate_limit_burst = 0;
3920
3921 c->stdin_data = mfree(c->stdin_data);
3922 c->stdin_data_size = 0;
3923
3924 c->network_namespace_path = mfree(c->network_namespace_path);
3925 }
3926
3927 int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
3928 char **i;
3929
3930 assert(c);
3931
3932 if (!runtime_prefix)
3933 return 0;
3934
3935 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3936 _cleanup_free_ char *p;
3937
3938 p = path_join(runtime_prefix, *i);
3939 if (!p)
3940 return -ENOMEM;
3941
3942 /* We execute this synchronously, since we need to be sure this is gone when we start the
3943 * service next. */
3944 (void) rm_rf(p, REMOVE_ROOT);
3945 }
3946
3947 return 0;
3948 }
3949
3950 static void exec_command_done(ExecCommand *c) {
3951 assert(c);
3952
3953 c->path = mfree(c->path);
3954 c->argv = strv_free(c->argv);
3955 }
3956
3957 void exec_command_done_array(ExecCommand *c, size_t n) {
3958 size_t i;
3959
3960 for (i = 0; i < n; i++)
3961 exec_command_done(c+i);
3962 }
3963
3964 ExecCommand* exec_command_free_list(ExecCommand *c) {
3965 ExecCommand *i;
3966
3967 while ((i = c)) {
3968 LIST_REMOVE(command, c, i);
3969 exec_command_done(i);
3970 free(i);
3971 }
3972
3973 return NULL;
3974 }
3975
3976 void exec_command_free_array(ExecCommand **c, size_t n) {
3977 size_t i;
3978
3979 for (i = 0; i < n; i++)
3980 c[i] = exec_command_free_list(c[i]);
3981 }
3982
3983 void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3984 size_t i;
3985
3986 for (i = 0; i < n; i++)
3987 exec_status_reset(&c[i].exec_status);
3988 }
3989
3990 void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3991 size_t i;
3992
3993 for (i = 0; i < n; i++) {
3994 ExecCommand *z;
3995
3996 LIST_FOREACH(command, z, c[i])
3997 exec_status_reset(&z->exec_status);
3998 }
3999 }
4000
4001 typedef struct InvalidEnvInfo {
4002 const Unit *unit;
4003 const char *path;
4004 } InvalidEnvInfo;
4005
4006 static void invalid_env(const char *p, void *userdata) {
4007 InvalidEnvInfo *info = userdata;
4008
4009 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
4010 }
4011
4012 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4013 assert(c);
4014
4015 switch (fd_index) {
4016
4017 case STDIN_FILENO:
4018 if (c->std_input != EXEC_INPUT_NAMED_FD)
4019 return NULL;
4020
4021 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
4022
4023 case STDOUT_FILENO:
4024 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4025 return NULL;
4026
4027 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
4028
4029 case STDERR_FILENO:
4030 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4031 return NULL;
4032
4033 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
4034
4035 default:
4036 return NULL;
4037 }
4038 }
4039
4040 static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
4041 size_t i, targets;
4042 const char* stdio_fdname[3];
4043 size_t n_fds;
4044
4045 assert(c);
4046 assert(p);
4047
4048 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4049 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4050 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4051
4052 for (i = 0; i < 3; i++)
4053 stdio_fdname[i] = exec_context_fdname(c, i);
4054
4055 n_fds = p->n_storage_fds + p->n_socket_fds;
4056
4057 for (i = 0; i < n_fds && targets > 0; i++)
4058 if (named_iofds[STDIN_FILENO] < 0 &&
4059 c->std_input == EXEC_INPUT_NAMED_FD &&
4060 stdio_fdname[STDIN_FILENO] &&
4061 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4062
4063 named_iofds[STDIN_FILENO] = p->fds[i];
4064 targets--;
4065
4066 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4067 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4068 stdio_fdname[STDOUT_FILENO] &&
4069 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4070
4071 named_iofds[STDOUT_FILENO] = p->fds[i];
4072 targets--;
4073
4074 } else if (named_iofds[STDERR_FILENO] < 0 &&
4075 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4076 stdio_fdname[STDERR_FILENO] &&
4077 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4078
4079 named_iofds[STDERR_FILENO] = p->fds[i];
4080 targets--;
4081 }
4082
4083 return targets == 0 ? 0 : -ENOENT;
4084 }
4085
4086 static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
4087 char **i, **r = NULL;
4088
4089 assert(c);
4090 assert(l);
4091
4092 STRV_FOREACH(i, c->environment_files) {
4093 char *fn;
4094 int k;
4095 unsigned n;
4096 bool ignore = false;
4097 char **p;
4098 _cleanup_globfree_ glob_t pglob = {};
4099
4100 fn = *i;
4101
4102 if (fn[0] == '-') {
4103 ignore = true;
4104 fn++;
4105 }
4106
4107 if (!path_is_absolute(fn)) {
4108 if (ignore)
4109 continue;
4110
4111 strv_free(r);
4112 return -EINVAL;
4113 }
4114
4115 /* Filename supports globbing, take all matching files */
4116 k = safe_glob(fn, 0, &pglob);
4117 if (k < 0) {
4118 if (ignore)
4119 continue;
4120
4121 strv_free(r);
4122 return k;
4123 }
4124
4125 /* When we don't match anything, -ENOENT should be returned */
4126 assert(pglob.gl_pathc > 0);
4127
4128 for (n = 0; n < pglob.gl_pathc; n++) {
4129 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
4130 if (k < 0) {
4131 if (ignore)
4132 continue;
4133
4134 strv_free(r);
4135 return k;
4136 }
4137 /* Log invalid environment variables with filename */
4138 if (p) {
4139 InvalidEnvInfo info = {
4140 .unit = unit,
4141 .path = pglob.gl_pathv[n]
4142 };
4143
4144 p = strv_env_clean_with_callback(p, invalid_env, &info);
4145 }
4146
4147 if (!r)
4148 r = p;
4149 else {
4150 char **m;
4151
4152 m = strv_env_merge(2, r, p);
4153 strv_free(r);
4154 strv_free(p);
4155 if (!m)
4156 return -ENOMEM;
4157
4158 r = m;
4159 }
4160 }
4161 }
4162
4163 *l = r;
4164
4165 return 0;
4166 }
4167
4168 static bool tty_may_match_dev_console(const char *tty) {
4169 _cleanup_free_ char *resolved = NULL;
4170
4171 if (!tty)
4172 return true;
4173
4174 tty = skip_dev_prefix(tty);
4175
4176 /* trivial identity? */
4177 if (streq(tty, "console"))
4178 return true;
4179
4180 if (resolve_dev_console(&resolved) < 0)
4181 return true; /* if we could not resolve, assume it may */
4182
4183 /* "tty0" means the active VC, so it may be the same sometimes */
4184 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
4185 }
4186
4187 static bool exec_context_may_touch_tty(const ExecContext *ec) {
4188 assert(ec);
4189
4190 return ec->tty_reset ||
4191 ec->tty_vhangup ||
4192 ec->tty_vt_disallocate ||
4193 is_terminal_input(ec->std_input) ||
4194 is_terminal_output(ec->std_output) ||
4195 is_terminal_output(ec->std_error);
4196 }
4197
4198 bool exec_context_may_touch_console(const ExecContext *ec) {
4199
4200 return exec_context_may_touch_tty(ec) &&
4201 tty_may_match_dev_console(exec_context_tty_path(ec));
4202 }
4203
/* Writes every string of the NULL-terminated string array 'l' to 'f', each one prefixed by a single space.
 * A NULL array is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++)
                fprintf(f, " %s", *item);
}
4212
4213 void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
4214 ExecDirectoryType dt;
4215 char **e, **d;
4216 unsigned i;
4217 int r;
4218
4219 assert(c);
4220 assert(f);
4221
4222 prefix = strempty(prefix);
4223
4224 fprintf(f,
4225 "%sUMask: %04o\n"
4226 "%sWorkingDirectory: %s\n"
4227 "%sRootDirectory: %s\n"
4228 "%sNonBlocking: %s\n"
4229 "%sPrivateTmp: %s\n"
4230 "%sPrivateDevices: %s\n"
4231 "%sProtectKernelTunables: %s\n"
4232 "%sProtectKernelModules: %s\n"
4233 "%sProtectControlGroups: %s\n"
4234 "%sPrivateNetwork: %s\n"
4235 "%sPrivateUsers: %s\n"
4236 "%sProtectHome: %s\n"
4237 "%sProtectSystem: %s\n"
4238 "%sMountAPIVFS: %s\n"
4239 "%sIgnoreSIGPIPE: %s\n"
4240 "%sMemoryDenyWriteExecute: %s\n"
4241 "%sRestrictRealtime: %s\n"
4242 "%sRestrictSUIDSGID: %s\n"
4243 "%sKeyringMode: %s\n"
4244 "%sProtectHostname: %s\n",
4245 prefix, c->umask,
4246 prefix, c->working_directory ? c->working_directory : "/",
4247 prefix, c->root_directory ? c->root_directory : "/",
4248 prefix, yes_no(c->non_blocking),
4249 prefix, yes_no(c->private_tmp),
4250 prefix, yes_no(c->private_devices),
4251 prefix, yes_no(c->protect_kernel_tunables),
4252 prefix, yes_no(c->protect_kernel_modules),
4253 prefix, yes_no(c->protect_control_groups),
4254 prefix, yes_no(c->private_network),
4255 prefix, yes_no(c->private_users),
4256 prefix, protect_home_to_string(c->protect_home),
4257 prefix, protect_system_to_string(c->protect_system),
4258 prefix, yes_no(c->mount_apivfs),
4259 prefix, yes_no(c->ignore_sigpipe),
4260 prefix, yes_no(c->memory_deny_write_execute),
4261 prefix, yes_no(c->restrict_realtime),
4262 prefix, yes_no(c->restrict_suid_sgid),
4263 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4264 prefix, yes_no(c->protect_hostname));
4265
4266 if (c->root_image)
4267 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4268
4269 STRV_FOREACH(e, c->environment)
4270 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4271
4272 STRV_FOREACH(e, c->environment_files)
4273 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
4274
4275 STRV_FOREACH(e, c->pass_environment)
4276 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4277
4278 STRV_FOREACH(e, c->unset_environment)
4279 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4280
4281 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4282
4283 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
4284 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4285
4286 STRV_FOREACH(d, c->directories[dt].paths)
4287 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4288 }
4289
4290 if (c->nice_set)
4291 fprintf(f,
4292 "%sNice: %i\n",
4293 prefix, c->nice);
4294
4295 if (c->oom_score_adjust_set)
4296 fprintf(f,
4297 "%sOOMScoreAdjust: %i\n",
4298 prefix, c->oom_score_adjust);
4299
4300 for (i = 0; i < RLIM_NLIMITS; i++)
4301 if (c->rlimit[i]) {
4302 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
4303 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4304 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
4305 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4306 }
4307
4308 if (c->ioprio_set) {
4309 _cleanup_free_ char *class_str = NULL;
4310
4311 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4312 if (r >= 0)
4313 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4314
4315 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
4316 }
4317
4318 if (c->cpu_sched_set) {
4319 _cleanup_free_ char *policy_str = NULL;
4320
4321 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4322 if (r >= 0)
4323 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4324
4325 fprintf(f,
4326 "%sCPUSchedulingPriority: %i\n"
4327 "%sCPUSchedulingResetOnFork: %s\n",
4328 prefix, c->cpu_sched_priority,
4329 prefix, yes_no(c->cpu_sched_reset_on_fork));
4330 }
4331
4332 if (c->cpu_set.set) {
4333 _cleanup_free_ char *affinity = NULL;
4334
4335 affinity = cpu_set_to_range_string(&c->cpu_set);
4336 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
4337 }
4338
4339 if (c->timer_slack_nsec != NSEC_INFINITY)
4340 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
4341
4342 fprintf(f,
4343 "%sStandardInput: %s\n"
4344 "%sStandardOutput: %s\n"
4345 "%sStandardError: %s\n",
4346 prefix, exec_input_to_string(c->std_input),
4347 prefix, exec_output_to_string(c->std_output),
4348 prefix, exec_output_to_string(c->std_error));
4349
4350 if (c->std_input == EXEC_INPUT_NAMED_FD)
4351 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4352 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4353 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4354 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4355 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4356
4357 if (c->std_input == EXEC_INPUT_FILE)
4358 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4359 if (c->std_output == EXEC_OUTPUT_FILE)
4360 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4361 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4362 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4363 if (c->std_error == EXEC_OUTPUT_FILE)
4364 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4365 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4366 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4367
4368 if (c->tty_path)
4369 fprintf(f,
4370 "%sTTYPath: %s\n"
4371 "%sTTYReset: %s\n"
4372 "%sTTYVHangup: %s\n"
4373 "%sTTYVTDisallocate: %s\n",
4374 prefix, c->tty_path,
4375 prefix, yes_no(c->tty_reset),
4376 prefix, yes_no(c->tty_vhangup),
4377 prefix, yes_no(c->tty_vt_disallocate));
4378
4379 if (IN_SET(c->std_output,
4380 EXEC_OUTPUT_SYSLOG,
4381 EXEC_OUTPUT_KMSG,
4382 EXEC_OUTPUT_JOURNAL,
4383 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4384 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4385 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4386 IN_SET(c->std_error,
4387 EXEC_OUTPUT_SYSLOG,
4388 EXEC_OUTPUT_KMSG,
4389 EXEC_OUTPUT_JOURNAL,
4390 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4391 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4392 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
4393
4394 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
4395
4396 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4397 if (r >= 0)
4398 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
4399
4400 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4401 if (r >= 0)
4402 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
4403 }
4404
4405 if (c->log_level_max >= 0) {
4406 _cleanup_free_ char *t = NULL;
4407
4408 (void) log_level_to_string_alloc(c->log_level_max, &t);
4409
4410 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4411 }
4412
4413 if (c->log_rate_limit_interval_usec > 0) {
4414 char buf_timespan[FORMAT_TIMESPAN_MAX];
4415
4416 fprintf(f,
4417 "%sLogRateLimitIntervalSec: %s\n",
4418 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4419 }
4420
4421 if (c->log_rate_limit_burst > 0)
4422 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4423
4424 if (c->n_log_extra_fields > 0) {
4425 size_t j;
4426
4427 for (j = 0; j < c->n_log_extra_fields; j++) {
4428 fprintf(f, "%sLogExtraFields: ", prefix);
4429 fwrite(c->log_extra_fields[j].iov_base,
4430 1, c->log_extra_fields[j].iov_len,
4431 f);
4432 fputc('\n', f);
4433 }
4434 }
4435
4436 if (c->secure_bits) {
4437 _cleanup_free_ char *str = NULL;
4438
4439 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4440 if (r >= 0)
4441 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4442 }
4443
4444 if (c->capability_bounding_set != CAP_ALL) {
4445 _cleanup_free_ char *str = NULL;
4446
4447 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4448 if (r >= 0)
4449 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4450 }
4451
4452 if (c->capability_ambient_set != 0) {
4453 _cleanup_free_ char *str = NULL;
4454
4455 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4456 if (r >= 0)
4457 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
4458 }
4459
4460 if (c->user)
4461 fprintf(f, "%sUser: %s\n", prefix, c->user);
4462 if (c->group)
4463 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4464
4465 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4466
4467 if (!strv_isempty(c->supplementary_groups)) {
4468 fprintf(f, "%sSupplementaryGroups:", prefix);
4469 strv_fprintf(f, c->supplementary_groups);
4470 fputs("\n", f);
4471 }
4472
4473 if (c->pam_name)
4474 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4475
4476 if (!strv_isempty(c->read_write_paths)) {
4477 fprintf(f, "%sReadWritePaths:", prefix);
4478 strv_fprintf(f, c->read_write_paths);
4479 fputs("\n", f);
4480 }
4481
4482 if (!strv_isempty(c->read_only_paths)) {
4483 fprintf(f, "%sReadOnlyPaths:", prefix);
4484 strv_fprintf(f, c->read_only_paths);
4485 fputs("\n", f);
4486 }
4487
4488 if (!strv_isempty(c->inaccessible_paths)) {
4489 fprintf(f, "%sInaccessiblePaths:", prefix);
4490 strv_fprintf(f, c->inaccessible_paths);
4491 fputs("\n", f);
4492 }
4493
4494 if (c->n_bind_mounts > 0)
4495 for (i = 0; i < c->n_bind_mounts; i++)
4496 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
4497 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4498 c->bind_mounts[i].ignore_enoent ? "-": "",
4499 c->bind_mounts[i].source,
4500 c->bind_mounts[i].destination,
4501 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4502
4503 if (c->n_temporary_filesystems > 0)
4504 for (i = 0; i < c->n_temporary_filesystems; i++) {
4505 TemporaryFileSystem *t = c->temporary_filesystems + i;
4506
4507 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4508 t->path,
4509 isempty(t->options) ? "" : ":",
4510 strempty(t->options));
4511 }
4512
4513 if (c->utmp_id)
4514 fprintf(f,
4515 "%sUtmpIdentifier: %s\n",
4516 prefix, c->utmp_id);
4517
4518 if (c->selinux_context)
4519 fprintf(f,
4520 "%sSELinuxContext: %s%s\n",
4521 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4522
4523 if (c->apparmor_profile)
4524 fprintf(f,
4525 "%sAppArmorProfile: %s%s\n",
4526 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4527
4528 if (c->smack_process_label)
4529 fprintf(f,
4530 "%sSmackProcessLabel: %s%s\n",
4531 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4532
4533 if (c->personality != PERSONALITY_INVALID)
4534 fprintf(f,
4535 "%sPersonality: %s\n",
4536 prefix, strna(personality_to_string(c->personality)));
4537
4538 fprintf(f,
4539 "%sLockPersonality: %s\n",
4540 prefix, yes_no(c->lock_personality));
4541
4542 if (c->syscall_filter) {
4543 #if HAVE_SECCOMP
4544 Iterator j;
4545 void *id, *val;
4546 bool first = true;
4547 #endif
4548
4549 fprintf(f,
4550 "%sSystemCallFilter: ",
4551 prefix);
4552
4553 if (!c->syscall_whitelist)
4554 fputc('~', f);
4555
4556 #if HAVE_SECCOMP
4557 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
4558 _cleanup_free_ char *name = NULL;
4559 const char *errno_name = NULL;
4560 int num = PTR_TO_INT(val);
4561
4562 if (first)
4563 first = false;
4564 else
4565 fputc(' ', f);
4566
4567 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4568 fputs(strna(name), f);
4569
4570 if (num >= 0) {
4571 errno_name = errno_to_name(num);
4572 if (errno_name)
4573 fprintf(f, ":%s", errno_name);
4574 else
4575 fprintf(f, ":%d", num);
4576 }
4577 }
4578 #endif
4579
4580 fputc('\n', f);
4581 }
4582
4583 if (c->syscall_archs) {
4584 #if HAVE_SECCOMP
4585 Iterator j;
4586 void *id;
4587 #endif
4588
4589 fprintf(f,
4590 "%sSystemCallArchitectures:",
4591 prefix);
4592
4593 #if HAVE_SECCOMP
4594 SET_FOREACH(id, c->syscall_archs, j)
4595 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4596 #endif
4597 fputc('\n', f);
4598 }
4599
4600 if (exec_context_restrict_namespaces_set(c)) {
4601 _cleanup_free_ char *s = NULL;
4602
4603 r = namespace_flags_to_string(c->restrict_namespaces, &s);
4604 if (r >= 0)
4605 fprintf(f, "%sRestrictNamespaces: %s\n",
4606 prefix, s);
4607 }
4608
4609 if (c->network_namespace_path)
4610 fprintf(f,
4611 "%sNetworkNamespacePath: %s\n",
4612 prefix, c->network_namespace_path);
4613
4614 if (c->syscall_errno > 0) {
4615 const char *errno_name;
4616
4617 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4618
4619 errno_name = errno_to_name(c->syscall_errno);
4620 if (errno_name)
4621 fprintf(f, "%s\n", errno_name);
4622 else
4623 fprintf(f, "%d\n", c->syscall_errno);
4624 }
4625 }
4626
4627 bool exec_context_maintains_privileges(const ExecContext *c) {
4628 assert(c);
4629
4630 /* Returns true if the process forked off would run under
4631 * an unchanged UID or as root. */
4632
4633 if (!c->user)
4634 return true;
4635
4636 if (streq(c->user, "root") || streq(c->user, "0"))
4637 return true;
4638
4639 return false;
4640 }
4641
4642 int exec_context_get_effective_ioprio(const ExecContext *c) {
4643 int p;
4644
4645 assert(c);
4646
4647 if (c->ioprio_set)
4648 return c->ioprio;
4649
4650 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4651 if (p < 0)
4652 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4653
4654 return p;
4655 }
4656
4657 void exec_context_free_log_extra_fields(ExecContext *c) {
4658 size_t l;
4659
4660 assert(c);
4661
4662 for (l = 0; l < c->n_log_extra_fields; l++)
4663 free(c->log_extra_fields[l].iov_base);
4664 c->log_extra_fields = mfree(c->log_extra_fields);
4665 c->n_log_extra_fields = 0;
4666 }
4667
4668 void exec_context_revert_tty(ExecContext *c) {
4669 int r;
4670
4671 assert(c);
4672
4673 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4674 exec_context_tty_reset(c, NULL);
4675
4676 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4677 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4678 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4679
4680 if (exec_context_may_touch_tty(c)) {
4681 const char *path;
4682
4683 path = exec_context_tty_path(c);
4684 if (path) {
4685 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4686 if (r < 0 && r != -ENOENT)
4687 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4688 }
4689 }
4690 }
4691
4692 void exec_status_start(ExecStatus *s, pid_t pid) {
4693 assert(s);
4694
4695 *s = (ExecStatus) {
4696 .pid = pid,
4697 };
4698
4699 dual_timestamp_get(&s->start_timestamp);
4700 }
4701
4702 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
4703 assert(s);
4704
4705 if (s->pid != pid) {
4706 *s = (ExecStatus) {
4707 .pid = pid,
4708 };
4709 }
4710
4711 dual_timestamp_get(&s->exit_timestamp);
4712
4713 s->code = code;
4714 s->status = status;
4715
4716 if (context && context->utmp_id)
4717 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
4718 }
4719
4720 void exec_status_reset(ExecStatus *s) {
4721 assert(s);
4722
4723 *s = (ExecStatus) {};
4724 }
4725
4726 void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
4727 char buf[FORMAT_TIMESTAMP_MAX];
4728
4729 assert(s);
4730 assert(f);
4731
4732 if (s->pid <= 0)
4733 return;
4734
4735 prefix = strempty(prefix);
4736
4737 fprintf(f,
4738 "%sPID: "PID_FMT"\n",
4739 prefix, s->pid);
4740
4741 if (dual_timestamp_is_set(&s->start_timestamp))
4742 fprintf(f,
4743 "%sStart Timestamp: %s\n",
4744 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4745
4746 if (dual_timestamp_is_set(&s->exit_timestamp))
4747 fprintf(f,
4748 "%sExit Timestamp: %s\n"
4749 "%sExit Code: %s\n"
4750 "%sExit Status: %i\n",
4751 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4752 prefix, sigchld_code_to_string(s->code),
4753 prefix, s->status);
4754 }
4755
4756 static char *exec_command_line(char **argv) {
4757 size_t k;
4758 char *n, *p, **a;
4759 bool first = true;
4760
4761 assert(argv);
4762
4763 k = 1;
4764 STRV_FOREACH(a, argv)
4765 k += strlen(*a)+3;
4766
4767 n = new(char, k);
4768 if (!n)
4769 return NULL;
4770
4771 p = n;
4772 STRV_FOREACH(a, argv) {
4773
4774 if (!first)
4775 *(p++) = ' ';
4776 else
4777 first = false;
4778
4779 if (strpbrk(*a, WHITESPACE)) {
4780 *(p++) = '\'';
4781 p = stpcpy(p, *a);
4782 *(p++) = '\'';
4783 } else
4784 p = stpcpy(p, *a);
4785
4786 }
4787
4788 *p = 0;
4789
4790 /* FIXME: this doesn't really handle arguments that have
4791 * spaces and ticks in them */
4792
4793 return n;
4794 }
4795
4796 static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4797 _cleanup_free_ char *cmd = NULL;
4798 const char *prefix2;
4799
4800 assert(c);
4801 assert(f);
4802
4803 prefix = strempty(prefix);
4804 prefix2 = strjoina(prefix, "\t");
4805
4806 cmd = exec_command_line(c->argv);
4807 fprintf(f,
4808 "%sCommand Line: %s\n",
4809 prefix, cmd ? cmd : strerror(ENOMEM));
4810
4811 exec_status_dump(&c->exec_status, f, prefix2);
4812 }
4813
4814 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4815 assert(f);
4816
4817 prefix = strempty(prefix);
4818
4819 LIST_FOREACH(command, c, c)
4820 exec_command_dump(c, f, prefix);
4821 }
4822
4823 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4824 ExecCommand *end;
4825
4826 assert(l);
4827 assert(e);
4828
4829 if (*l) {
4830 /* It's kind of important, that we keep the order here */
4831 LIST_FIND_TAIL(command, *l, end);
4832 LIST_INSERT_AFTER(command, *l, end, e);
4833 } else
4834 *l = e;
4835 }
4836
4837 int exec_command_set(ExecCommand *c, const char *path, ...) {
4838 va_list ap;
4839 char **l, *p;
4840
4841 assert(c);
4842 assert(path);
4843
4844 va_start(ap, path);
4845 l = strv_new_ap(path, ap);
4846 va_end(ap);
4847
4848 if (!l)
4849 return -ENOMEM;
4850
4851 p = strdup(path);
4852 if (!p) {
4853 strv_free(l);
4854 return -ENOMEM;
4855 }
4856
4857 free_and_replace(c->path, p);
4858
4859 return strv_free_and_replace(c->argv, l);
4860 }
4861
4862 int exec_command_append(ExecCommand *c, const char *path, ...) {
4863 _cleanup_strv_free_ char **l = NULL;
4864 va_list ap;
4865 int r;
4866
4867 assert(c);
4868 assert(path);
4869
4870 va_start(ap, path);
4871 l = strv_new_ap(path, ap);
4872 va_end(ap);
4873
4874 if (!l)
4875 return -ENOMEM;
4876
4877 r = strv_extend_strv(&c->argv, l, false);
4878 if (r < 0)
4879 return r;
4880
4881 return 0;
4882 }
4883
4884 static void *remove_tmpdir_thread(void *p) {
4885 _cleanup_free_ char *path = p;
4886
4887 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4888 return NULL;
4889 }
4890
4891 static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4892 int r;
4893
4894 if (!rt)
4895 return NULL;
4896
4897 if (rt->manager)
4898 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4899
4900 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4901 if (destroy && rt->tmp_dir) {
4902 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4903
4904 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4905 if (r < 0) {
4906 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4907 free(rt->tmp_dir);
4908 }
4909
4910 rt->tmp_dir = NULL;
4911 }
4912
4913 if (destroy && rt->var_tmp_dir) {
4914 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4915
4916 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4917 if (r < 0) {
4918 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4919 free(rt->var_tmp_dir);
4920 }
4921
4922 rt->var_tmp_dir = NULL;
4923 }
4924
4925 rt->id = mfree(rt->id);
4926 rt->tmp_dir = mfree(rt->tmp_dir);
4927 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4928 safe_close_pair(rt->netns_storage_socket);
4929 return mfree(rt);
4930 }
4931
4932 static void exec_runtime_freep(ExecRuntime **rt) {
4933 (void) exec_runtime_free(*rt, false);
4934 }
4935
4936 static int exec_runtime_allocate(ExecRuntime **ret) {
4937 ExecRuntime *n;
4938
4939 assert(ret);
4940
4941 n = new(ExecRuntime, 1);
4942 if (!n)
4943 return -ENOMEM;
4944
4945 *n = (ExecRuntime) {
4946 .netns_storage_socket = { -1, -1 },
4947 };
4948
4949 *ret = n;
4950 return 0;
4951 }
4952
4953 static int exec_runtime_add(
4954 Manager *m,
4955 const char *id,
4956 const char *tmp_dir,
4957 const char *var_tmp_dir,
4958 const int netns_storage_socket[2],
4959 ExecRuntime **ret) {
4960
4961 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
4962 int r;
4963
4964 assert(m);
4965 assert(id);
4966
4967 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4968 if (r < 0)
4969 return r;
4970
4971 r = exec_runtime_allocate(&rt);
4972 if (r < 0)
4973 return r;
4974
4975 rt->id = strdup(id);
4976 if (!rt->id)
4977 return -ENOMEM;
4978
4979 if (tmp_dir) {
4980 rt->tmp_dir = strdup(tmp_dir);
4981 if (!rt->tmp_dir)
4982 return -ENOMEM;
4983
4984 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4985 assert(var_tmp_dir);
4986 rt->var_tmp_dir = strdup(var_tmp_dir);
4987 if (!rt->var_tmp_dir)
4988 return -ENOMEM;
4989 }
4990
4991 if (netns_storage_socket) {
4992 rt->netns_storage_socket[0] = netns_storage_socket[0];
4993 rt->netns_storage_socket[1] = netns_storage_socket[1];
4994 }
4995
4996 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
4997 if (r < 0)
4998 return r;
4999
5000 rt->manager = m;
5001
5002 if (ret)
5003 *ret = rt;
5004
5005 /* do not remove created ExecRuntime object when the operation succeeds. */
5006 rt = NULL;
5007 return 0;
5008 }
5009
5010 static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5011 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
5012 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
5013 int r;
5014
5015 assert(m);
5016 assert(c);
5017 assert(id);
5018
5019 /* It is not necessary to create ExecRuntime object. */
5020 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
5021 return 0;
5022
5023 if (c->private_tmp) {
5024 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
5025 if (r < 0)
5026 return r;
5027 }
5028
5029 if (c->private_network || c->network_namespace_path) {
5030 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5031 return -errno;
5032 }
5033
5034 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5035 if (r < 0)
5036 return r;
5037
5038 /* Avoid cleanup */
5039 netns_storage_socket[0] = netns_storage_socket[1] = -1;
5040 return 1;
5041 }
5042
5043 int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5044 ExecRuntime *rt;
5045 int r;
5046
5047 assert(m);
5048 assert(id);
5049 assert(ret);
5050
5051 rt = hashmap_get(m->exec_runtime_by_id, id);
5052 if (rt)
5053 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5054 goto ref;
5055
5056 if (!create)
5057 return 0;
5058
5059 /* If not found, then create a new object. */
5060 r = exec_runtime_make(m, c, id, &rt);
5061 if (r <= 0)
5062 /* When r == 0, it is not necessary to create ExecRuntime object. */
5063 return r;
5064
5065 ref:
5066 /* increment reference counter. */
5067 rt->n_ref++;
5068 *ret = rt;
5069 return 1;
5070 }
5071
5072 ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5073 if (!rt)
5074 return NULL;
5075
5076 assert(rt->n_ref > 0);
5077
5078 rt->n_ref--;
5079 if (rt->n_ref > 0)
5080 return NULL;
5081
5082 return exec_runtime_free(rt, destroy);
5083 }
5084
5085 int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5086 ExecRuntime *rt;
5087 Iterator i;
5088
5089 assert(m);
5090 assert(f);
5091 assert(fds);
5092
5093 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5094 fprintf(f, "exec-runtime=%s", rt->id);
5095
5096 if (rt->tmp_dir)
5097 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
5098
5099 if (rt->var_tmp_dir)
5100 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
5101
5102 if (rt->netns_storage_socket[0] >= 0) {
5103 int copy;
5104
5105 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5106 if (copy < 0)
5107 return copy;
5108
5109 fprintf(f, " netns-socket-0=%i", copy);
5110 }
5111
5112 if (rt->netns_storage_socket[1] >= 0) {
5113 int copy;
5114
5115 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5116 if (copy < 0)
5117 return copy;
5118
5119 fprintf(f, " netns-socket-1=%i", copy);
5120 }
5121
5122 fputc('\n', f);
5123 }
5124
5125 return 0;
5126 }
5127
5128 int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5129 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5130 ExecRuntime *rt;
5131 int r;
5132
5133 /* This is for the migration from old (v237 or earlier) deserialization text.
5134 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5135 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5136 * so or not from the serialized text, then we always creates a new object owned by this. */
5137
5138 assert(u);
5139 assert(key);
5140 assert(value);
5141
5142 /* Manager manages ExecRuntime objects by the unit id.
5143 * So, we omit the serialized text when the unit does not have id (yet?)... */
5144 if (isempty(u->id)) {
5145 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5146 return 0;
5147 }
5148
5149 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5150 if (r < 0) {
5151 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5152 return 0;
5153 }
5154
5155 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5156 if (!rt) {
5157 r = exec_runtime_allocate(&rt_create);
5158 if (r < 0)
5159 return log_oom();
5160
5161 rt_create->id = strdup(u->id);
5162 if (!rt_create->id)
5163 return log_oom();
5164
5165 rt = rt_create;
5166 }
5167
5168 if (streq(key, "tmp-dir")) {
5169 char *copy;
5170
5171 copy = strdup(value);
5172 if (!copy)
5173 return log_oom();
5174
5175 free_and_replace(rt->tmp_dir, copy);
5176
5177 } else if (streq(key, "var-tmp-dir")) {
5178 char *copy;
5179
5180 copy = strdup(value);
5181 if (!copy)
5182 return log_oom();
5183
5184 free_and_replace(rt->var_tmp_dir, copy);
5185
5186 } else if (streq(key, "netns-socket-0")) {
5187 int fd;
5188
5189 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
5190 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
5191 return 0;
5192 }
5193
5194 safe_close(rt->netns_storage_socket[0]);
5195 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5196
5197 } else if (streq(key, "netns-socket-1")) {
5198 int fd;
5199
5200 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
5201 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
5202 return 0;
5203 }
5204
5205 safe_close(rt->netns_storage_socket[1]);
5206 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
5207 } else
5208 return 0;
5209
5210 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5211 if (rt_create) {
5212 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5213 if (r < 0) {
5214 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
5215 return 0;
5216 }
5217
5218 rt_create->manager = u->manager;
5219
5220 /* Avoid cleanup */
5221 rt_create = NULL;
5222 }
5223
5224 return 1;
5225 }
5226
5227 void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5228 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5229 int r, fd0 = -1, fd1 = -1;
5230 const char *p, *v = value;
5231 size_t n;
5232
5233 assert(m);
5234 assert(value);
5235 assert(fds);
5236
5237 n = strcspn(v, " ");
5238 id = strndupa(v, n);
5239 if (v[n] != ' ')
5240 goto finalize;
5241 p = v + n + 1;
5242
5243 v = startswith(p, "tmp-dir=");
5244 if (v) {
5245 n = strcspn(v, " ");
5246 tmp_dir = strndupa(v, n);
5247 if (v[n] != ' ')
5248 goto finalize;
5249 p = v + n + 1;
5250 }
5251
5252 v = startswith(p, "var-tmp-dir=");
5253 if (v) {
5254 n = strcspn(v, " ");
5255 var_tmp_dir = strndupa(v, n);
5256 if (v[n] != ' ')
5257 goto finalize;
5258 p = v + n + 1;
5259 }
5260
5261 v = startswith(p, "netns-socket-0=");
5262 if (v) {
5263 char *buf;
5264
5265 n = strcspn(v, " ");
5266 buf = strndupa(v, n);
5267 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5268 log_debug("Unable to process exec-runtime netns fd specification.");
5269 return;
5270 }
5271 fd0 = fdset_remove(fds, fd0);
5272 if (v[n] != ' ')
5273 goto finalize;
5274 p = v + n + 1;
5275 }
5276
5277 v = startswith(p, "netns-socket-1=");
5278 if (v) {
5279 char *buf;
5280
5281 n = strcspn(v, " ");
5282 buf = strndupa(v, n);
5283 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5284 log_debug("Unable to process exec-runtime netns fd specification.");
5285 return;
5286 }
5287 fd1 = fdset_remove(fds, fd1);
5288 }
5289
5290 finalize:
5291
5292 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
5293 if (r < 0)
5294 log_debug_errno(r, "Failed to add exec-runtime: %m");
5295 }
5296
5297 void exec_runtime_vacuum(Manager *m) {
5298 ExecRuntime *rt;
5299 Iterator i;
5300
5301 assert(m);
5302
5303 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5304
5305 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5306 if (rt->n_ref > 0)
5307 continue;
5308
5309 (void) exec_runtime_free(rt, false);
5310 }
5311 }
5312
5313 void exec_params_clear(ExecParameters *p) {
5314 if (!p)
5315 return;
5316
5317 strv_free(p->environment);
5318 }
5319
5320 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5321 [EXEC_INPUT_NULL] = "null",
5322 [EXEC_INPUT_TTY] = "tty",
5323 [EXEC_INPUT_TTY_FORCE] = "tty-force",
5324 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
5325 [EXEC_INPUT_SOCKET] = "socket",
5326 [EXEC_INPUT_NAMED_FD] = "fd",
5327 [EXEC_INPUT_DATA] = "data",
5328 [EXEC_INPUT_FILE] = "file",
5329 };
5330
5331 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5332
5333 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
5334 [EXEC_OUTPUT_INHERIT] = "inherit",
5335 [EXEC_OUTPUT_NULL] = "null",
5336 [EXEC_OUTPUT_TTY] = "tty",
5337 [EXEC_OUTPUT_SYSLOG] = "syslog",
5338 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
5339 [EXEC_OUTPUT_KMSG] = "kmsg",
5340 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
5341 [EXEC_OUTPUT_JOURNAL] = "journal",
5342 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
5343 [EXEC_OUTPUT_SOCKET] = "socket",
5344 [EXEC_OUTPUT_NAMED_FD] = "fd",
5345 [EXEC_OUTPUT_FILE] = "file",
5346 [EXEC_OUTPUT_FILE_APPEND] = "append",
5347 };
5348
5349 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
5350
5351 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5352 [EXEC_UTMP_INIT] = "init",
5353 [EXEC_UTMP_LOGIN] = "login",
5354 [EXEC_UTMP_USER] = "user",
5355 };
5356
5357 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
5358
5359 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5360 [EXEC_PRESERVE_NO] = "no",
5361 [EXEC_PRESERVE_YES] = "yes",
5362 [EXEC_PRESERVE_RESTART] = "restart",
5363 };
5364
5365 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
5366
5367 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5368 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5369 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5370 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5371 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5372 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5373 };
5374
5375 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
5376
5377 static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5378 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5379 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5380 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5381 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5382 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5383 };
5384
5385 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5386
5387 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5388 [EXEC_KEYRING_INHERIT] = "inherit",
5389 [EXEC_KEYRING_PRIVATE] = "private",
5390 [EXEC_KEYRING_SHARED] = "shared",
5391 };
5392
5393 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);