]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
tree-wide: port various places over to use chmod_and_chown()
[thirdparty/systemd.git] / src / core / execute.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <glob.h>
6 #include <grp.h>
7 #include <poll.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <sys/capability.h>
11 #include <sys/eventfd.h>
12 #include <sys/mman.h>
13 #include <sys/personality.h>
14 #include <sys/prctl.h>
15 #include <sys/shm.h>
16 #include <sys/socket.h>
17 #include <sys/stat.h>
18 #include <sys/types.h>
19 #include <sys/un.h>
20 #include <unistd.h>
21 #include <utmpx.h>
22
23 #if HAVE_PAM
24 #include <security/pam_appl.h>
25 #endif
26
27 #if HAVE_SELINUX
28 #include <selinux/selinux.h>
29 #endif
30
31 #if HAVE_SECCOMP
32 #include <seccomp.h>
33 #endif
34
35 #if HAVE_APPARMOR
36 #include <sys/apparmor.h>
37 #endif
38
39 #include "sd-messages.h"
40
41 #include "af-list.h"
42 #include "alloc-util.h"
43 #if HAVE_APPARMOR
44 #include "apparmor-util.h"
45 #endif
46 #include "async.h"
47 #include "barrier.h"
48 #include "cap-list.h"
49 #include "capability-util.h"
50 #include "chown-recursive.h"
51 #include "cpu-set-util.h"
52 #include "def.h"
53 #include "env-file.h"
54 #include "env-util.h"
55 #include "errno-list.h"
56 #include "execute.h"
57 #include "exit-status.h"
58 #include "fd-util.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "glob-util.h"
62 #include "io-util.h"
63 #include "ioprio.h"
64 #include "label.h"
65 #include "log.h"
66 #include "macro.h"
67 #include "manager.h"
68 #include "memory-util.h"
69 #include "missing.h"
70 #include "mkdir.h"
71 #include "namespace.h"
72 #include "parse-util.h"
73 #include "path-util.h"
74 #include "process-util.h"
75 #include "rlimit-util.h"
76 #include "rm-rf.h"
77 #if HAVE_SECCOMP
78 #include "seccomp-util.h"
79 #endif
80 #include "securebits-util.h"
81 #include "selinux-util.h"
82 #include "signal-util.h"
83 #include "smack-util.h"
84 #include "socket-util.h"
85 #include "special.h"
86 #include "stat-util.h"
87 #include "string-table.h"
88 #include "string-util.h"
89 #include "strv.h"
90 #include "syslog-util.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "unit.h"
94 #include "user-util.h"
95 #include "utmp-wtmp.h"
96
/* Idle timeouts, expressed in microseconds: five seconds and one second respectively. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode applied to terminals we take ownership of. This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the logging stream socket (8 MiB). */
#define SNDBUF_SIZE (8 * 1024 * 1024)
104
/* Renumbers the passed file descriptors so that fds[i] ends up being fd number i+3. The array is updated in
 * place with the new fd numbers. Returns 0 on success, or a negative errno-style error if duplicating one of
 * the fds fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Note: this modifies the fds array! */

        assert(fds);

        for (;;) {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted. If the slot was still taken we
                         * got something higher; remember the first such index and run another pass
                         * starting there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
149
/* Adjusts the file descriptor flags of the fds we are about to pass to the child. The first n_socket_fds
 * entries get O_NONBLOCK set or cleared according to 'nonblock'; FD_CLOEXEC is dropped from all
 * n_socket_fds + n_storage_fds entries. Returns 0 on success, or the first negative error encountered. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds;
        size_t k;
        int r;

        if (total <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {

                /* O_NONBLOCK is only relevant for socket activation, i.e. the leading socket fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is turned off unconditionally: the whole point is to hand these fds over
                 * to our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
182
183 static const char *exec_context_tty_path(const ExecContext *context) {
184 assert(context);
185
186 if (context->stdio_as_fds)
187 return NULL;
188
189 if (context->tty_path)
190 return context->tty_path;
191
192 return "/dev/console";
193 }
194
195 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
196 const char *path;
197
198 assert(context);
199
200 path = exec_context_tty_path(context);
201
202 if (context->tty_vhangup) {
203 if (p && p->stdin_fd >= 0)
204 (void) terminal_vhangup_fd(p->stdin_fd);
205 else if (path)
206 (void) terminal_vhangup(path);
207 }
208
209 if (context->tty_reset) {
210 if (p && p->stdin_fd >= 0)
211 (void) reset_terminal_fd(p->stdin_fd, true);
212 else if (path)
213 (void) reset_terminal(path);
214 }
215
216 if (context->tty_vt_disallocate && path)
217 (void) vt_disallocate(path);
218 }
219
220 static bool is_terminal_input(ExecInput i) {
221 return IN_SET(i,
222 EXEC_INPUT_TTY,
223 EXEC_INPUT_TTY_FORCE,
224 EXEC_INPUT_TTY_FAIL);
225 }
226
227 static bool is_terminal_output(ExecOutput o) {
228 return IN_SET(o,
229 EXEC_OUTPUT_TTY,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
231 EXEC_OUTPUT_KMSG_AND_CONSOLE,
232 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
233 }
234
235 static bool is_syslog_output(ExecOutput o) {
236 return IN_SET(o,
237 EXEC_OUTPUT_SYSLOG,
238 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
239 }
240
241 static bool is_kmsg_output(ExecOutput o) {
242 return IN_SET(o,
243 EXEC_OUTPUT_KMSG,
244 EXEC_OUTPUT_KMSG_AND_CONSOLE);
245 }
246
247 static bool exec_context_needs_term(const ExecContext *c) {
248 assert(c);
249
250 /* Return true if the execution context suggests we should set $TERM to something useful. */
251
252 if (is_terminal_input(c->std_input))
253 return true;
254
255 if (is_terminal_output(c->std_output))
256 return true;
257
258 if (is_terminal_output(c->std_error))
259 return true;
260
261 return !!c->tty_path;
262 }
263
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and moves the resulting fd to 'nfd'. Returns
 * the result of move_fd() on success, or a negative errno if opening fails. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd >= 0)
                return move_fd(null_fd, nfd, false);

        return -errno;
}
275
276 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
277 static const union sockaddr_union sa = {
278 .un.sun_family = AF_UNIX,
279 .un.sun_path = "/run/systemd/journal/stdout",
280 };
281 uid_t olduid = UID_INVALID;
282 gid_t oldgid = GID_INVALID;
283 int r;
284
285 if (gid_is_valid(gid)) {
286 oldgid = getgid();
287
288 if (setegid(gid) < 0)
289 return -errno;
290 }
291
292 if (uid_is_valid(uid)) {
293 olduid = getuid();
294
295 if (seteuid(uid) < 0) {
296 r = -errno;
297 goto restore_gid;
298 }
299 }
300
301 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
302
303 /* If we fail to restore the uid or gid, things will likely
304 fail later on. This should only happen if an LSM interferes. */
305
306 if (uid_is_valid(uid))
307 (void) seteuid(olduid);
308
309 restore_gid:
310 if (gid_is_valid(gid))
311 (void) setegid(oldgid);
312
313 return r;
314 }
315
316 static int connect_logger_as(
317 const Unit *unit,
318 const ExecContext *context,
319 const ExecParameters *params,
320 ExecOutput output,
321 const char *ident,
322 int nfd,
323 uid_t uid,
324 gid_t gid) {
325
326 _cleanup_close_ int fd = -1;
327 int r;
328
329 assert(context);
330 assert(params);
331 assert(output < _EXEC_OUTPUT_MAX);
332 assert(ident);
333 assert(nfd >= 0);
334
335 fd = socket(AF_UNIX, SOCK_STREAM, 0);
336 if (fd < 0)
337 return -errno;
338
339 r = connect_journal_socket(fd, uid, gid);
340 if (r < 0)
341 return r;
342
343 if (shutdown(fd, SHUT_RD) < 0)
344 return -errno;
345
346 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
347
348 if (dprintf(fd,
349 "%s\n"
350 "%s\n"
351 "%i\n"
352 "%i\n"
353 "%i\n"
354 "%i\n"
355 "%i\n",
356 context->syslog_identifier ?: ident,
357 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
358 context->syslog_priority,
359 !!context->syslog_level_prefix,
360 is_syslog_output(output),
361 is_kmsg_output(output),
362 is_terminal_output(output)) < 0)
363 return -errno;
364
365 return move_fd(TAKE_FD(fd), nfd, false);
366 }
367
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY) and moves the resulting fd to 'nfd'.
 * Returns the result of move_fd() on success, or the negative error returned by open_terminal(). */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);
        if (tty_fd >= 0)
                return move_fd(tty_fd, nfd, false);

        return tty_fd;
}
380
381 static int acquire_path(const char *path, int flags, mode_t mode) {
382 union sockaddr_union sa = {};
383 _cleanup_close_ int fd = -1;
384 int r, salen;
385
386 assert(path);
387
388 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
389 flags |= O_CREAT;
390
391 fd = open(path, flags|O_NOCTTY, mode);
392 if (fd >= 0)
393 return TAKE_FD(fd);
394
395 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
396 return -errno;
397 if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
398 return -ENXIO;
399
400 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
401
402 fd = socket(AF_UNIX, SOCK_STREAM, 0);
403 if (fd < 0)
404 return -errno;
405
406 salen = sockaddr_un_set_path(&sa.un, path);
407 if (salen < 0)
408 return salen;
409
410 if (connect(fd, &sa.sa, salen) < 0)
411 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
412 * indication that his wasn't an AF_UNIX socket after all */
413
414 if ((flags & O_ACCMODE) == O_RDONLY)
415 r = shutdown(fd, SHUT_WR);
416 else if ((flags & O_ACCMODE) == O_WRONLY)
417 r = shutdown(fd, SHUT_RD);
418 else
419 return TAKE_FD(fd);
420 if (r < 0)
421 return -errno;
422
423 return TAKE_FD(fd);
424 }
425
426 static int fixup_input(
427 const ExecContext *context,
428 int socket_fd,
429 bool apply_tty_stdin) {
430
431 ExecInput std_input;
432
433 assert(context);
434
435 std_input = context->std_input;
436
437 if (is_terminal_input(std_input) && !apply_tty_stdin)
438 return EXEC_INPUT_NULL;
439
440 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
441 return EXEC_INPUT_NULL;
442
443 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
444 return EXEC_INPUT_NULL;
445
446 return std_input;
447 }
448
449 static int fixup_output(ExecOutput std_output, int socket_fd) {
450
451 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
452 return EXEC_OUTPUT_INHERIT;
453
454 return std_output;
455 }
456
457 static int setup_input(
458 const ExecContext *context,
459 const ExecParameters *params,
460 int socket_fd,
461 int named_iofds[3]) {
462
463 ExecInput i;
464
465 assert(context);
466 assert(params);
467
468 if (params->stdin_fd >= 0) {
469 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
470 return -errno;
471
472 /* Try to make this the controlling tty, if it is a tty, and reset it */
473 if (isatty(STDIN_FILENO)) {
474 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
475 (void) reset_terminal_fd(STDIN_FILENO, true);
476 }
477
478 return STDIN_FILENO;
479 }
480
481 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
482
483 switch (i) {
484
485 case EXEC_INPUT_NULL:
486 return open_null_as(O_RDONLY, STDIN_FILENO);
487
488 case EXEC_INPUT_TTY:
489 case EXEC_INPUT_TTY_FORCE:
490 case EXEC_INPUT_TTY_FAIL: {
491 int fd;
492
493 fd = acquire_terminal(exec_context_tty_path(context),
494 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
495 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
496 ACQUIRE_TERMINAL_WAIT,
497 USEC_INFINITY);
498 if (fd < 0)
499 return fd;
500
501 return move_fd(fd, STDIN_FILENO, false);
502 }
503
504 case EXEC_INPUT_SOCKET:
505 assert(socket_fd >= 0);
506
507 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
508
509 case EXEC_INPUT_NAMED_FD:
510 assert(named_iofds[STDIN_FILENO] >= 0);
511
512 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
513 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
514
515 case EXEC_INPUT_DATA: {
516 int fd;
517
518 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
519 if (fd < 0)
520 return fd;
521
522 return move_fd(fd, STDIN_FILENO, false);
523 }
524
525 case EXEC_INPUT_FILE: {
526 bool rw;
527 int fd;
528
529 assert(context->stdio_file[STDIN_FILENO]);
530
531 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
532 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
533
534 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
535 if (fd < 0)
536 return fd;
537
538 return move_fd(fd, STDIN_FILENO, false);
539 }
540
541 default:
542 assert_not_reached("Unknown input type");
543 }
544 }
545
546 static bool can_inherit_stderr_from_stdout(
547 const ExecContext *context,
548 ExecOutput o,
549 ExecOutput e) {
550
551 assert(context);
552
553 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
554 * stderr fd */
555
556 if (e == EXEC_OUTPUT_INHERIT)
557 return true;
558 if (e != o)
559 return false;
560
561 if (e == EXEC_OUTPUT_NAMED_FD)
562 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
563
564 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
565 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
566
567 return true;
568 }
569
570 static int setup_output(
571 const Unit *unit,
572 const ExecContext *context,
573 const ExecParameters *params,
574 int fileno,
575 int socket_fd,
576 int named_iofds[3],
577 const char *ident,
578 uid_t uid,
579 gid_t gid,
580 dev_t *journal_stream_dev,
581 ino_t *journal_stream_ino) {
582
583 ExecOutput o;
584 ExecInput i;
585 int r;
586
587 assert(unit);
588 assert(context);
589 assert(params);
590 assert(ident);
591 assert(journal_stream_dev);
592 assert(journal_stream_ino);
593
594 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
595
596 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
597 return -errno;
598
599 return STDOUT_FILENO;
600 }
601
602 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
603 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
604 return -errno;
605
606 return STDERR_FILENO;
607 }
608
609 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
610 o = fixup_output(context->std_output, socket_fd);
611
612 if (fileno == STDERR_FILENO) {
613 ExecOutput e;
614 e = fixup_output(context->std_error, socket_fd);
615
616 /* This expects the input and output are already set up */
617
618 /* Don't change the stderr file descriptor if we inherit all
619 * the way and are not on a tty */
620 if (e == EXEC_OUTPUT_INHERIT &&
621 o == EXEC_OUTPUT_INHERIT &&
622 i == EXEC_INPUT_NULL &&
623 !is_terminal_input(context->std_input) &&
624 getppid () != 1)
625 return fileno;
626
627 /* Duplicate from stdout if possible */
628 if (can_inherit_stderr_from_stdout(context, o, e))
629 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
630
631 o = e;
632
633 } else if (o == EXEC_OUTPUT_INHERIT) {
634 /* If input got downgraded, inherit the original value */
635 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
636 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
637
638 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
639 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
640 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
641
642 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
643 if (getppid() != 1)
644 return fileno;
645
646 /* We need to open /dev/null here anew, to get the right access mode. */
647 return open_null_as(O_WRONLY, fileno);
648 }
649
650 switch (o) {
651
652 case EXEC_OUTPUT_NULL:
653 return open_null_as(O_WRONLY, fileno);
654
655 case EXEC_OUTPUT_TTY:
656 if (is_terminal_input(i))
657 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
658
659 /* We don't reset the terminal if this is just about output */
660 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
661
662 case EXEC_OUTPUT_SYSLOG:
663 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
664 case EXEC_OUTPUT_KMSG:
665 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
666 case EXEC_OUTPUT_JOURNAL:
667 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
668 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
669 if (r < 0) {
670 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
671 r = open_null_as(O_WRONLY, fileno);
672 } else {
673 struct stat st;
674
675 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
676 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
677 * services to detect whether they are connected to the journal or not.
678 *
679 * If both stdout and stderr are connected to a stream then let's make sure to store the data
680 * about STDERR as that's usually the best way to do logging. */
681
682 if (fstat(fileno, &st) >= 0 &&
683 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
684 *journal_stream_dev = st.st_dev;
685 *journal_stream_ino = st.st_ino;
686 }
687 }
688 return r;
689
690 case EXEC_OUTPUT_SOCKET:
691 assert(socket_fd >= 0);
692
693 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
694
695 case EXEC_OUTPUT_NAMED_FD:
696 assert(named_iofds[fileno] >= 0);
697
698 (void) fd_nonblock(named_iofds[fileno], false);
699 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
700
701 case EXEC_OUTPUT_FILE:
702 case EXEC_OUTPUT_FILE_APPEND: {
703 bool rw;
704 int fd, flags;
705
706 assert(context->stdio_file[fileno]);
707
708 rw = context->std_input == EXEC_INPUT_FILE &&
709 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
710
711 if (rw)
712 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
713
714 flags = O_WRONLY;
715 if (o == EXEC_OUTPUT_FILE_APPEND)
716 flags |= O_APPEND;
717
718 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
719 if (fd < 0)
720 return fd;
721
722 return move_fd(fd, fileno, 0);
723 }
724
725 default:
726 assert_not_reached("Unknown error type");
727 }
728 }
729
730 static int chown_terminal(int fd, uid_t uid) {
731 int r;
732
733 assert(fd >= 0);
734
735 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
736 if (isatty(fd) < 1) {
737 if (IN_SET(errno, EINVAL, ENOTTY))
738 return 0; /* not a tty */
739
740 return -errno;
741 }
742
743 /* This might fail. What matters are the results. */
744 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
745 if (r < 0)
746 return r;
747
748 return 1;
749 }
750
751 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
752 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
753 int r;
754
755 assert(_saved_stdin);
756 assert(_saved_stdout);
757
758 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
759 if (saved_stdin < 0)
760 return -errno;
761
762 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
763 if (saved_stdout < 0)
764 return -errno;
765
766 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
767 if (fd < 0)
768 return fd;
769
770 r = chown_terminal(fd, getuid());
771 if (r < 0)
772 return r;
773
774 r = reset_terminal_fd(fd, true);
775 if (r < 0)
776 return r;
777
778 r = rearrange_stdio(fd, fd, STDERR_FILENO);
779 fd = -1;
780 if (r < 0)
781 return r;
782
783 *_saved_stdin = saved_stdin;
784 *_saved_stdout = saved_stdout;
785
786 saved_stdin = saved_stdout = -1;
787
788 return 0;
789 }
790
791 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
792 assert(err < 0);
793
794 if (err == -ETIMEDOUT)
795 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
796 else {
797 errno = -err;
798 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
799 }
800 }
801
802 static void write_confirm_error(int err, const char *vc, const Unit *u) {
803 _cleanup_close_ int fd = -1;
804
805 assert(vc);
806
807 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
808 if (fd < 0)
809 return;
810
811 write_confirm_error_fd(err, fd, u);
812 }
813
/* Undoes setup_confirm_stdio(): releases the terminal, puts the stashed stdin/stdout fds back in place (as
 * far as they are valid) and closes the stashed copies. Returns 0 on success, or the negative errno of the
 * last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        /* The stashed copies are closed in any case; the variables take whatever safe_close() returns */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
835
/* Possible outcomes of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* execute the command */
};
841
842 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
843 int saved_stdout = -1, saved_stdin = -1, r;
844 _cleanup_free_ char *e = NULL;
845 char c;
846
847 /* For any internal errors, assume a positive response. */
848 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
849 if (r < 0) {
850 write_confirm_error(r, vc, u);
851 return CONFIRM_EXECUTE;
852 }
853
854 /* confirm_spawn might have been disabled while we were sleeping. */
855 if (manager_is_confirm_spawn_disabled(u->manager)) {
856 r = 1;
857 goto restore_stdio;
858 }
859
860 e = ellipsize(cmdline, 60, 100);
861 if (!e) {
862 log_oom();
863 r = CONFIRM_EXECUTE;
864 goto restore_stdio;
865 }
866
867 for (;;) {
868 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
869 if (r < 0) {
870 write_confirm_error_fd(r, STDOUT_FILENO, u);
871 r = CONFIRM_EXECUTE;
872 goto restore_stdio;
873 }
874
875 switch (c) {
876 case 'c':
877 printf("Resuming normal execution.\n");
878 manager_disable_confirm_spawn();
879 r = 1;
880 break;
881 case 'D':
882 unit_dump(u, stdout, " ");
883 continue; /* ask again */
884 case 'f':
885 printf("Failing execution.\n");
886 r = CONFIRM_PRETEND_FAILURE;
887 break;
888 case 'h':
889 printf(" c - continue, proceed without asking anymore\n"
890 " D - dump, show the state of the unit\n"
891 " f - fail, don't execute the command and pretend it failed\n"
892 " h - help\n"
893 " i - info, show a short summary of the unit\n"
894 " j - jobs, show jobs that are in progress\n"
895 " s - skip, don't execute the command and pretend it succeeded\n"
896 " y - yes, execute the command\n");
897 continue; /* ask again */
898 case 'i':
899 printf(" Description: %s\n"
900 " Unit: %s\n"
901 " Command: %s\n",
902 u->id, u->description, cmdline);
903 continue; /* ask again */
904 case 'j':
905 manager_dump_jobs(u->manager, stdout, " ");
906 continue; /* ask again */
907 case 'n':
908 /* 'n' was removed in favor of 'f'. */
909 printf("Didn't understand 'n', did you mean 'f'?\n");
910 continue; /* ask again */
911 case 's':
912 printf("Skipping execution.\n");
913 r = CONFIRM_PRETEND_SUCCESS;
914 break;
915 case 'y':
916 r = CONFIRM_EXECUTE;
917 break;
918 default:
919 assert_not_reached("Unhandled choice");
920 }
921 break;
922 }
923
924 restore_stdio:
925 restore_confirm_stdio(&saved_stdin, &saved_stdout);
926 return r;
927 }
928
929 static int get_fixed_user(const ExecContext *c, const char **user,
930 uid_t *uid, gid_t *gid,
931 const char **home, const char **shell) {
932 int r;
933 const char *name;
934
935 assert(c);
936
937 if (!c->user)
938 return 0;
939
940 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
941 * (i.e. are "/" or "/bin/nologin"). */
942
943 name = c->user;
944 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
945 if (r < 0)
946 return r;
947
948 *user = name;
949 return 0;
950 }
951
952 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
953 int r;
954 const char *name;
955
956 assert(c);
957
958 if (!c->group)
959 return 0;
960
961 name = c->group;
962 r = get_group_creds(&name, gid, 0);
963 if (r < 0)
964 return r;
965
966 *group = name;
967 return 0;
968 }
969
970 static int get_supplementary_groups(const ExecContext *c, const char *user,
971 const char *group, gid_t gid,
972 gid_t **supplementary_gids, int *ngids) {
973 char **i;
974 int r, k = 0;
975 int ngroups_max;
976 bool keep_groups = false;
977 gid_t *groups = NULL;
978 _cleanup_free_ gid_t *l_gids = NULL;
979
980 assert(c);
981
982 /*
983 * If user is given, then lookup GID and supplementary groups list.
984 * We avoid NSS lookups for gid=0. Also we have to initialize groups
985 * here and as early as possible so we keep the list of supplementary
986 * groups of the caller.
987 */
988 if (user && gid_is_valid(gid) && gid != 0) {
989 /* First step, initialize groups from /etc/groups */
990 if (initgroups(user, gid) < 0)
991 return -errno;
992
993 keep_groups = true;
994 }
995
996 if (strv_isempty(c->supplementary_groups))
997 return 0;
998
999 /*
1000 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
1001 * be positive, otherwise fail.
1002 */
1003 errno = 0;
1004 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
1005 if (ngroups_max <= 0) {
1006 if (errno > 0)
1007 return -errno;
1008 else
1009 return -EOPNOTSUPP; /* For all other values */
1010 }
1011
1012 l_gids = new(gid_t, ngroups_max);
1013 if (!l_gids)
1014 return -ENOMEM;
1015
1016 if (keep_groups) {
1017 /*
1018 * Lookup the list of groups that the user belongs to, we
1019 * avoid NSS lookups here too for gid=0.
1020 */
1021 k = ngroups_max;
1022 if (getgrouplist(user, gid, l_gids, &k) < 0)
1023 return -EINVAL;
1024 } else
1025 k = 0;
1026
1027 STRV_FOREACH(i, c->supplementary_groups) {
1028 const char *g;
1029
1030 if (k >= ngroups_max)
1031 return -E2BIG;
1032
1033 g = *i;
1034 r = get_group_creds(&g, l_gids+k, 0);
1035 if (r < 0)
1036 return r;
1037
1038 k++;
1039 }
1040
1041 /*
1042 * Sets ngids to zero to drop all supplementary groups, happens
1043 * when we are under root and SupplementaryGroups= is empty.
1044 */
1045 if (k == 0) {
1046 *ngids = 0;
1047 return 0;
1048 }
1049
1050 /* Otherwise get the final list of supplementary groups */
1051 groups = memdup(l_gids, sizeof(gid_t) * k);
1052 if (!groups)
1053 return -ENOMEM;
1054
1055 *supplementary_gids = groups;
1056 *ngids = k;
1057
1058 groups = NULL;
1059
1060 return 0;
1061 }
1062
/* Applies the group credentials to the current process: first the supplementary groups (if any were
 * passed), then the real, effective and saved GID (if 'gid' is valid). Returns 0 on success, or a negative
 * errno-style error. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
        int r;

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids, if there is one to set */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1081
1082 static int enforce_user(const ExecContext *context, uid_t uid) {
1083 assert(context);
1084
1085 if (!uid_is_valid(uid))
1086 return 0;
1087
1088 /* Sets (but doesn't look up) the uid and make sure we keep the
1089 * capabilities while doing so. */
1090
1091 if (context->capability_ambient_set != 0) {
1092
1093 /* First step: If we need to keep capabilities but
1094 * drop privileges we need to make sure we keep our
1095 * caps, while we drop privileges. */
1096 if (uid != 0) {
1097 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1098
1099 if (prctl(PR_GET_SECUREBITS) != sb)
1100 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1101 return -errno;
1102 }
1103 }
1104
1105 /* Second step: actually set the uids */
1106 if (setresuid(uid, uid, uid) < 0)
1107 return -errno;
1108
1109 /* At this point we should have all necessary capabilities but
1110 are otherwise a normal user. However, the caps might got
1111 corrupted due to the setresuid() so we need clean them up
1112 later. This is done outside of this call. */
1113
1114 return 0;
1115 }
1116
1117 #if HAVE_PAM
1118
1119 static int null_conv(
1120 int num_msg,
1121 const struct pam_message **msg,
1122 struct pam_response **resp,
1123 void *appdata_ptr) {
1124
1125 /* We don't support conversations */
1126
1127 return PAM_CONV_ERR;
1128 }
1129
1130 #endif
1131
1132 static int setup_pam(
1133 const char *name,
1134 const char *user,
1135 uid_t uid,
1136 gid_t gid,
1137 const char *tty,
1138 char ***env,
1139 int fds[], size_t n_fds) {
1140
1141 #if HAVE_PAM
1142
1143 static const struct pam_conv conv = {
1144 .conv = null_conv,
1145 .appdata_ptr = NULL
1146 };
1147
1148 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
1149 pam_handle_t *handle = NULL;
1150 sigset_t old_ss;
1151 int pam_code = PAM_SUCCESS, r;
1152 char **nv, **e = NULL;
1153 bool close_session = false;
1154 pid_t pam_pid = 0, parent_pid;
1155 int flags = 0;
1156
1157 assert(name);
1158 assert(user);
1159 assert(env);
1160
1161 /* We set up PAM in the parent process, then fork. The child
1162 * will then stay around until killed via PR_GET_PDEATHSIG or
1163 * systemd via the cgroup logic. It will then remove the PAM
1164 * session again. The parent process will exec() the actual
1165 * daemon. We do things this way to ensure that the main PID
1166 * of the daemon is the one we initially fork()ed. */
1167
1168 r = barrier_create(&barrier);
1169 if (r < 0)
1170 goto fail;
1171
1172 if (log_get_max_level() < LOG_DEBUG)
1173 flags |= PAM_SILENT;
1174
1175 pam_code = pam_start(name, user, &conv, &handle);
1176 if (pam_code != PAM_SUCCESS) {
1177 handle = NULL;
1178 goto fail;
1179 }
1180
1181 if (!tty) {
1182 _cleanup_free_ char *q = NULL;
1183
1184 /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
1185 * out if that's the case, and read the TTY off it. */
1186
1187 if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
1188 tty = strjoina("/dev/", q);
1189 }
1190
1191 if (tty) {
1192 pam_code = pam_set_item(handle, PAM_TTY, tty);
1193 if (pam_code != PAM_SUCCESS)
1194 goto fail;
1195 }
1196
1197 STRV_FOREACH(nv, *env) {
1198 pam_code = pam_putenv(handle, *nv);
1199 if (pam_code != PAM_SUCCESS)
1200 goto fail;
1201 }
1202
1203 pam_code = pam_acct_mgmt(handle, flags);
1204 if (pam_code != PAM_SUCCESS)
1205 goto fail;
1206
1207 pam_code = pam_open_session(handle, flags);
1208 if (pam_code != PAM_SUCCESS)
1209 goto fail;
1210
1211 close_session = true;
1212
1213 e = pam_getenvlist(handle);
1214 if (!e) {
1215 pam_code = PAM_BUF_ERR;
1216 goto fail;
1217 }
1218
1219 /* Block SIGTERM, so that we know that it won't get lost in
1220 * the child */
1221
1222 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
1223
1224 parent_pid = getpid_cached();
1225
1226 r = safe_fork("(sd-pam)", 0, &pam_pid);
1227 if (r < 0)
1228 goto fail;
1229 if (r == 0) {
1230 int sig, ret = EXIT_PAM;
1231
1232 /* The child's job is to reset the PAM session on
1233 * termination */
1234 barrier_set_role(&barrier, BARRIER_CHILD);
1235
1236 /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
1237 * are open here that have been opened by PAM. */
1238 (void) close_many(fds, n_fds);
1239
1240 /* Drop privileges - we don't need any to pam_close_session
1241 * and this will make PR_SET_PDEATHSIG work in most cases.
1242 * If this fails, ignore the error - but expect sd-pam threads
1243 * to fail to exit normally */
1244
1245 r = maybe_setgroups(0, NULL);
1246 if (r < 0)
1247 log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
1248 if (setresgid(gid, gid, gid) < 0)
1249 log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
1250 if (setresuid(uid, uid, uid) < 0)
1251 log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
1252
1253 (void) ignore_signals(SIGPIPE, -1);
1254
1255 /* Wait until our parent died. This will only work if
1256 * the above setresuid() succeeds, otherwise the kernel
1257 * will not allow unprivileged parents kill their privileged
1258 * children this way. We rely on the control groups kill logic
1259 * to do the rest for us. */
1260 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
1261 goto child_finish;
1262
1263 /* Tell the parent that our setup is done. This is especially
1264 * important regarding dropping privileges. Otherwise, unit
1265 * setup might race against our setresuid(2) call.
1266 *
1267 * If the parent aborted, we'll detect this below, hence ignore
1268 * return failure here. */
1269 (void) barrier_place(&barrier);
1270
1271 /* Check if our parent process might already have died? */
1272 if (getppid() == parent_pid) {
1273 sigset_t ss;
1274
1275 assert_se(sigemptyset(&ss) >= 0);
1276 assert_se(sigaddset(&ss, SIGTERM) >= 0);
1277
1278 for (;;) {
1279 if (sigwait(&ss, &sig) < 0) {
1280 if (errno == EINTR)
1281 continue;
1282
1283 goto child_finish;
1284 }
1285
1286 assert(sig == SIGTERM);
1287 break;
1288 }
1289 }
1290
1291 /* If our parent died we'll end the session */
1292 if (getppid() != parent_pid) {
1293 pam_code = pam_close_session(handle, flags);
1294 if (pam_code != PAM_SUCCESS)
1295 goto child_finish;
1296 }
1297
1298 ret = 0;
1299
1300 child_finish:
1301 pam_end(handle, pam_code | flags);
1302 _exit(ret);
1303 }
1304
1305 barrier_set_role(&barrier, BARRIER_PARENT);
1306
1307 /* If the child was forked off successfully it will do all the
1308 * cleanups, so forget about the handle here. */
1309 handle = NULL;
1310
1311 /* Unblock SIGTERM again in the parent */
1312 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
1313
1314 /* We close the log explicitly here, since the PAM modules
1315 * might have opened it, but we don't want this fd around. */
1316 closelog();
1317
1318 /* Synchronously wait for the child to initialize. We don't care for
1319 * errors as we cannot recover. However, warn loudly if it happens. */
1320 if (!barrier_place_and_sync(&barrier))
1321 log_error("PAM initialization failed");
1322
1323 return strv_free_and_replace(*env, e);
1324
1325 fail:
1326 if (pam_code != PAM_SUCCESS) {
1327 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
1328 r = -EPERM; /* PAM errors do not map to errno */
1329 } else
1330 log_error_errno(r, "PAM failed: %m");
1331
1332 if (handle) {
1333 if (close_session)
1334 pam_code = pam_close_session(handle, flags);
1335
1336 pam_end(handle, pam_code | flags);
1337 }
1338
1339 strv_free(e);
1340 closelog();
1341
1342 return r;
1343 #else
1344 return 0;
1345 #endif
1346 }
1347
/* Renames the calling process to "(<name>)", where <name> is the last component of the given path,
 * cut down to its trailing 8 characters if it is longer than that. If the last component is empty,
 * "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty
         * in /bin/ps, hence: '(' + up to 8 chars + ')' + NUL = 11 bytes. */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: the end of the process name is usually more interesting, since
                 * the first bit might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
1378
1379 static bool context_has_address_families(const ExecContext *c) {
1380 assert(c);
1381
1382 return c->address_families_whitelist ||
1383 !set_isempty(c->address_families);
1384 }
1385
1386 static bool context_has_syscall_filters(const ExecContext *c) {
1387 assert(c);
1388
1389 return c->syscall_whitelist ||
1390 !hashmap_isempty(c->syscall_filter);
1391 }
1392
1393 static bool context_has_no_new_privileges(const ExecContext *c) {
1394 assert(c);
1395
1396 if (c->no_new_privileges)
1397 return true;
1398
1399 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1400 return false;
1401
1402 /* We need NNP if we have any form of seccomp and are unprivileged */
1403 return context_has_address_families(c) ||
1404 c->memory_deny_write_execute ||
1405 c->restrict_realtime ||
1406 c->restrict_suid_sgid ||
1407 exec_context_restrict_namespaces_set(c) ||
1408 c->protect_kernel_tunables ||
1409 c->protect_kernel_modules ||
1410 c->private_devices ||
1411 context_has_syscall_filters(c) ||
1412 !set_isempty(c->syscall_archs) ||
1413 c->lock_personality ||
1414 c->protect_hostname;
1415 }
1416
1417 #if HAVE_SECCOMP
1418
1419 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1420
1421 if (is_seccomp_available())
1422 return false;
1423
1424 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1425 return true;
1426 }
1427
1428 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1429 uint32_t negative_action, default_action, action;
1430 int r;
1431
1432 assert(u);
1433 assert(c);
1434
1435 if (!context_has_syscall_filters(c))
1436 return 0;
1437
1438 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1439 return 0;
1440
1441 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
1442
1443 if (c->syscall_whitelist) {
1444 default_action = negative_action;
1445 action = SCMP_ACT_ALLOW;
1446 } else {
1447 default_action = SCMP_ACT_ALLOW;
1448 action = negative_action;
1449 }
1450
1451 if (needs_ambient_hack) {
1452 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1453 if (r < 0)
1454 return r;
1455 }
1456
1457 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
1458 }
1459
1460 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1461 assert(u);
1462 assert(c);
1463
1464 if (set_isempty(c->syscall_archs))
1465 return 0;
1466
1467 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1468 return 0;
1469
1470 return seccomp_restrict_archs(c->syscall_archs);
1471 }
1472
1473 static int apply_address_families(const Unit* u, const ExecContext *c) {
1474 assert(u);
1475 assert(c);
1476
1477 if (!context_has_address_families(c))
1478 return 0;
1479
1480 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1481 return 0;
1482
1483 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1484 }
1485
1486 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1487 assert(u);
1488 assert(c);
1489
1490 if (!c->memory_deny_write_execute)
1491 return 0;
1492
1493 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1494 return 0;
1495
1496 return seccomp_memory_deny_write_execute();
1497 }
1498
1499 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1500 assert(u);
1501 assert(c);
1502
1503 if (!c->restrict_realtime)
1504 return 0;
1505
1506 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1507 return 0;
1508
1509 return seccomp_restrict_realtime();
1510 }
1511
1512 static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1513 assert(u);
1514 assert(c);
1515
1516 if (!c->restrict_suid_sgid)
1517 return 0;
1518
1519 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1520 return 0;
1521
1522 return seccomp_restrict_suid_sgid();
1523 }
1524
1525 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1526 assert(u);
1527 assert(c);
1528
1529 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1530 * let's protect even those systems where this is left on in the kernel. */
1531
1532 if (!c->protect_kernel_tunables)
1533 return 0;
1534
1535 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1536 return 0;
1537
1538 return seccomp_protect_sysctl();
1539 }
1540
1541 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1542 assert(u);
1543 assert(c);
1544
1545 /* Turn off module syscalls on ProtectKernelModules=yes */
1546
1547 if (!c->protect_kernel_modules)
1548 return 0;
1549
1550 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1551 return 0;
1552
1553 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
1554 }
1555
1556 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1557 assert(u);
1558 assert(c);
1559
1560 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1561
1562 if (!c->private_devices)
1563 return 0;
1564
1565 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1566 return 0;
1567
1568 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
1569 }
1570
1571 static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
1572 assert(u);
1573 assert(c);
1574
1575 if (!exec_context_restrict_namespaces_set(c))
1576 return 0;
1577
1578 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1579 return 0;
1580
1581 return seccomp_restrict_namespaces(c->restrict_namespaces);
1582 }
1583
1584 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1585 unsigned long personality;
1586 int r;
1587
1588 assert(u);
1589 assert(c);
1590
1591 if (!c->lock_personality)
1592 return 0;
1593
1594 if (skip_seccomp_unavailable(u, "LockPersonality="))
1595 return 0;
1596
1597 personality = c->personality;
1598
1599 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1600 if (personality == PERSONALITY_INVALID) {
1601
1602 r = opinionated_personality(&personality);
1603 if (r < 0)
1604 return r;
1605 }
1606
1607 return seccomp_lock_personality(personality);
1608 }
1609
1610 #endif
1611
1612 static void do_idle_pipe_dance(int idle_pipe[static 4]) {
1613 assert(idle_pipe);
1614
1615 idle_pipe[1] = safe_close(idle_pipe[1]);
1616 idle_pipe[2] = safe_close(idle_pipe[2]);
1617
1618 if (idle_pipe[0] >= 0) {
1619 int r;
1620
1621 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1622
1623 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1624 ssize_t n;
1625
1626 /* Signal systemd that we are bored and want to continue. */
1627 n = write(idle_pipe[3], "x", 1);
1628 if (n > 0)
1629 /* Wait for systemd to react to the signal above. */
1630 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1631 }
1632
1633 idle_pipe[0] = safe_close(idle_pipe[0]);
1634
1635 }
1636
1637 idle_pipe[3] = safe_close(idle_pipe[3]);
1638 }
1639
1640 static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1641
1642 static int build_environment(
1643 const Unit *u,
1644 const ExecContext *c,
1645 const ExecParameters *p,
1646 size_t n_fds,
1647 const char *home,
1648 const char *username,
1649 const char *shell,
1650 dev_t journal_stream_dev,
1651 ino_t journal_stream_ino,
1652 char ***ret) {
1653
1654 _cleanup_strv_free_ char **our_env = NULL;
1655 ExecDirectoryType t;
1656 size_t n_env = 0;
1657 char *x;
1658
1659 assert(u);
1660 assert(c);
1661 assert(p);
1662 assert(ret);
1663
1664 our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
1665 if (!our_env)
1666 return -ENOMEM;
1667
1668 if (n_fds > 0) {
1669 _cleanup_free_ char *joined = NULL;
1670
1671 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1672 return -ENOMEM;
1673 our_env[n_env++] = x;
1674
1675 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
1676 return -ENOMEM;
1677 our_env[n_env++] = x;
1678
1679 joined = strv_join(p->fd_names, ":");
1680 if (!joined)
1681 return -ENOMEM;
1682
1683 x = strjoin("LISTEN_FDNAMES=", joined);
1684 if (!x)
1685 return -ENOMEM;
1686 our_env[n_env++] = x;
1687 }
1688
1689 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1690 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1691 return -ENOMEM;
1692 our_env[n_env++] = x;
1693
1694 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1695 return -ENOMEM;
1696 our_env[n_env++] = x;
1697 }
1698
1699 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1700 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1701 * check the database directly. */
1702 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1703 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1704 if (!x)
1705 return -ENOMEM;
1706 our_env[n_env++] = x;
1707 }
1708
1709 if (home) {
1710 x = strappend("HOME=", home);
1711 if (!x)
1712 return -ENOMEM;
1713
1714 path_simplify(x + 5, true);
1715 our_env[n_env++] = x;
1716 }
1717
1718 if (username) {
1719 x = strappend("LOGNAME=", username);
1720 if (!x)
1721 return -ENOMEM;
1722 our_env[n_env++] = x;
1723
1724 x = strappend("USER=", username);
1725 if (!x)
1726 return -ENOMEM;
1727 our_env[n_env++] = x;
1728 }
1729
1730 if (shell) {
1731 x = strappend("SHELL=", shell);
1732 if (!x)
1733 return -ENOMEM;
1734
1735 path_simplify(x + 6, true);
1736 our_env[n_env++] = x;
1737 }
1738
1739 if (!sd_id128_is_null(u->invocation_id)) {
1740 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1741 return -ENOMEM;
1742
1743 our_env[n_env++] = x;
1744 }
1745
1746 if (exec_context_needs_term(c)) {
1747 const char *tty_path, *term = NULL;
1748
1749 tty_path = exec_context_tty_path(c);
1750
1751 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1752 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1753 * passes to PID 1 ends up all the way in the console login shown. */
1754
1755 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1756 term = getenv("TERM");
1757 if (!term)
1758 term = default_term_for_tty(tty_path);
1759
1760 x = strappend("TERM=", term);
1761 if (!x)
1762 return -ENOMEM;
1763 our_env[n_env++] = x;
1764 }
1765
1766 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1767 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1768 return -ENOMEM;
1769
1770 our_env[n_env++] = x;
1771 }
1772
1773 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1774 _cleanup_free_ char *pre = NULL, *joined = NULL;
1775 const char *n;
1776
1777 if (!p->prefix[t])
1778 continue;
1779
1780 if (strv_isempty(c->directories[t].paths))
1781 continue;
1782
1783 n = exec_directory_env_name_to_string(t);
1784 if (!n)
1785 continue;
1786
1787 pre = strjoin(p->prefix[t], "/");
1788 if (!pre)
1789 return -ENOMEM;
1790
1791 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1792 if (!joined)
1793 return -ENOMEM;
1794
1795 x = strjoin(n, "=", joined);
1796 if (!x)
1797 return -ENOMEM;
1798
1799 our_env[n_env++] = x;
1800 }
1801
1802 our_env[n_env++] = NULL;
1803 assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
1804
1805 *ret = TAKE_PTR(our_env);
1806
1807 return 0;
1808 }
1809
1810 static int build_pass_environment(const ExecContext *c, char ***ret) {
1811 _cleanup_strv_free_ char **pass_env = NULL;
1812 size_t n_env = 0, n_bufsize = 0;
1813 char **i;
1814
1815 STRV_FOREACH(i, c->pass_environment) {
1816 _cleanup_free_ char *x = NULL;
1817 char *v;
1818
1819 v = getenv(*i);
1820 if (!v)
1821 continue;
1822 x = strjoin(*i, "=", v);
1823 if (!x)
1824 return -ENOMEM;
1825
1826 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1827 return -ENOMEM;
1828
1829 pass_env[n_env++] = TAKE_PTR(x);
1830 pass_env[n_env] = NULL;
1831 }
1832
1833 *ret = TAKE_PTR(pass_env);
1834
1835 return 0;
1836 }
1837
1838 static bool exec_needs_mount_namespace(
1839 const ExecContext *context,
1840 const ExecParameters *params,
1841 const ExecRuntime *runtime) {
1842
1843 assert(context);
1844 assert(params);
1845
1846 if (context->root_image)
1847 return true;
1848
1849 if (!strv_isempty(context->read_write_paths) ||
1850 !strv_isempty(context->read_only_paths) ||
1851 !strv_isempty(context->inaccessible_paths))
1852 return true;
1853
1854 if (context->n_bind_mounts > 0)
1855 return true;
1856
1857 if (context->n_temporary_filesystems > 0)
1858 return true;
1859
1860 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
1861 return true;
1862
1863 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1864 return true;
1865
1866 if (context->private_devices ||
1867 context->private_mounts ||
1868 context->protect_system != PROTECT_SYSTEM_NO ||
1869 context->protect_home != PROTECT_HOME_NO ||
1870 context->protect_kernel_tunables ||
1871 context->protect_kernel_modules ||
1872 context->protect_control_groups)
1873 return true;
1874
1875 if (context->root_directory) {
1876 ExecDirectoryType t;
1877
1878 if (context->mount_apivfs)
1879 return true;
1880
1881 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1882 if (!params->prefix[t])
1883 continue;
1884
1885 if (!strv_isempty(context->directories[t].paths))
1886 return true;
1887 }
1888 }
1889
1890 if (context->dynamic_user &&
1891 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1892 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1893 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1894 return true;
1895
1896 return false;
1897 }
1898
1899 static int setup_private_users(uid_t uid, gid_t gid) {
1900 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1901 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1902 _cleanup_close_ int unshare_ready_fd = -1;
1903 _cleanup_(sigkill_waitp) pid_t pid = 0;
1904 uint64_t c = 1;
1905 ssize_t n;
1906 int r;
1907
1908 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1909 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1910 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1911 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1912 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1913 * continues execution normally. */
1914
1915 if (uid != 0 && uid_is_valid(uid)) {
1916 r = asprintf(&uid_map,
1917 "0 0 1\n" /* Map root → root */
1918 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1919 uid, uid);
1920 if (r < 0)
1921 return -ENOMEM;
1922 } else {
1923 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1924 if (!uid_map)
1925 return -ENOMEM;
1926 }
1927
1928 if (gid != 0 && gid_is_valid(gid)) {
1929 r = asprintf(&gid_map,
1930 "0 0 1\n" /* Map root → root */
1931 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1932 gid, gid);
1933 if (r < 0)
1934 return -ENOMEM;
1935 } else {
1936 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1937 if (!gid_map)
1938 return -ENOMEM;
1939 }
1940
1941 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1942 * namespace. */
1943 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1944 if (unshare_ready_fd < 0)
1945 return -errno;
1946
1947 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1948 * failed. */
1949 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1950 return -errno;
1951
1952 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
1953 if (r < 0)
1954 return r;
1955 if (r == 0) {
1956 _cleanup_close_ int fd = -1;
1957 const char *a;
1958 pid_t ppid;
1959
1960 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1961 * here, after the parent opened its own user namespace. */
1962
1963 ppid = getppid();
1964 errno_pipe[0] = safe_close(errno_pipe[0]);
1965
1966 /* Wait until the parent unshared the user namespace */
1967 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1968 r = -errno;
1969 goto child_fail;
1970 }
1971
1972 /* Disable the setgroups() system call in the child user namespace, for good. */
1973 a = procfs_file_alloca(ppid, "setgroups");
1974 fd = open(a, O_WRONLY|O_CLOEXEC);
1975 if (fd < 0) {
1976 if (errno != ENOENT) {
1977 r = -errno;
1978 goto child_fail;
1979 }
1980
1981 /* If the file is missing the kernel is too old, let's continue anyway. */
1982 } else {
1983 if (write(fd, "deny\n", 5) < 0) {
1984 r = -errno;
1985 goto child_fail;
1986 }
1987
1988 fd = safe_close(fd);
1989 }
1990
1991 /* First write the GID map */
1992 a = procfs_file_alloca(ppid, "gid_map");
1993 fd = open(a, O_WRONLY|O_CLOEXEC);
1994 if (fd < 0) {
1995 r = -errno;
1996 goto child_fail;
1997 }
1998 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1999 r = -errno;
2000 goto child_fail;
2001 }
2002 fd = safe_close(fd);
2003
2004 /* The write the UID map */
2005 a = procfs_file_alloca(ppid, "uid_map");
2006 fd = open(a, O_WRONLY|O_CLOEXEC);
2007 if (fd < 0) {
2008 r = -errno;
2009 goto child_fail;
2010 }
2011 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2012 r = -errno;
2013 goto child_fail;
2014 }
2015
2016 _exit(EXIT_SUCCESS);
2017
2018 child_fail:
2019 (void) write(errno_pipe[1], &r, sizeof(r));
2020 _exit(EXIT_FAILURE);
2021 }
2022
2023 errno_pipe[1] = safe_close(errno_pipe[1]);
2024
2025 if (unshare(CLONE_NEWUSER) < 0)
2026 return -errno;
2027
2028 /* Let the child know that the namespace is ready now */
2029 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2030 return -errno;
2031
2032 /* Try to read an error code from the child */
2033 n = read(errno_pipe[0], &r, sizeof(r));
2034 if (n < 0)
2035 return -errno;
2036 if (n == sizeof(r)) { /* an error code was sent to us */
2037 if (r < 0)
2038 return r;
2039 return -EIO;
2040 }
2041 if (n != 0) /* on success we should have read 0 bytes */
2042 return -EIO;
2043
2044 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2045 pid = 0;
2046 if (r < 0)
2047 return r;
2048 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
2049 return -EIO;
2050
2051 return 0;
2052 }
2053
2054 static int setup_exec_directory(
2055 const ExecContext *context,
2056 const ExecParameters *params,
2057 uid_t uid,
2058 gid_t gid,
2059 ExecDirectoryType type,
2060 int *exit_status) {
2061
2062 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
2063 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2064 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2065 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2066 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2067 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2068 };
2069 char **rt;
2070 int r;
2071
2072 assert(context);
2073 assert(params);
2074 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
2075 assert(exit_status);
2076
2077 if (!params->prefix[type])
2078 return 0;
2079
2080 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
2081 if (!uid_is_valid(uid))
2082 uid = 0;
2083 if (!gid_is_valid(gid))
2084 gid = 0;
2085 }
2086
2087 STRV_FOREACH(rt, context->directories[type].paths) {
2088 _cleanup_free_ char *p = NULL, *pp = NULL;
2089
2090 p = path_join(params->prefix[type], *rt);
2091 if (!p) {
2092 r = -ENOMEM;
2093 goto fail;
2094 }
2095
2096 r = mkdir_parents_label(p, 0755);
2097 if (r < 0)
2098 goto fail;
2099
2100 if (context->dynamic_user &&
2101 (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
2102 (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
2103 _cleanup_free_ char *private_root = NULL;
2104
2105 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2106 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2107 * whose UID is later on reused. To lock this down we use the same trick used by container
2108 * managers to prohibit host users to get access to files of the same UID in containers: we
2109 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2110 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2111 * to make this directory permeable for the service itself.
2112 *
2113 * Specifically: for a service which wants a special directory "foo/" we first create a
2114 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2115 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2116 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2117 * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
2118 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2119 * disabling the access boundary for the service and making sure it only gets access to the
2120 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2121 *
2122 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
2123 * owned by the service itself.
2124 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2125 * files or sockets with other services. */
2126
2127 private_root = path_join(params->prefix[type], "private");
2128 if (!private_root) {
2129 r = -ENOMEM;
2130 goto fail;
2131 }
2132
2133 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
2134 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
2135 if (r < 0)
2136 goto fail;
2137
2138 pp = path_join(private_root, *rt);
2139 if (!pp) {
2140 r = -ENOMEM;
2141 goto fail;
2142 }
2143
2144 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2145 r = mkdir_parents_label(pp, 0755);
2146 if (r < 0)
2147 goto fail;
2148
2149 if (is_dir(p, false) > 0 &&
2150 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2151
2152 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2153 * it over. Most likely the service has been upgraded from one that didn't use
2154 * DynamicUser=1, to one that does. */
2155
2156 if (rename(p, pp) < 0) {
2157 r = -errno;
2158 goto fail;
2159 }
2160 } else {
2161 /* Otherwise, create the actual directory for the service */
2162
2163 r = mkdir_label(pp, context->directories[type].mode);
2164 if (r < 0 && r != -EEXIST)
2165 goto fail;
2166 }
2167
2168 /* And link it up from the original place */
2169 r = symlink_idempotent(pp, p, true);
2170 if (r < 0)
2171 goto fail;
2172
2173 } else {
2174 r = mkdir_label(p, context->directories[type].mode);
2175 if (r < 0) {
2176 if (r != -EEXIST)
2177 goto fail;
2178
2179 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2180 struct stat st;
2181
2182 /* Don't change the owner/access mode of the configuration directory,
2183 * as in the common case it is not written to by a service, and shall
2184 * not be writable. */
2185
2186 if (stat(p, &st) < 0) {
2187 r = -errno;
2188 goto fail;
2189 }
2190
2191 /* Still complain if the access mode doesn't match */
2192 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2193 log_warning("%s \'%s\' already exists but the mode is different. "
2194 "(File system: %o %sMode: %o)",
2195 exec_directory_type_to_string(type), *rt,
2196 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2197
2198 continue;
2199 }
2200 }
2201 }
2202
2203 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
2204 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
2205 * current UID/GID ownership.) */
2206 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2207 if (r < 0)
2208 goto fail;
2209
2210 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2211 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2212 * assignments to exist.*/
2213 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
2214 if (r < 0)
2215 goto fail;
2216 }
2217
2218 return 0;
2219
2220 fail:
2221 *exit_status = exit_status_table[type];
2222 return r;
2223 }
2224
2225 #if ENABLE_SMACK
2226 static int setup_smack(
2227 const ExecContext *context,
2228 const ExecCommand *command) {
2229
2230 int r;
2231
2232 assert(context);
2233 assert(command);
2234
2235 if (context->smack_process_label) {
2236 r = mac_smack_apply_pid(0, context->smack_process_label);
2237 if (r < 0)
2238 return r;
2239 }
2240 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2241 else {
2242 _cleanup_free_ char *exec_label = NULL;
2243
2244 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
2245 if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
2246 return r;
2247
2248 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
2249 if (r < 0)
2250 return r;
2251 }
2252 #endif
2253
2254 return 0;
2255 }
2256 #endif
2257
2258 static int compile_bind_mounts(
2259 const ExecContext *context,
2260 const ExecParameters *params,
2261 BindMount **ret_bind_mounts,
2262 size_t *ret_n_bind_mounts,
2263 char ***ret_empty_directories) {
2264
2265 _cleanup_strv_free_ char **empty_directories = NULL;
2266 BindMount *bind_mounts;
2267 size_t n, h = 0, i;
2268 ExecDirectoryType t;
2269 int r;
2270
2271 assert(context);
2272 assert(params);
2273 assert(ret_bind_mounts);
2274 assert(ret_n_bind_mounts);
2275 assert(ret_empty_directories);
2276
2277 n = context->n_bind_mounts;
2278 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2279 if (!params->prefix[t])
2280 continue;
2281
2282 n += strv_length(context->directories[t].paths);
2283 }
2284
2285 if (n <= 0) {
2286 *ret_bind_mounts = NULL;
2287 *ret_n_bind_mounts = 0;
2288 *ret_empty_directories = NULL;
2289 return 0;
2290 }
2291
2292 bind_mounts = new(BindMount, n);
2293 if (!bind_mounts)
2294 return -ENOMEM;
2295
2296 for (i = 0; i < context->n_bind_mounts; i++) {
2297 BindMount *item = context->bind_mounts + i;
2298 char *s, *d;
2299
2300 s = strdup(item->source);
2301 if (!s) {
2302 r = -ENOMEM;
2303 goto finish;
2304 }
2305
2306 d = strdup(item->destination);
2307 if (!d) {
2308 free(s);
2309 r = -ENOMEM;
2310 goto finish;
2311 }
2312
2313 bind_mounts[h++] = (BindMount) {
2314 .source = s,
2315 .destination = d,
2316 .read_only = item->read_only,
2317 .recursive = item->recursive,
2318 .ignore_enoent = item->ignore_enoent,
2319 };
2320 }
2321
2322 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2323 char **suffix;
2324
2325 if (!params->prefix[t])
2326 continue;
2327
2328 if (strv_isempty(context->directories[t].paths))
2329 continue;
2330
2331 if (context->dynamic_user &&
2332 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2333 !(context->root_directory || context->root_image)) {
2334 char *private_root;
2335
2336 /* So this is for a dynamic user, and we need to make sure the process can access its own
2337 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2338 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2339
2340 private_root = strjoin(params->prefix[t], "/private");
2341 if (!private_root) {
2342 r = -ENOMEM;
2343 goto finish;
2344 }
2345
2346 r = strv_consume(&empty_directories, private_root);
2347 if (r < 0)
2348 goto finish;
2349 }
2350
2351 STRV_FOREACH(suffix, context->directories[t].paths) {
2352 char *s, *d;
2353
2354 if (context->dynamic_user &&
2355 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
2356 s = strjoin(params->prefix[t], "/private/", *suffix);
2357 else
2358 s = strjoin(params->prefix[t], "/", *suffix);
2359 if (!s) {
2360 r = -ENOMEM;
2361 goto finish;
2362 }
2363
2364 if (context->dynamic_user &&
2365 !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
2366 (context->root_directory || context->root_image))
2367 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2368 * directory is not created on the root directory. So, let's bind-mount the directory
2369 * on the 'non-private' place. */
2370 d = strjoin(params->prefix[t], "/", *suffix);
2371 else
2372 d = strdup(s);
2373 if (!d) {
2374 free(s);
2375 r = -ENOMEM;
2376 goto finish;
2377 }
2378
2379 bind_mounts[h++] = (BindMount) {
2380 .source = s,
2381 .destination = d,
2382 .read_only = false,
2383 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
2384 .recursive = true,
2385 .ignore_enoent = false,
2386 };
2387 }
2388 }
2389
2390 assert(h == n);
2391
2392 *ret_bind_mounts = bind_mounts;
2393 *ret_n_bind_mounts = n;
2394 *ret_empty_directories = TAKE_PTR(empty_directories);
2395
2396 return (int) n;
2397
2398 finish:
2399 bind_mount_free_many(bind_mounts, h);
2400 return r;
2401 }
2402
2403 static int apply_mount_namespace(
2404 const Unit *u,
2405 const ExecCommand *command,
2406 const ExecContext *context,
2407 const ExecParameters *params,
2408 const ExecRuntime *runtime,
2409 char **error_path) {
2410
2411 _cleanup_strv_free_ char **empty_directories = NULL;
2412 char *tmp = NULL, *var = NULL;
2413 const char *root_dir = NULL, *root_image = NULL;
2414 NamespaceInfo ns_info;
2415 bool needs_sandboxing;
2416 BindMount *bind_mounts = NULL;
2417 size_t n_bind_mounts = 0;
2418 int r;
2419
2420 assert(context);
2421
2422 /* The runtime struct only contains the parent of the private /tmp,
2423 * which is non-accessible to world users. Inside of it there's a /tmp
2424 * that is sticky, and that's the one we want to use here. */
2425
2426 if (context->private_tmp && runtime) {
2427 if (runtime->tmp_dir)
2428 tmp = strjoina(runtime->tmp_dir, "/tmp");
2429 if (runtime->var_tmp_dir)
2430 var = strjoina(runtime->var_tmp_dir, "/tmp");
2431 }
2432
2433 if (params->flags & EXEC_APPLY_CHROOT) {
2434 root_image = context->root_image;
2435
2436 if (!root_image)
2437 root_dir = context->root_directory;
2438 }
2439
2440 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2441 if (r < 0)
2442 return r;
2443
2444 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2445 if (needs_sandboxing)
2446 ns_info = (NamespaceInfo) {
2447 .ignore_protect_paths = false,
2448 .private_dev = context->private_devices,
2449 .protect_control_groups = context->protect_control_groups,
2450 .protect_kernel_tunables = context->protect_kernel_tunables,
2451 .protect_kernel_modules = context->protect_kernel_modules,
2452 .protect_hostname = context->protect_hostname,
2453 .mount_apivfs = context->mount_apivfs,
2454 .private_mounts = context->private_mounts,
2455 };
2456 else if (!context->dynamic_user && root_dir)
2457 /*
2458 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2459 * sandbox info, otherwise enforce it, don't ignore protected paths and
2460 * fail if we are enable to apply the sandbox inside the mount namespace.
2461 */
2462 ns_info = (NamespaceInfo) {
2463 .ignore_protect_paths = true,
2464 };
2465 else
2466 ns_info = (NamespaceInfo) {};
2467
2468 if (context->mount_flags == MS_SHARED)
2469 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2470
2471 r = setup_namespace(root_dir, root_image,
2472 &ns_info, context->read_write_paths,
2473 needs_sandboxing ? context->read_only_paths : NULL,
2474 needs_sandboxing ? context->inaccessible_paths : NULL,
2475 empty_directories,
2476 bind_mounts,
2477 n_bind_mounts,
2478 context->temporary_filesystems,
2479 context->n_temporary_filesystems,
2480 tmp,
2481 var,
2482 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2483 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2484 context->mount_flags,
2485 DISSECT_IMAGE_DISCARD_ON_LOOP,
2486 error_path);
2487
2488 bind_mount_free_many(bind_mounts, n_bind_mounts);
2489
2490 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2491 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
2492 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2493 * completely different execution environment. */
2494 if (r == -ENOANO) {
2495 if (n_bind_mounts == 0 &&
2496 context->n_temporary_filesystems == 0 &&
2497 !root_dir && !root_image &&
2498 !context->dynamic_user) {
2499 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
2500 return 0;
2501 }
2502
2503 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2504 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2505 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2506
2507 return -EOPNOTSUPP;
2508 }
2509
2510 return r;
2511 }
2512
2513 static int apply_working_directory(
2514 const ExecContext *context,
2515 const ExecParameters *params,
2516 const char *home,
2517 const bool needs_mount_ns,
2518 int *exit_status) {
2519
2520 const char *d, *wd;
2521
2522 assert(context);
2523 assert(exit_status);
2524
2525 if (context->working_directory_home) {
2526
2527 if (!home) {
2528 *exit_status = EXIT_CHDIR;
2529 return -ENXIO;
2530 }
2531
2532 wd = home;
2533
2534 } else if (context->working_directory)
2535 wd = context->working_directory;
2536 else
2537 wd = "/";
2538
2539 if (params->flags & EXEC_APPLY_CHROOT) {
2540 if (!needs_mount_ns && context->root_directory)
2541 if (chroot(context->root_directory) < 0) {
2542 *exit_status = EXIT_CHROOT;
2543 return -errno;
2544 }
2545
2546 d = wd;
2547 } else
2548 d = prefix_roota(context->root_directory, wd);
2549
2550 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2551 *exit_status = EXIT_CHDIR;
2552 return -errno;
2553 }
2554
2555 return 0;
2556 }
2557
2558 static int setup_keyring(
2559 const Unit *u,
2560 const ExecContext *context,
2561 const ExecParameters *p,
2562 uid_t uid, gid_t gid) {
2563
2564 key_serial_t keyring;
2565 int r = 0;
2566 uid_t saved_uid;
2567 gid_t saved_gid;
2568
2569 assert(u);
2570 assert(context);
2571 assert(p);
2572
2573 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2574 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2575 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2576 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2577 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2578 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2579
2580 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2581 return 0;
2582
2583 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2584 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2585 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2586 * & group is just as nasty as acquiring a reference to the user keyring. */
2587
2588 saved_uid = getuid();
2589 saved_gid = getgid();
2590
2591 if (gid_is_valid(gid) && gid != saved_gid) {
2592 if (setregid(gid, -1) < 0)
2593 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2594 }
2595
2596 if (uid_is_valid(uid) && uid != saved_uid) {
2597 if (setreuid(uid, -1) < 0) {
2598 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2599 goto out;
2600 }
2601 }
2602
2603 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2604 if (keyring == -1) {
2605 if (errno == ENOSYS)
2606 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2607 else if (IN_SET(errno, EACCES, EPERM))
2608 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2609 else if (errno == EDQUOT)
2610 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2611 else
2612 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2613
2614 goto out;
2615 }
2616
2617 /* When requested link the user keyring into the session keyring. */
2618 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2619
2620 if (keyctl(KEYCTL_LINK,
2621 KEY_SPEC_USER_KEYRING,
2622 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2623 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2624 goto out;
2625 }
2626 }
2627
2628 /* Restore uid/gid back */
2629 if (uid_is_valid(uid) && uid != saved_uid) {
2630 if (setreuid(saved_uid, -1) < 0) {
2631 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2632 goto out;
2633 }
2634 }
2635
2636 if (gid_is_valid(gid) && gid != saved_gid) {
2637 if (setregid(saved_gid, -1) < 0)
2638 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2639 }
2640
2641 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
2642 if (!sd_id128_is_null(u->invocation_id)) {
2643 key_serial_t key;
2644
2645 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2646 if (key == -1)
2647 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2648 else {
2649 if (keyctl(KEYCTL_SETPERM, key,
2650 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2651 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2652 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2653 }
2654 }
2655
2656 out:
2657 /* Revert back uid & gid for the the last time, and exit */
2658 /* no extra logging, as only the first already reported error matters */
2659 if (getuid() != saved_uid)
2660 (void) setreuid(saved_uid, -1);
2661
2662 if (getgid() != saved_gid)
2663 (void) setregid(saved_gid, -1);
2664
2665 return r;
2666 }
2667
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', in order, advancing the element
 * counter *n for each one stored. The caller must make sure 'array' has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        assert(array);
        assert(n);

        if (!pair)
                return;

        for (size_t k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2680
2681 static int close_remaining_fds(
2682 const ExecParameters *params,
2683 const ExecRuntime *runtime,
2684 const DynamicCreds *dcreds,
2685 int user_lookup_fd,
2686 int socket_fd,
2687 int exec_fd,
2688 int *fds, size_t n_fds) {
2689
2690 size_t n_dont_close = 0;
2691 int dont_close[n_fds + 12];
2692
2693 assert(params);
2694
2695 if (params->stdin_fd >= 0)
2696 dont_close[n_dont_close++] = params->stdin_fd;
2697 if (params->stdout_fd >= 0)
2698 dont_close[n_dont_close++] = params->stdout_fd;
2699 if (params->stderr_fd >= 0)
2700 dont_close[n_dont_close++] = params->stderr_fd;
2701
2702 if (socket_fd >= 0)
2703 dont_close[n_dont_close++] = socket_fd;
2704 if (exec_fd >= 0)
2705 dont_close[n_dont_close++] = exec_fd;
2706 if (n_fds > 0) {
2707 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2708 n_dont_close += n_fds;
2709 }
2710
2711 if (runtime)
2712 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2713
2714 if (dcreds) {
2715 if (dcreds->user)
2716 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2717 if (dcreds->group)
2718 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2719 }
2720
2721 if (user_lookup_fd >= 0)
2722 dont_close[n_dont_close++] = user_lookup_fd;
2723
2724 return close_all_fds(dont_close, n_dont_close);
2725 }
2726
2727 static int send_user_lookup(
2728 Unit *unit,
2729 int user_lookup_fd,
2730 uid_t uid,
2731 gid_t gid) {
2732
2733 assert(unit);
2734
2735 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2736 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2737 * specified. */
2738
2739 if (user_lookup_fd < 0)
2740 return 0;
2741
2742 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2743 return 0;
2744
2745 if (writev(user_lookup_fd,
2746 (struct iovec[]) {
2747 IOVEC_INIT(&uid, sizeof(uid)),
2748 IOVEC_INIT(&gid, sizeof(gid)),
2749 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2750 return -errno;
2751
2752 return 0;
2753 }
2754
2755 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2756 int r;
2757
2758 assert(c);
2759 assert(home);
2760 assert(buf);
2761
2762 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2763
2764 if (*home)
2765 return 0;
2766
2767 if (!c->working_directory_home)
2768 return 0;
2769
2770 r = get_home_dir(buf);
2771 if (r < 0)
2772 return r;
2773
2774 *home = *buf;
2775 return 1;
2776 }
2777
2778 static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2779 _cleanup_strv_free_ char ** list = NULL;
2780 ExecDirectoryType t;
2781 int r;
2782
2783 assert(c);
2784 assert(p);
2785 assert(ret);
2786
2787 assert(c->dynamic_user);
2788
2789 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2790 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2791 * directories. */
2792
2793 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2794 char **i;
2795
2796 if (t == EXEC_DIRECTORY_CONFIGURATION)
2797 continue;
2798
2799 if (!p->prefix[t])
2800 continue;
2801
2802 STRV_FOREACH(i, c->directories[t].paths) {
2803 char *e;
2804
2805 if (t == EXEC_DIRECTORY_RUNTIME)
2806 e = strjoin(p->prefix[t], "/", *i);
2807 else
2808 e = strjoin(p->prefix[t], "/private/", *i);
2809 if (!e)
2810 return -ENOMEM;
2811
2812 r = strv_consume(&list, e);
2813 if (r < 0)
2814 return r;
2815 }
2816 }
2817
2818 *ret = TAKE_PTR(list);
2819
2820 return 0;
2821 }
2822
/* Forward declaration: exec_child() below calls this with the command's argv to obtain a string for the spawn
 * confirmation prompt, frees the result automatically, and treats a NULL return as out-of-memory. The
 * definition is not part of this section of the file. */
2823 static char *exec_command_line(char **argv);
2824
2825 static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
2826 bool using_subcgroup;
2827 char *p;
2828
2829 assert(params);
2830 assert(ret);
2831
2832 if (!params->cgroup_path)
2833 return -EINVAL;
2834
2835 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2836 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2837 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2838 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2839 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2840 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2841 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2842 * flag, which is only passed for the former statements, not for the latter. */
2843
2844 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
2845 if (using_subcgroup)
2846 p = strjoin(params->cgroup_path, "/.control");
2847 else
2848 p = strdup(params->cgroup_path);
2849 if (!p)
2850 return -ENOMEM;
2851
2852 *ret = p;
2853 return using_subcgroup;
2854 }
2855
2856 static int exec_child(
2857 Unit *unit,
2858 const ExecCommand *command,
2859 const ExecContext *context,
2860 const ExecParameters *params,
2861 ExecRuntime *runtime,
2862 DynamicCreds *dcreds,
2863 int socket_fd,
2864 int named_iofds[3],
2865 int *fds,
2866 size_t n_socket_fds,
2867 size_t n_storage_fds,
2868 char **files_env,
2869 int user_lookup_fd,
2870 int *exit_status) {
2871
2872 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
2873 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
2874 _cleanup_free_ gid_t *supplementary_gids = NULL;
2875 const char *username = NULL, *groupname = NULL;
2876 _cleanup_free_ char *home_buffer = NULL;
2877 const char *home = NULL, *shell = NULL;
2878 char **final_argv = NULL;
2879 dev_t journal_stream_dev = 0;
2880 ino_t journal_stream_ino = 0;
2881 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2882 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2883 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2884 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2885 #if HAVE_SELINUX
2886 _cleanup_free_ char *mac_selinux_context_net = NULL;
2887 bool use_selinux = false;
2888 #endif
2889 #if ENABLE_SMACK
2890 bool use_smack = false;
2891 #endif
2892 #if HAVE_APPARMOR
2893 bool use_apparmor = false;
2894 #endif
2895 uid_t uid = UID_INVALID;
2896 gid_t gid = GID_INVALID;
2897 size_t n_fds;
2898 ExecDirectoryType dt;
2899 int secure_bits;
2900
2901 assert(unit);
2902 assert(command);
2903 assert(context);
2904 assert(params);
2905 assert(exit_status);
2906
2907 rename_process_from_path(command->path);
2908
2909 /* We reset exactly these signals, since they are the
2910 * only ones we set to SIG_IGN in the main daemon. All
2911 * others we leave untouched because we set them to
2912 * SIG_DFL or a valid handler initially, both of which
2913 * will be demoted to SIG_DFL. */
2914 (void) default_signals(SIGNALS_CRASH_HANDLER,
2915 SIGNALS_IGNORE, -1);
2916
2917 if (context->ignore_sigpipe)
2918 (void) ignore_signals(SIGPIPE, -1);
2919
2920 r = reset_signal_mask();
2921 if (r < 0) {
2922 *exit_status = EXIT_SIGNAL_MASK;
2923 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2924 }
2925
2926 if (params->idle_pipe)
2927 do_idle_pipe_dance(params->idle_pipe);
2928
2929 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2930 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2931 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2932 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2933
2934 log_forget_fds();
2935 log_set_open_when_needed(true);
2936
2937 /* In case anything used libc syslog(), close this here, too */
2938 closelog();
2939
2940 n_fds = n_socket_fds + n_storage_fds;
2941 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
2942 if (r < 0) {
2943 *exit_status = EXIT_FDS;
2944 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2945 }
2946
2947 if (!context->same_pgrp)
2948 if (setsid() < 0) {
2949 *exit_status = EXIT_SETSID;
2950 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2951 }
2952
2953 exec_context_tty_reset(context, params);
2954
2955 if (unit_shall_confirm_spawn(unit)) {
2956 const char *vc = params->confirm_spawn;
2957 _cleanup_free_ char *cmdline = NULL;
2958
2959 cmdline = exec_command_line(command->argv);
2960 if (!cmdline) {
2961 *exit_status = EXIT_MEMORY;
2962 return log_oom();
2963 }
2964
2965 r = ask_for_confirmation(vc, unit, cmdline);
2966 if (r != CONFIRM_EXECUTE) {
2967 if (r == CONFIRM_PRETEND_SUCCESS) {
2968 *exit_status = EXIT_SUCCESS;
2969 return 0;
2970 }
2971 *exit_status = EXIT_CONFIRM;
2972 log_unit_error(unit, "Execution cancelled by the user");
2973 return -ECANCELED;
2974 }
2975 }
2976
2977 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2978 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2979 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2980 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2981 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2982 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
2983 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
2984 *exit_status = EXIT_MEMORY;
2985 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2986 }
2987
2988 if (context->dynamic_user && dcreds) {
2989 _cleanup_strv_free_ char **suggested_paths = NULL;
2990
2991 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2992 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
2993 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2994 *exit_status = EXIT_USER;
2995 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2996 }
2997
2998 r = compile_suggested_paths(context, params, &suggested_paths);
2999 if (r < 0) {
3000 *exit_status = EXIT_MEMORY;
3001 return log_oom();
3002 }
3003
3004 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
3005 if (r < 0) {
3006 *exit_status = EXIT_USER;
3007 if (r == -EILSEQ) {
3008 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3009 return -EOPNOTSUPP;
3010 }
3011 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
3012 }
3013
3014 if (!uid_is_valid(uid)) {
3015 *exit_status = EXIT_USER;
3016 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
3017 return -ESRCH;
3018 }
3019
3020 if (!gid_is_valid(gid)) {
3021 *exit_status = EXIT_USER;
3022 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
3023 return -ESRCH;
3024 }
3025
3026 if (dcreds->user)
3027 username = dcreds->user->name;
3028
3029 } else {
3030 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3031 if (r < 0) {
3032 *exit_status = EXIT_USER;
3033 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
3034 }
3035
3036 r = get_fixed_group(context, &groupname, &gid);
3037 if (r < 0) {
3038 *exit_status = EXIT_GROUP;
3039 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
3040 }
3041 }
3042
3043 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3044 r = get_supplementary_groups(context, username, groupname, gid,
3045 &supplementary_gids, &ngids);
3046 if (r < 0) {
3047 *exit_status = EXIT_GROUP;
3048 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
3049 }
3050
3051 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3052 if (r < 0) {
3053 *exit_status = EXIT_USER;
3054 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
3055 }
3056
3057 user_lookup_fd = safe_close(user_lookup_fd);
3058
3059 r = acquire_home(context, uid, &home, &home_buffer);
3060 if (r < 0) {
3061 *exit_status = EXIT_CHDIR;
3062 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
3063 }
3064
3065 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3066 * must be sure to drop O_NONBLOCK */
3067 if (socket_fd >= 0)
3068 (void) fd_nonblock(socket_fd, false);
3069
3070 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3071 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3072 if (params->cgroup_path) {
3073 _cleanup_free_ char *p = NULL;
3074
3075 r = exec_parameters_get_cgroup_path(params, &p);
3076 if (r < 0) {
3077 *exit_status = EXIT_CGROUP;
3078 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3079 }
3080
3081 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3082 if (r < 0) {
3083 *exit_status = EXIT_CGROUP;
3084 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3085 }
3086 }
3087
3088 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3089 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3090 if (r < 0) {
3091 *exit_status = EXIT_NETWORK;
3092 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3093 }
3094 }
3095
3096 r = setup_input(context, params, socket_fd, named_iofds);
3097 if (r < 0) {
3098 *exit_status = EXIT_STDIN;
3099 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
3100 }
3101
3102 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
3103 if (r < 0) {
3104 *exit_status = EXIT_STDOUT;
3105 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
3106 }
3107
3108 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
3109 if (r < 0) {
3110 *exit_status = EXIT_STDERR;
3111 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
3112 }
3113
3114 if (context->oom_score_adjust_set) {
3115 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3116 * prohibit write access to this file, and we shouldn't trip up over that. */
3117 r = set_oom_score_adjust(context->oom_score_adjust);
3118 if (IN_SET(r, -EPERM, -EACCES))
3119 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
3120 else if (r < 0) {
3121 *exit_status = EXIT_OOM_ADJUST;
3122 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
3123 }
3124 }
3125
3126 if (context->nice_set)
3127 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
3128 *exit_status = EXIT_NICE;
3129 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
3130 }
3131
3132 if (context->cpu_sched_set) {
3133 struct sched_param param = {
3134 .sched_priority = context->cpu_sched_priority,
3135 };
3136
3137 r = sched_setscheduler(0,
3138 context->cpu_sched_policy |
3139 (context->cpu_sched_reset_on_fork ?
3140 SCHED_RESET_ON_FORK : 0),
3141 &param);
3142 if (r < 0) {
3143 *exit_status = EXIT_SETSCHEDULER;
3144 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
3145 }
3146 }
3147
3148 if (context->cpuset)
3149 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
3150 *exit_status = EXIT_CPUAFFINITY;
3151 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
3152 }
3153
3154 if (context->ioprio_set)
3155 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
3156 *exit_status = EXIT_IOPRIO;
3157 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
3158 }
3159
3160 if (context->timer_slack_nsec != NSEC_INFINITY)
3161 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
3162 *exit_status = EXIT_TIMERSLACK;
3163 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
3164 }
3165
3166 if (context->personality != PERSONALITY_INVALID) {
3167 r = safe_personality(context->personality);
3168 if (r < 0) {
3169 *exit_status = EXIT_PERSONALITY;
3170 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
3171 }
3172 }
3173
3174 if (context->utmp_id)
3175 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
3176 context->tty_path,
3177 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3178 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3179 USER_PROCESS,
3180 username);
3181
3182 if (uid_is_valid(uid)) {
3183 r = chown_terminal(STDIN_FILENO, uid);
3184 if (r < 0) {
3185 *exit_status = EXIT_STDIN;
3186 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
3187 }
3188 }
3189
3190 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
3191 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3192 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3193 * touch a single hierarchy too. */
3194 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
3195 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
3196 if (r < 0) {
3197 *exit_status = EXIT_CGROUP;
3198 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
3199 }
3200 }
3201
3202 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3203 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3204 if (r < 0)
3205 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
3206 }
3207
3208 r = build_environment(
3209 unit,
3210 context,
3211 params,
3212 n_fds,
3213 home,
3214 username,
3215 shell,
3216 journal_stream_dev,
3217 journal_stream_ino,
3218 &our_env);
3219 if (r < 0) {
3220 *exit_status = EXIT_MEMORY;
3221 return log_oom();
3222 }
3223
3224 r = build_pass_environment(context, &pass_env);
3225 if (r < 0) {
3226 *exit_status = EXIT_MEMORY;
3227 return log_oom();
3228 }
3229
3230 accum_env = strv_env_merge(5,
3231 params->environment,
3232 our_env,
3233 pass_env,
3234 context->environment,
3235 files_env,
3236 NULL);
3237 if (!accum_env) {
3238 *exit_status = EXIT_MEMORY;
3239 return log_oom();
3240 }
3241 accum_env = strv_env_clean(accum_env);
3242
3243 (void) umask(context->umask);
3244
3245 r = setup_keyring(unit, context, params, uid, gid);
3246 if (r < 0) {
3247 *exit_status = EXIT_KEYRING;
3248 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
3249 }
3250
3251 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3252 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
3253
3254 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3255 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
3256
3257 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3258 if (needs_ambient_hack)
3259 needs_setuid = false;
3260 else
3261 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3262
3263 if (needs_sandboxing) {
3264 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3265 * present. The actual MAC context application will happen later, as late as possible, to avoid
3266 * impacting our own code paths. */
3267
3268 #if HAVE_SELINUX
3269 use_selinux = mac_selinux_use();
3270 #endif
3271 #if ENABLE_SMACK
3272 use_smack = mac_smack_use();
3273 #endif
3274 #if HAVE_APPARMOR
3275 use_apparmor = mac_apparmor_use();
3276 #endif
3277 }
3278
3279 if (needs_sandboxing) {
3280 int which_failed;
3281
3282 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3283 * is set here. (See below.) */
3284
3285 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3286 if (r < 0) {
3287 *exit_status = EXIT_LIMITS;
3288 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3289 }
3290 }
3291
3292 if (needs_setuid) {
3293
3294 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3295 * wins here. (See above.) */
3296
3297 if (context->pam_name && username) {
3298 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
3299 if (r < 0) {
3300 *exit_status = EXIT_PAM;
3301 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
3302 }
3303 }
3304 }
3305
3306 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3307
3308 if (ns_type_supported(NAMESPACE_NET)) {
3309 r = setup_netns(runtime->netns_storage_socket);
3310 if (r < 0) {
3311 *exit_status = EXIT_NETWORK;
3312 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3313 }
3314 } else if (context->network_namespace_path) {
3315 *exit_status = EXIT_NETWORK;
3316 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
3317 } else
3318 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3319 }
3320
3321 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
3322 if (needs_mount_namespace) {
3323 _cleanup_free_ char *error_path = NULL;
3324
3325 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
3326 if (r < 0) {
3327 *exit_status = EXIT_NAMESPACE;
3328 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3329 error_path ? ": " : "", strempty(error_path));
3330 }
3331 }
3332
3333 if (context->protect_hostname) {
3334 if (ns_type_supported(NAMESPACE_UTS)) {
3335 if (unshare(CLONE_NEWUTS) < 0) {
3336 *exit_status = EXIT_NAMESPACE;
3337 return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
3338 }
3339 } else
3340 log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3341 #if HAVE_SECCOMP
3342 r = seccomp_protect_hostname();
3343 if (r < 0) {
3344 *exit_status = EXIT_SECCOMP;
3345 return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
3346 }
3347 #endif
3348 }
3349
3350 /* Drop groups as early as possible */
3351 if (needs_setuid) {
3352 r = enforce_groups(gid, supplementary_gids, ngids);
3353 if (r < 0) {
3354 *exit_status = EXIT_GROUP;
3355 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
3356 }
3357 }
3358
3359 if (needs_sandboxing) {
3360 #if HAVE_SELINUX
3361 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3362 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3363 if (r < 0) {
3364 *exit_status = EXIT_SELINUX_CONTEXT;
3365 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3366 }
3367 }
3368 #endif
3369
3370 if (context->private_users) {
3371 r = setup_private_users(uid, gid);
3372 if (r < 0) {
3373 *exit_status = EXIT_USER;
3374 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
3375 }
3376 }
3377 }
3378
3379 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3380 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3381 * however if we have it as we want to keep it open until the final execve(). */
3382
3383 if (params->exec_fd >= 0) {
3384 exec_fd = params->exec_fd;
3385
3386 if (exec_fd < 3 + (int) n_fds) {
3387 int moved_fd;
3388
3389 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3390 * process we are about to execute. */
3391
3392 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3393 if (moved_fd < 0) {
3394 *exit_status = EXIT_FDS;
3395 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3396 }
3397
3398 safe_close(exec_fd);
3399 exec_fd = moved_fd;
3400 } else {
3401 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3402 r = fd_cloexec(exec_fd, true);
3403 if (r < 0) {
3404 *exit_status = EXIT_FDS;
3405 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3406 }
3407 }
3408
3409 fds_with_exec_fd = newa(int, n_fds + 1);
3410 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
3411 fds_with_exec_fd[n_fds] = exec_fd;
3412 n_fds_with_exec_fd = n_fds + 1;
3413 } else {
3414 fds_with_exec_fd = fds;
3415 n_fds_with_exec_fd = n_fds;
3416 }
3417
3418 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
3419 if (r >= 0)
3420 r = shift_fds(fds, n_fds);
3421 if (r >= 0)
3422 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
3423 if (r < 0) {
3424 *exit_status = EXIT_FDS;
3425 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
3426 }
3427
3428 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3429 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3430 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3431 * came this far. */
3432
3433 secure_bits = context->secure_bits;
3434
3435 if (needs_sandboxing) {
3436 uint64_t bset;
3437
3438 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3439 * requested. (Note this is placed after the general resource limit initialization, see
3440 * above, in order to take precedence.) */
3441 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3442 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
3443 *exit_status = EXIT_LIMITS;
3444 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3445 }
3446 }
3447
3448 #if ENABLE_SMACK
3449 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3450 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3451 if (use_smack) {
3452 r = setup_smack(context, command);
3453 if (r < 0) {
3454 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3455 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3456 }
3457 }
3458 #endif
3459
3460 bset = context->capability_bounding_set;
3461 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3462 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3463 * instead of us doing that */
3464 if (needs_ambient_hack)
3465 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3466 (UINT64_C(1) << CAP_SETUID) |
3467 (UINT64_C(1) << CAP_SETGID);
3468
3469 if (!cap_test_all(bset)) {
3470 r = capability_bounding_set_drop(bset, false);
3471 if (r < 0) {
3472 *exit_status = EXIT_CAPABILITIES;
3473 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
3474 }
3475 }
3476
3477 /* This is done before enforce_user, but ambient set
3478 * does not survive over setresuid() if keep_caps is not set. */
3479 if (!needs_ambient_hack &&
3480 context->capability_ambient_set != 0) {
3481 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3482 if (r < 0) {
3483 *exit_status = EXIT_CAPABILITIES;
3484 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
3485 }
3486 }
3487 }
3488
3489 if (needs_setuid) {
3490 if (uid_is_valid(uid)) {
3491 r = enforce_user(context, uid);
3492 if (r < 0) {
3493 *exit_status = EXIT_USER;
3494 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
3495 }
3496
3497 if (!needs_ambient_hack &&
3498 context->capability_ambient_set != 0) {
3499
3500 /* Fix the ambient capabilities after user change. */
3501 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3502 if (r < 0) {
3503 *exit_status = EXIT_CAPABILITIES;
3504 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
3505 }
3506
3507 /* If we were asked to change user and ambient capabilities
3508 * were requested, we had to add keep-caps to the securebits
3509 * so that we would maintain the inherited capability set
3510 * through the setresuid(). Make sure that the bit is added
3511 * also to the context secure_bits so that we don't try to
3512 * drop the bit away next. */
3513
3514 secure_bits |= 1<<SECURE_KEEP_CAPS;
3515 }
3516 }
3517 }
3518
3519 /* Apply working directory here, because the working directory might be on NFS and only the user running
3520 * this service might have the correct privilege to change to the working directory */
3521 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
3522 if (r < 0)
3523 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3524
3525 if (needs_sandboxing) {
3526 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3527 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3528 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3529 * are restricted. */
3530
3531 #if HAVE_SELINUX
3532 if (use_selinux) {
3533 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3534
3535 if (exec_context) {
3536 r = setexeccon(exec_context);
3537 if (r < 0) {
3538 *exit_status = EXIT_SELINUX_CONTEXT;
3539 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
3540 }
3541 }
3542 }
3543 #endif
3544
3545 #if HAVE_APPARMOR
3546 if (use_apparmor && context->apparmor_profile) {
3547 r = aa_change_onexec(context->apparmor_profile);
3548 if (r < 0 && !context->apparmor_profile_ignore) {
3549 *exit_status = EXIT_APPARMOR_PROFILE;
3550 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
3551 }
3552 }
3553 #endif
3554
3555 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3556 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3557 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3558 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3559 *exit_status = EXIT_SECUREBITS;
3560 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
3561 }
3562
3563 if (context_has_no_new_privileges(context))
3564 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3565 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3566 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
3567 }
3568
3569 #if HAVE_SECCOMP
3570 r = apply_address_families(unit, context);
3571 if (r < 0) {
3572 *exit_status = EXIT_ADDRESS_FAMILIES;
3573 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
3574 }
3575
3576 r = apply_memory_deny_write_execute(unit, context);
3577 if (r < 0) {
3578 *exit_status = EXIT_SECCOMP;
3579 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3580 }
3581
3582 r = apply_restrict_realtime(unit, context);
3583 if (r < 0) {
3584 *exit_status = EXIT_SECCOMP;
3585 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3586 }
3587
3588 r = apply_restrict_suid_sgid(unit, context);
3589 if (r < 0) {
3590 *exit_status = EXIT_SECCOMP;
3591 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3592 }
3593
3594 r = apply_restrict_namespaces(unit, context);
3595 if (r < 0) {
3596 *exit_status = EXIT_SECCOMP;
3597 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3598 }
3599
3600 r = apply_protect_sysctl(unit, context);
3601 if (r < 0) {
3602 *exit_status = EXIT_SECCOMP;
3603 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3604 }
3605
3606 r = apply_protect_kernel_modules(unit, context);
3607 if (r < 0) {
3608 *exit_status = EXIT_SECCOMP;
3609 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3610 }
3611
3612 r = apply_private_devices(unit, context);
3613 if (r < 0) {
3614 *exit_status = EXIT_SECCOMP;
3615 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3616 }
3617
3618 r = apply_syscall_archs(unit, context);
3619 if (r < 0) {
3620 *exit_status = EXIT_SECCOMP;
3621 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3622 }
3623
3624 r = apply_lock_personality(unit, context);
3625 if (r < 0) {
3626 *exit_status = EXIT_SECCOMP;
3627 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3628 }
3629
3630 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3631 * by the filter as little as possible. */
3632 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3633 if (r < 0) {
3634 *exit_status = EXIT_SECCOMP;
3635 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3636 }
3637 #endif
3638 }
3639
3640 if (!strv_isempty(context->unset_environment)) {
3641 char **ee = NULL;
3642
3643 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3644 if (!ee) {
3645 *exit_status = EXIT_MEMORY;
3646 return log_oom();
3647 }
3648
3649 strv_free_and_replace(accum_env, ee);
3650 }
3651
3652 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3653 replaced_argv = replace_env_argv(command->argv, accum_env);
3654 if (!replaced_argv) {
3655 *exit_status = EXIT_MEMORY;
3656 return log_oom();
3657 }
3658 final_argv = replaced_argv;
3659 } else
3660 final_argv = command->argv;
3661
3662 if (DEBUG_LOGGING) {
3663 _cleanup_free_ char *line;
3664
3665 line = exec_command_line(final_argv);
3666 if (line)
3667 log_struct(LOG_DEBUG,
3668 "EXECUTABLE=%s", command->path,
3669 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3670 LOG_UNIT_ID(unit),
3671 LOG_UNIT_INVOCATION_ID(unit));
3672 }
3673
3674 if (exec_fd >= 0) {
3675 uint8_t hot = 1;
3676
3677 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3678 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3679
3680 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3681 *exit_status = EXIT_EXEC;
3682 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3683 }
3684 }
3685
3686 execve(command->path, final_argv, accum_env);
3687 r = -errno;
3688
3689 if (exec_fd >= 0) {
3690 uint8_t hot = 0;
3691
3692 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3693 * that POLLHUP on it no longer means execve() succeeded. */
3694
3695 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3696 *exit_status = EXIT_EXEC;
3697 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
3698 }
3699 }
3700
3701 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3702 log_struct_errno(LOG_INFO, r,
3703 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3704 LOG_UNIT_ID(unit),
3705 LOG_UNIT_INVOCATION_ID(unit),
3706 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3707 command->path),
3708 "EXECUTABLE=%s", command->path);
3709 return 0;
3710 }
3711
3712 *exit_status = EXIT_EXEC;
3713 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
3714 }
3715
3716 static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
3717 static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
3718
3719 int exec_spawn(Unit *unit,
3720 ExecCommand *command,
3721 const ExecContext *context,
3722 const ExecParameters *params,
3723 ExecRuntime *runtime,
3724 DynamicCreds *dcreds,
3725 pid_t *ret) {
3726
3727 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
3728 _cleanup_free_ char *subcgroup_path = NULL;
3729 _cleanup_strv_free_ char **files_env = NULL;
3730 size_t n_storage_fds = 0, n_socket_fds = 0;
3731 _cleanup_free_ char *line = NULL;
3732 pid_t pid;
3733
3734 assert(unit);
3735 assert(command);
3736 assert(context);
3737 assert(ret);
3738 assert(params);
3739 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
3740
3741 if (context->std_input == EXEC_INPUT_SOCKET ||
3742 context->std_output == EXEC_OUTPUT_SOCKET ||
3743 context->std_error == EXEC_OUTPUT_SOCKET) {
3744
3745 if (params->n_socket_fds > 1) {
3746 log_unit_error(unit, "Got more than one socket.");
3747 return -EINVAL;
3748 }
3749
3750 if (params->n_socket_fds == 0) {
3751 log_unit_error(unit, "Got no socket.");
3752 return -EINVAL;
3753 }
3754
3755 socket_fd = params->fds[0];
3756 } else {
3757 socket_fd = -1;
3758 fds = params->fds;
3759 n_socket_fds = params->n_socket_fds;
3760 n_storage_fds = params->n_storage_fds;
3761 }
3762
3763 r = exec_context_named_iofds(context, params, named_iofds);
3764 if (r < 0)
3765 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3766
3767 r = exec_context_load_environment(unit, context, &files_env);
3768 if (r < 0)
3769 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3770
3771 line = exec_command_line(command->argv);
3772 if (!line)
3773 return log_oom();
3774
3775 log_struct(LOG_DEBUG,
3776 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3777 "EXECUTABLE=%s", command->path,
3778 LOG_UNIT_ID(unit),
3779 LOG_UNIT_INVOCATION_ID(unit));
3780
3781 if (params->cgroup_path) {
3782 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
3783 if (r < 0)
3784 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
3785 if (r > 0) { /* We are using a child cgroup */
3786 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
3787 if (r < 0)
3788 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
3789 }
3790 }
3791
3792 pid = fork();
3793 if (pid < 0)
3794 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3795
3796 if (pid == 0) {
3797 int exit_status = EXIT_SUCCESS;
3798
3799 r = exec_child(unit,
3800 command,
3801 context,
3802 params,
3803 runtime,
3804 dcreds,
3805 socket_fd,
3806 named_iofds,
3807 fds,
3808 n_socket_fds,
3809 n_storage_fds,
3810 files_env,
3811 unit->manager->user_lookup_fds[1],
3812 &exit_status);
3813
3814 if (r < 0)
3815 log_struct_errno(LOG_ERR, r,
3816 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3817 LOG_UNIT_ID(unit),
3818 LOG_UNIT_INVOCATION_ID(unit),
3819 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3820 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3821 command->path),
3822 "EXECUTABLE=%s", command->path);
3823
3824 _exit(exit_status);
3825 }
3826
3827 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3828
3829 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
3830 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
3831 * process will be killed too). */
3832 if (subcgroup_path)
3833 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
3834
3835 exec_status_start(&command->exec_status, pid);
3836
3837 *ret = pid;
3838 return 0;
3839 }
3840
3841 void exec_context_init(ExecContext *c) {
3842 ExecDirectoryType i;
3843
3844 assert(c);
3845
3846 c->umask = 0022;
3847 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3848 c->cpu_sched_policy = SCHED_OTHER;
3849 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3850 c->syslog_level_prefix = true;
3851 c->ignore_sigpipe = true;
3852 c->timer_slack_nsec = NSEC_INFINITY;
3853 c->personality = PERSONALITY_INVALID;
3854 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3855 c->directories[i].mode = 0755;
3856 c->capability_bounding_set = CAP_ALL;
3857 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
3858 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
3859 c->log_level_max = -1;
3860 }
3861
3862 void exec_context_done(ExecContext *c) {
3863 ExecDirectoryType i;
3864 size_t l;
3865
3866 assert(c);
3867
3868 c->environment = strv_free(c->environment);
3869 c->environment_files = strv_free(c->environment_files);
3870 c->pass_environment = strv_free(c->pass_environment);
3871 c->unset_environment = strv_free(c->unset_environment);
3872
3873 rlimit_free_all(c->rlimit);
3874
3875 for (l = 0; l < 3; l++) {
3876 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3877 c->stdio_file[l] = mfree(c->stdio_file[l]);
3878 }
3879
3880 c->working_directory = mfree(c->working_directory);
3881 c->root_directory = mfree(c->root_directory);
3882 c->root_image = mfree(c->root_image);
3883 c->tty_path = mfree(c->tty_path);
3884 c->syslog_identifier = mfree(c->syslog_identifier);
3885 c->user = mfree(c->user);
3886 c->group = mfree(c->group);
3887
3888 c->supplementary_groups = strv_free(c->supplementary_groups);
3889
3890 c->pam_name = mfree(c->pam_name);
3891
3892 c->read_only_paths = strv_free(c->read_only_paths);
3893 c->read_write_paths = strv_free(c->read_write_paths);
3894 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3895
3896 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3897 c->bind_mounts = NULL;
3898 c->n_bind_mounts = 0;
3899 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
3900 c->temporary_filesystems = NULL;
3901 c->n_temporary_filesystems = 0;
3902
3903 c->cpuset = cpu_set_mfree(c->cpuset);
3904
3905 c->utmp_id = mfree(c->utmp_id);
3906 c->selinux_context = mfree(c->selinux_context);
3907 c->apparmor_profile = mfree(c->apparmor_profile);
3908 c->smack_process_label = mfree(c->smack_process_label);
3909
3910 c->syscall_filter = hashmap_free(c->syscall_filter);
3911 c->syscall_archs = set_free(c->syscall_archs);
3912 c->address_families = set_free(c->address_families);
3913
3914 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
3915 c->directories[i].paths = strv_free(c->directories[i].paths);
3916
3917 c->log_level_max = -1;
3918
3919 exec_context_free_log_extra_fields(c);
3920
3921 c->log_rate_limit_interval_usec = 0;
3922 c->log_rate_limit_burst = 0;
3923
3924 c->stdin_data = mfree(c->stdin_data);
3925 c->stdin_data_size = 0;
3926
3927 c->network_namespace_path = mfree(c->network_namespace_path);
3928 }
3929
3930 int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
3931 char **i;
3932
3933 assert(c);
3934
3935 if (!runtime_prefix)
3936 return 0;
3937
3938 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3939 _cleanup_free_ char *p;
3940
3941 p = path_join(runtime_prefix, *i);
3942 if (!p)
3943 return -ENOMEM;
3944
3945 /* We execute this synchronously, since we need to be sure this is gone when we start the
3946 * service next. */
3947 (void) rm_rf(p, REMOVE_ROOT);
3948 }
3949
3950 return 0;
3951 }
3952
3953 static void exec_command_done(ExecCommand *c) {
3954 assert(c);
3955
3956 c->path = mfree(c->path);
3957 c->argv = strv_free(c->argv);
3958 }
3959
3960 void exec_command_done_array(ExecCommand *c, size_t n) {
3961 size_t i;
3962
3963 for (i = 0; i < n; i++)
3964 exec_command_done(c+i);
3965 }
3966
3967 ExecCommand* exec_command_free_list(ExecCommand *c) {
3968 ExecCommand *i;
3969
3970 while ((i = c)) {
3971 LIST_REMOVE(command, c, i);
3972 exec_command_done(i);
3973 free(i);
3974 }
3975
3976 return NULL;
3977 }
3978
3979 void exec_command_free_array(ExecCommand **c, size_t n) {
3980 size_t i;
3981
3982 for (i = 0; i < n; i++)
3983 c[i] = exec_command_free_list(c[i]);
3984 }
3985
3986 void exec_command_reset_status_array(ExecCommand *c, size_t n) {
3987 size_t i;
3988
3989 for (i = 0; i < n; i++)
3990 exec_status_reset(&c[i].exec_status);
3991 }
3992
3993 void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
3994 size_t i;
3995
3996 for (i = 0; i < n; i++) {
3997 ExecCommand *z;
3998
3999 LIST_FOREACH(command, z, c[i])
4000 exec_status_reset(&z->exec_status);
4001 }
4002 }
4003
4004 typedef struct InvalidEnvInfo {
4005 const Unit *unit;
4006 const char *path;
4007 } InvalidEnvInfo;
4008
4009 static void invalid_env(const char *p, void *userdata) {
4010 InvalidEnvInfo *info = userdata;
4011
4012 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
4013 }
4014
4015 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4016 assert(c);
4017
4018 switch (fd_index) {
4019
4020 case STDIN_FILENO:
4021 if (c->std_input != EXEC_INPUT_NAMED_FD)
4022 return NULL;
4023
4024 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
4025
4026 case STDOUT_FILENO:
4027 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4028 return NULL;
4029
4030 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
4031
4032 case STDERR_FILENO:
4033 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4034 return NULL;
4035
4036 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
4037
4038 default:
4039 return NULL;
4040 }
4041 }
4042
4043 static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
4044 size_t i, targets;
4045 const char* stdio_fdname[3];
4046 size_t n_fds;
4047
4048 assert(c);
4049 assert(p);
4050
4051 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4052 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4053 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4054
4055 for (i = 0; i < 3; i++)
4056 stdio_fdname[i] = exec_context_fdname(c, i);
4057
4058 n_fds = p->n_storage_fds + p->n_socket_fds;
4059
4060 for (i = 0; i < n_fds && targets > 0; i++)
4061 if (named_iofds[STDIN_FILENO] < 0 &&
4062 c->std_input == EXEC_INPUT_NAMED_FD &&
4063 stdio_fdname[STDIN_FILENO] &&
4064 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4065
4066 named_iofds[STDIN_FILENO] = p->fds[i];
4067 targets--;
4068
4069 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4070 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4071 stdio_fdname[STDOUT_FILENO] &&
4072 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4073
4074 named_iofds[STDOUT_FILENO] = p->fds[i];
4075 targets--;
4076
4077 } else if (named_iofds[STDERR_FILENO] < 0 &&
4078 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4079 stdio_fdname[STDERR_FILENO] &&
4080 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4081
4082 named_iofds[STDERR_FILENO] = p->fds[i];
4083 targets--;
4084 }
4085
4086 return targets == 0 ? 0 : -ENOENT;
4087 }
4088
4089 static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
4090 char **i, **r = NULL;
4091
4092 assert(c);
4093 assert(l);
4094
4095 STRV_FOREACH(i, c->environment_files) {
4096 char *fn;
4097 int k;
4098 unsigned n;
4099 bool ignore = false;
4100 char **p;
4101 _cleanup_globfree_ glob_t pglob = {};
4102
4103 fn = *i;
4104
4105 if (fn[0] == '-') {
4106 ignore = true;
4107 fn++;
4108 }
4109
4110 if (!path_is_absolute(fn)) {
4111 if (ignore)
4112 continue;
4113
4114 strv_free(r);
4115 return -EINVAL;
4116 }
4117
4118 /* Filename supports globbing, take all matching files */
4119 k = safe_glob(fn, 0, &pglob);
4120 if (k < 0) {
4121 if (ignore)
4122 continue;
4123
4124 strv_free(r);
4125 return k;
4126 }
4127
4128 /* When we don't match anything, -ENOENT should be returned */
4129 assert(pglob.gl_pathc > 0);
4130
4131 for (n = 0; n < pglob.gl_pathc; n++) {
4132 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
4133 if (k < 0) {
4134 if (ignore)
4135 continue;
4136
4137 strv_free(r);
4138 return k;
4139 }
4140 /* Log invalid environment variables with filename */
4141 if (p) {
4142 InvalidEnvInfo info = {
4143 .unit = unit,
4144 .path = pglob.gl_pathv[n]
4145 };
4146
4147 p = strv_env_clean_with_callback(p, invalid_env, &info);
4148 }
4149
4150 if (!r)
4151 r = p;
4152 else {
4153 char **m;
4154
4155 m = strv_env_merge(2, r, p);
4156 strv_free(r);
4157 strv_free(p);
4158 if (!m)
4159 return -ENOMEM;
4160
4161 r = m;
4162 }
4163 }
4164 }
4165
4166 *l = r;
4167
4168 return 0;
4169 }
4170
4171 static bool tty_may_match_dev_console(const char *tty) {
4172 _cleanup_free_ char *resolved = NULL;
4173
4174 if (!tty)
4175 return true;
4176
4177 tty = skip_dev_prefix(tty);
4178
4179 /* trivial identity? */
4180 if (streq(tty, "console"))
4181 return true;
4182
4183 if (resolve_dev_console(&resolved) < 0)
4184 return true; /* if we could not resolve, assume it may */
4185
4186 /* "tty0" means the active VC, so it may be the same sometimes */
4187 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
4188 }
4189
4190 static bool exec_context_may_touch_tty(const ExecContext *ec) {
4191 assert(ec);
4192
4193 return ec->tty_reset ||
4194 ec->tty_vhangup ||
4195 ec->tty_vt_disallocate ||
4196 is_terminal_input(ec->std_input) ||
4197 is_terminal_output(ec->std_output) ||
4198 is_terminal_output(ec->std_error);
4199 }
4200
4201 bool exec_context_may_touch_console(const ExecContext *ec) {
4202
4203 return exec_context_may_touch_tty(ec) &&
4204 tty_may_match_dev_console(exec_context_tty_path(ec));
4205 }
4206
4207 static void strv_fprintf(FILE *f, char **l) {
4208 char **g;
4209
4210 assert(f);
4211
4212 STRV_FOREACH(g, l)
4213 fprintf(f, " %s", *g);
4214 }
4215
/* Dumps the settings stored in the execution context c to the stream f as "Key: value" lines, each line
 * beginning with prefix (which is passed through strempty() first). A number of settings are written
 * unconditionally; most of the others only when the field or flag tested in front of them indicates
 * that they are set. The order of the statements below is the order of the lines in the output. */
4216 void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
4217 ExecDirectoryType dt;
4218 char **e, **d;
4219 unsigned i;
4220 int r;
4221
4222 assert(c);
4223 assert(f);
4224
4225 prefix = strempty(prefix);
4226
/* Settings that are written unconditionally. Unset working/root directories are shown as "/". */
4227 fprintf(f,
4228 "%sUMask: %04o\n"
4229 "%sWorkingDirectory: %s\n"
4230 "%sRootDirectory: %s\n"
4231 "%sNonBlocking: %s\n"
4232 "%sPrivateTmp: %s\n"
4233 "%sPrivateDevices: %s\n"
4234 "%sProtectKernelTunables: %s\n"
4235 "%sProtectKernelModules: %s\n"
4236 "%sProtectControlGroups: %s\n"
4237 "%sPrivateNetwork: %s\n"
4238 "%sPrivateUsers: %s\n"
4239 "%sProtectHome: %s\n"
4240 "%sProtectSystem: %s\n"
4241 "%sMountAPIVFS: %s\n"
4242 "%sIgnoreSIGPIPE: %s\n"
4243 "%sMemoryDenyWriteExecute: %s\n"
4244 "%sRestrictRealtime: %s\n"
4245 "%sRestrictSUIDSGID: %s\n"
4246 "%sKeyringMode: %s\n"
4247 "%sProtectHostname: %s\n",
4248 prefix, c->umask,
4249 prefix, c->working_directory ? c->working_directory : "/",
4250 prefix, c->root_directory ? c->root_directory : "/",
4251 prefix, yes_no(c->non_blocking),
4252 prefix, yes_no(c->private_tmp),
4253 prefix, yes_no(c->private_devices),
4254 prefix, yes_no(c->protect_kernel_tunables),
4255 prefix, yes_no(c->protect_kernel_modules),
4256 prefix, yes_no(c->protect_control_groups),
4257 prefix, yes_no(c->private_network),
4258 prefix, yes_no(c->private_users),
4259 prefix, protect_home_to_string(c->protect_home),
4260 prefix, protect_system_to_string(c->protect_system),
4261 prefix, yes_no(c->mount_apivfs),
4262 prefix, yes_no(c->ignore_sigpipe),
4263 prefix, yes_no(c->memory_deny_write_execute),
4264 prefix, yes_no(c->restrict_realtime),
4265 prefix, yes_no(c->restrict_suid_sgid),
4266 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4267 prefix, yes_no(c->protect_hostname));
4268
4269 if (c->root_image)
4270 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4271
/* The environment related string lists are written with one line per entry. */
4272 STRV_FOREACH(e, c->environment)
4273 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4274
4275 STRV_FOREACH(e, c->environment_files)
4276 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
4277
4278 STRV_FOREACH(e, c->pass_environment)
4279 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4280
4281 STRV_FOREACH(e, c->unset_environment)
4282 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4283
4284 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4285
/* For each directory type: the mode first, followed by one line per configured path. */
4286 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
4287 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
4288
4289 STRV_FOREACH(d, c->directories[dt].paths)
4290 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4291 }
4292
4293 if (c->nice_set)
4294 fprintf(f,
4295 "%sNice: %i\n",
4296 prefix, c->nice);
4297
4298 if (c->oom_score_adjust_set)
4299 fprintf(f,
4300 "%sOOMScoreAdjust: %i\n",
4301 prefix, c->oom_score_adjust);
4302
/* Resource limits: the hard limit goes out as Limit<name>, the soft limit as Limit<name>Soft. */
4303 for (i = 0; i < RLIM_NLIMITS; i++)
4304 if (c->rlimit[i]) {
4305 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
4306 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
4307 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
4308 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4309 }
4310
/* If the I/O scheduling class cannot be turned into a string its line is left out; the priority
 * line is written regardless. */
4311 if (c->ioprio_set) {
4312 _cleanup_free_ char *class_str = NULL;
4313
4314 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4315 if (r >= 0)
4316 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4317
4318 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
4319 }
4320
/* Same pattern as above: the policy line is optional, priority and reset-on-fork always follow. */
4321 if (c->cpu_sched_set) {
4322 _cleanup_free_ char *policy_str = NULL;
4323
4324 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4325 if (r >= 0)
4326 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4327
4328 fprintf(f,
4329 "%sCPUSchedulingPriority: %i\n"
4330 "%sCPUSchedulingResetOnFork: %s\n",
4331 prefix, c->cpu_sched_priority,
4332 prefix, yes_no(c->cpu_sched_reset_on_fork));
4333 }
4334
/* The CPU affinity mask is written as a space separated list of the CPU indexes set in it. */
4335 if (c->cpuset) {
4336 fprintf(f, "%sCPUAffinity:", prefix);
4337 for (i = 0; i < c->cpuset_ncpus; i++)
4338 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
4339 fprintf(f, " %u", i);
4340 fputs("\n", f);
4341 }
4342
4343 if (c->timer_slack_nsec != NSEC_INFINITY)
4344 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
4345
4346 fprintf(f,
4347 "%sStandardInput: %s\n"
4348 "%sStandardOutput: %s\n"
4349 "%sStandardError: %s\n",
4350 prefix, exec_input_to_string(c->std_input),
4351 prefix, exec_output_to_string(c->std_output),
4352 prefix, exec_output_to_string(c->std_error));
4353
/* Depending on the stdio modes selected above, the matching fd name or file path is written too. */
4354 if (c->std_input == EXEC_INPUT_NAMED_FD)
4355 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4356 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4357 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4358 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4359 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4360
4361 if (c->std_input == EXEC_INPUT_FILE)
4362 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4363 if (c->std_output == EXEC_OUTPUT_FILE)
4364 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4365 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4366 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
4367 if (c->std_error == EXEC_OUTPUT_FILE)
4368 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4369 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4370 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
4371
/* The TTY flags are only written together with a TTY path. */
4372 if (c->tty_path)
4373 fprintf(f,
4374 "%sTTYPath: %s\n"
4375 "%sTTYReset: %s\n"
4376 "%sTTYVHangup: %s\n"
4377 "%sTTYVTDisallocate: %s\n",
4378 prefix, c->tty_path,
4379 prefix, yes_no(c->tty_reset),
4380 prefix, yes_no(c->tty_vhangup),
4381 prefix, yes_no(c->tty_vt_disallocate));
4382
/* Facility and level are only written if stdout or stderr is connected to syslog, kmsg or the
 * journal (with or without console). */
4383 if (IN_SET(c->std_output,
4384 EXEC_OUTPUT_SYSLOG,
4385 EXEC_OUTPUT_KMSG,
4386 EXEC_OUTPUT_JOURNAL,
4387 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4388 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4389 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4390 IN_SET(c->std_error,
4391 EXEC_OUTPUT_SYSLOG,
4392 EXEC_OUTPUT_KMSG,
4393 EXEC_OUTPUT_JOURNAL,
4394 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4395 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4396 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
4397
4398 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
4399
4400 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4401 if (r >= 0)
4402 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
4403
4404 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4405 if (r >= 0)
4406 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
4407 }
4408
/* Unlike the conversions above a failure is ignored here; the line is written anyway, with
 * whatever strna() yields for the (then still NULL) string. */
4409 if (c->log_level_max >= 0) {
4410 _cleanup_free_ char *t = NULL;
4411
4412 (void) log_level_to_string_alloc(c->log_level_max, &t);
4413
4414 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4415 }
4416
4417 if (c->log_rate_limit_interval_usec > 0) {
4418 char buf_timespan[FORMAT_TIMESPAN_MAX];
4419
4420 fprintf(f,
4421 "%sLogRateLimitIntervalSec: %s\n",
4422 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
4423 }
4424
4425 if (c->log_rate_limit_burst > 0)
4426 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
4427
/* Extra log fields are written raw via fwrite(), using the length stored with each field. */
4428 if (c->n_log_extra_fields > 0) {
4429 size_t j;
4430
4431 for (j = 0; j < c->n_log_extra_fields; j++) {
4432 fprintf(f, "%sLogExtraFields: ", prefix);
4433 fwrite(c->log_extra_fields[j].iov_base,
4434 1, c->log_extra_fields[j].iov_len,
4435 f);
4436 fputc('\n', f);
4437 }
4438 }
4439
4440 if (c->secure_bits) {
4441 _cleanup_free_ char *str = NULL;
4442
4443 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4444 if (r >= 0)
4445 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4446 }
4447
/* The bounding set is only written if it differs from CAP_ALL, the ambient set only if non-zero. */
4448 if (c->capability_bounding_set != CAP_ALL) {
4449 _cleanup_free_ char *str = NULL;
4450
4451 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4452 if (r >= 0)
4453 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4454 }
4455
4456 if (c->capability_ambient_set != 0) {
4457 _cleanup_free_ char *str = NULL;
4458
4459 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4460 if (r >= 0)
4461 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
4462 }
4463
4464 if (c->user)
4465 fprintf(f, "%sUser: %s\n", prefix, c->user);
4466 if (c->group)
4467 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4468
4469 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4470
4471 if (!strv_isempty(c->supplementary_groups)) {
4472 fprintf(f, "%sSupplementaryGroups:", prefix);
4473 strv_fprintf(f, c->supplementary_groups);
4474 fputs("\n", f);
4475 }
4476
4477 if (c->pam_name)
4478 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4479
4480 if (!strv_isempty(c->read_write_paths)) {
4481 fprintf(f, "%sReadWritePaths:", prefix);
4482 strv_fprintf(f, c->read_write_paths);
4483 fputs("\n", f);
4484 }
4485
4486 if (!strv_isempty(c->read_only_paths)) {
4487 fprintf(f, "%sReadOnlyPaths:", prefix);
4488 strv_fprintf(f, c->read_only_paths);
4489 fputs("\n", f);
4490 }
4491
4492 if (!strv_isempty(c->inaccessible_paths)) {
4493 fprintf(f, "%sInaccessiblePaths:", prefix);
4494 strv_fprintf(f, c->inaccessible_paths);
4495 fputs("\n", f);
4496 }
4497
/* Each bind mount is written as "[-]SOURCE:DESTINATION:rbind|norbind", under the key
 * BindReadOnlyPaths or BindPaths depending on its read_only flag. */
4498 if (c->n_bind_mounts > 0)
4499 for (i = 0; i < c->n_bind_mounts; i++)
4500 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
4501 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
4502 c->bind_mounts[i].ignore_enoent ? "-": "",
4503 c->bind_mounts[i].source,
4504 c->bind_mounts[i].destination,
4505 c->bind_mounts[i].recursive ? "rbind" : "norbind");
4506
/* Temporary file systems are written as "PATH" or, if options are set, "PATH:OPTIONS". */
4507 if (c->n_temporary_filesystems > 0)
4508 for (i = 0; i < c->n_temporary_filesystems; i++) {
4509 TemporaryFileSystem *t = c->temporary_filesystems + i;
4510
4511 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4512 t->path,
4513 isempty(t->options) ? "" : ":",
4514 strempty(t->options));
4515 }
4516
4517 if (c->utmp_id)
4518 fprintf(f,
4519 "%sUtmpIdentifier: %s\n",
4520 prefix, c->utmp_id);
4521
/* For the three security labels below a "-" is put in front of the value when the matching
 * *_ignore flag is set. */
4522 if (c->selinux_context)
4523 fprintf(f,
4524 "%sSELinuxContext: %s%s\n",
4525 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4526
4527 if (c->apparmor_profile)
4528 fprintf(f,
4529 "%sAppArmorProfile: %s%s\n",
4530 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4531
4532 if (c->smack_process_label)
4533 fprintf(f,
4534 "%sSmackProcessLabel: %s%s\n",
4535 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4536
4537 if (c->personality != PERSONALITY_INVALID)
4538 fprintf(f,
4539 "%sPersonality: %s\n",
4540 prefix, strna(personality_to_string(c->personality)));
4541
4542 fprintf(f,
4543 "%sLockPersonality: %s\n",
4544 prefix, yes_no(c->lock_personality));
4545
/* A leading '~' is written when the filter is not a whitelist. If HAVE_SECCOMP is off only the key
 * (and possibly the '~') is written, without any entries. */
4546 if (c->syscall_filter) {
4547 #if HAVE_SECCOMP
4548 Iterator j;
4549 void *id, *val;
4550 bool first = true;
4551 #endif
4552
4553 fprintf(f,
4554 "%sSystemCallFilter: ",
4555 prefix);
4556
4557 if (!c->syscall_whitelist)
4558 fputc('~', f);
4559
4560 #if HAVE_SECCOMP
4561 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
4562 _cleanup_free_ char *name = NULL;
4563 const char *errno_name = NULL;
4564 int num = PTR_TO_INT(val);
4565
4566 if (first)
4567 first = false;
4568 else
4569 fputc(' ', f);
4570
/* The stored key is decremented by one before it is resolved to a syscall name. */
4571 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4572 fputs(strna(name), f);
4573
/* A non-negative value is appended after a ':', as errno name if one is known and
 * numerically otherwise. */
4574 if (num >= 0) {
4575 errno_name = errno_to_name(num);
4576 if (errno_name)
4577 fprintf(f, ":%s", errno_name);
4578 else
4579 fprintf(f, ":%d", num);
4580 }
4581 }
4582 #endif
4583
4584 fputc('\n', f);
4585 }
4586
/* As with the filter above, the architecture names are only listed if HAVE_SECCOMP is on. */
4587 if (c->syscall_archs) {
4588 #if HAVE_SECCOMP
4589 Iterator j;
4590 void *id;
4591 #endif
4592
4593 fprintf(f,
4594 "%sSystemCallArchitectures:",
4595 prefix);
4596
4597 #if HAVE_SECCOMP
4598 SET_FOREACH(id, c->syscall_archs, j)
4599 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4600 #endif
4601 fputc('\n', f);
4602 }
4603
4604 if (exec_context_restrict_namespaces_set(c)) {
4605 _cleanup_free_ char *s = NULL;
4606
4607 r = namespace_flags_to_string(c->restrict_namespaces, &s);
4608 if (r >= 0)
4609 fprintf(f, "%sRestrictNamespaces: %s\n",
4610 prefix, s);
4611 }
4612
4613 if (c->network_namespace_path)
4614 fprintf(f,
4615 "%sNetworkNamespacePath: %s\n",
4616 prefix, c->network_namespace_path);
4617
/* Written symbolically if errno_to_name() knows the number, numerically otherwise. */
4618 if (c->syscall_errno > 0) {
4619 const char *errno_name;
4620
4621 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4622
4623 errno_name = errno_to_name(c->syscall_errno);
4624 if (errno_name)
4625 fprintf(f, "%s\n", errno_name);
4626 else
4627 fprintf(f, "%d\n", c->syscall_errno);
4628 }
4629 }
4630
4631 bool exec_context_maintains_privileges(const ExecContext *c) {
4632 assert(c);
4633
4634 /* Returns true if the process forked off would run under
4635 * an unchanged UID or as root. */
4636
4637 if (!c->user)
4638 return true;
4639
4640 if (streq(c->user, "root") || streq(c->user, "0"))
4641 return true;
4642
4643 return false;
4644 }
4645
4646 int exec_context_get_effective_ioprio(const ExecContext *c) {
4647 int p;
4648
4649 assert(c);
4650
4651 if (c->ioprio_set)
4652 return c->ioprio;
4653
4654 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
4655 if (p < 0)
4656 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
4657
4658 return p;
4659 }
4660
4661 void exec_context_free_log_extra_fields(ExecContext *c) {
4662 size_t l;
4663
4664 assert(c);
4665
4666 for (l = 0; l < c->n_log_extra_fields; l++)
4667 free(c->log_extra_fields[l].iov_base);
4668 c->log_extra_fields = mfree(c->log_extra_fields);
4669 c->n_log_extra_fields = 0;
4670 }
4671
4672 void exec_context_revert_tty(ExecContext *c) {
4673 int r;
4674
4675 assert(c);
4676
4677 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4678 exec_context_tty_reset(c, NULL);
4679
4680 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4681 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4682 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4683
4684 if (exec_context_may_touch_tty(c)) {
4685 const char *path;
4686
4687 path = exec_context_tty_path(c);
4688 if (path) {
4689 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
4690 if (r < 0 && r != -ENOENT)
4691 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
4692 }
4693 }
4694 }
4695
4696 void exec_status_start(ExecStatus *s, pid_t pid) {
4697 assert(s);
4698
4699 *s = (ExecStatus) {
4700 .pid = pid,
4701 };
4702
4703 dual_timestamp_get(&s->start_timestamp);
4704 }
4705
4706 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
4707 assert(s);
4708
4709 if (s->pid != pid) {
4710 *s = (ExecStatus) {
4711 .pid = pid,
4712 };
4713 }
4714
4715 dual_timestamp_get(&s->exit_timestamp);
4716
4717 s->code = code;
4718 s->status = status;
4719
4720 if (context && context->utmp_id)
4721 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
4722 }
4723
4724 void exec_status_reset(ExecStatus *s) {
4725 assert(s);
4726
4727 *s = (ExecStatus) {};
4728 }
4729
4730 void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
4731 char buf[FORMAT_TIMESTAMP_MAX];
4732
4733 assert(s);
4734 assert(f);
4735
4736 if (s->pid <= 0)
4737 return;
4738
4739 prefix = strempty(prefix);
4740
4741 fprintf(f,
4742 "%sPID: "PID_FMT"\n",
4743 prefix, s->pid);
4744
4745 if (dual_timestamp_is_set(&s->start_timestamp))
4746 fprintf(f,
4747 "%sStart Timestamp: %s\n",
4748 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4749
4750 if (dual_timestamp_is_set(&s->exit_timestamp))
4751 fprintf(f,
4752 "%sExit Timestamp: %s\n"
4753 "%sExit Code: %s\n"
4754 "%sExit Status: %i\n",
4755 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4756 prefix, sigchld_code_to_string(s->code),
4757 prefix, s->status);
4758 }
4759
4760 static char *exec_command_line(char **argv) {
4761 size_t k;
4762 char *n, *p, **a;
4763 bool first = true;
4764
4765 assert(argv);
4766
4767 k = 1;
4768 STRV_FOREACH(a, argv)
4769 k += strlen(*a)+3;
4770
4771 n = new(char, k);
4772 if (!n)
4773 return NULL;
4774
4775 p = n;
4776 STRV_FOREACH(a, argv) {
4777
4778 if (!first)
4779 *(p++) = ' ';
4780 else
4781 first = false;
4782
4783 if (strpbrk(*a, WHITESPACE)) {
4784 *(p++) = '\'';
4785 p = stpcpy(p, *a);
4786 *(p++) = '\'';
4787 } else
4788 p = stpcpy(p, *a);
4789
4790 }
4791
4792 *p = 0;
4793
4794 /* FIXME: this doesn't really handle arguments that have
4795 * spaces and ticks in them */
4796
4797 return n;
4798 }
4799
4800 static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4801 _cleanup_free_ char *cmd = NULL;
4802 const char *prefix2;
4803
4804 assert(c);
4805 assert(f);
4806
4807 prefix = strempty(prefix);
4808 prefix2 = strjoina(prefix, "\t");
4809
4810 cmd = exec_command_line(c->argv);
4811 fprintf(f,
4812 "%sCommand Line: %s\n",
4813 prefix, cmd ? cmd : strerror(ENOMEM));
4814
4815 exec_status_dump(&c->exec_status, f, prefix2);
4816 }
4817
4818 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4819 assert(f);
4820
4821 prefix = strempty(prefix);
4822
4823 LIST_FOREACH(command, c, c)
4824 exec_command_dump(c, f, prefix);
4825 }
4826
4827 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4828 ExecCommand *end;
4829
4830 assert(l);
4831 assert(e);
4832
4833 if (*l) {
4834 /* It's kind of important, that we keep the order here */
4835 LIST_FIND_TAIL(command, *l, end);
4836 LIST_INSERT_AFTER(command, *l, end, e);
4837 } else
4838 *l = e;
4839 }
4840
4841 int exec_command_set(ExecCommand *c, const char *path, ...) {
4842 va_list ap;
4843 char **l, *p;
4844
4845 assert(c);
4846 assert(path);
4847
4848 va_start(ap, path);
4849 l = strv_new_ap(path, ap);
4850 va_end(ap);
4851
4852 if (!l)
4853 return -ENOMEM;
4854
4855 p = strdup(path);
4856 if (!p) {
4857 strv_free(l);
4858 return -ENOMEM;
4859 }
4860
4861 free_and_replace(c->path, p);
4862
4863 return strv_free_and_replace(c->argv, l);
4864 }
4865
4866 int exec_command_append(ExecCommand *c, const char *path, ...) {
4867 _cleanup_strv_free_ char **l = NULL;
4868 va_list ap;
4869 int r;
4870
4871 assert(c);
4872 assert(path);
4873
4874 va_start(ap, path);
4875 l = strv_new_ap(path, ap);
4876 va_end(ap);
4877
4878 if (!l)
4879 return -ENOMEM;
4880
4881 r = strv_extend_strv(&c->argv, l, false);
4882 if (r < 0)
4883 return r;
4884
4885 return 0;
4886 }
4887
4888 static void *remove_tmpdir_thread(void *p) {
4889 _cleanup_free_ char *path = p;
4890
4891 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4892 return NULL;
4893 }
4894
4895 static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
4896 int r;
4897
4898 if (!rt)
4899 return NULL;
4900
4901 if (rt->manager)
4902 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
4903
4904 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4905 if (destroy && rt->tmp_dir) {
4906 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4907
4908 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4909 if (r < 0) {
4910 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4911 free(rt->tmp_dir);
4912 }
4913
4914 rt->tmp_dir = NULL;
4915 }
4916
4917 if (destroy && rt->var_tmp_dir) {
4918 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4919
4920 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4921 if (r < 0) {
4922 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4923 free(rt->var_tmp_dir);
4924 }
4925
4926 rt->var_tmp_dir = NULL;
4927 }
4928
4929 rt->id = mfree(rt->id);
4930 rt->tmp_dir = mfree(rt->tmp_dir);
4931 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
4932 safe_close_pair(rt->netns_storage_socket);
4933 return mfree(rt);
4934 }
4935
4936 static void exec_runtime_freep(ExecRuntime **rt) {
4937 (void) exec_runtime_free(*rt, false);
4938 }
4939
4940 static int exec_runtime_allocate(ExecRuntime **ret) {
4941 ExecRuntime *n;
4942
4943 assert(ret);
4944
4945 n = new(ExecRuntime, 1);
4946 if (!n)
4947 return -ENOMEM;
4948
4949 *n = (ExecRuntime) {
4950 .netns_storage_socket = { -1, -1 },
4951 };
4952
4953 *ret = n;
4954 return 0;
4955 }
4956
4957 static int exec_runtime_add(
4958 Manager *m,
4959 const char *id,
4960 const char *tmp_dir,
4961 const char *var_tmp_dir,
4962 const int netns_storage_socket[2],
4963 ExecRuntime **ret) {
4964
4965 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
4966 int r;
4967
4968 assert(m);
4969 assert(id);
4970
4971 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
4972 if (r < 0)
4973 return r;
4974
4975 r = exec_runtime_allocate(&rt);
4976 if (r < 0)
4977 return r;
4978
4979 rt->id = strdup(id);
4980 if (!rt->id)
4981 return -ENOMEM;
4982
4983 if (tmp_dir) {
4984 rt->tmp_dir = strdup(tmp_dir);
4985 if (!rt->tmp_dir)
4986 return -ENOMEM;
4987
4988 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4989 assert(var_tmp_dir);
4990 rt->var_tmp_dir = strdup(var_tmp_dir);
4991 if (!rt->var_tmp_dir)
4992 return -ENOMEM;
4993 }
4994
4995 if (netns_storage_socket) {
4996 rt->netns_storage_socket[0] = netns_storage_socket[0];
4997 rt->netns_storage_socket[1] = netns_storage_socket[1];
4998 }
4999
5000 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5001 if (r < 0)
5002 return r;
5003
5004 rt->manager = m;
5005
5006 if (ret)
5007 *ret = rt;
5008
5009 /* do not remove created ExecRuntime object when the operation succeeds. */
5010 rt = NULL;
5011 return 0;
5012 }
5013
5014 static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5015 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
5016 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
5017 int r;
5018
5019 assert(m);
5020 assert(c);
5021 assert(id);
5022
5023 /* It is not necessary to create ExecRuntime object. */
5024 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
5025 return 0;
5026
5027 if (c->private_tmp) {
5028 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
5029 if (r < 0)
5030 return r;
5031 }
5032
5033 if (c->private_network || c->network_namespace_path) {
5034 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5035 return -errno;
5036 }
5037
5038 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5039 if (r < 0)
5040 return r;
5041
5042 /* Avoid cleanup */
5043 netns_storage_socket[0] = netns_storage_socket[1] = -1;
5044 return 1;
5045 }
5046
5047 int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5048 ExecRuntime *rt;
5049 int r;
5050
5051 assert(m);
5052 assert(id);
5053 assert(ret);
5054
5055 rt = hashmap_get(m->exec_runtime_by_id, id);
5056 if (rt)
5057 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5058 goto ref;
5059
5060 if (!create)
5061 return 0;
5062
5063 /* If not found, then create a new object. */
5064 r = exec_runtime_make(m, c, id, &rt);
5065 if (r <= 0)
5066 /* When r == 0, it is not necessary to create ExecRuntime object. */
5067 return r;
5068
5069 ref:
5070 /* increment reference counter. */
5071 rt->n_ref++;
5072 *ret = rt;
5073 return 1;
5074 }
5075
5076 ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5077 if (!rt)
5078 return NULL;
5079
5080 assert(rt->n_ref > 0);
5081
5082 rt->n_ref--;
5083 if (rt->n_ref > 0)
5084 return NULL;
5085
5086 return exec_runtime_free(rt, destroy);
5087 }
5088
5089 int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5090 ExecRuntime *rt;
5091 Iterator i;
5092
5093 assert(m);
5094 assert(f);
5095 assert(fds);
5096
5097 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5098 fprintf(f, "exec-runtime=%s", rt->id);
5099
5100 if (rt->tmp_dir)
5101 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
5102
5103 if (rt->var_tmp_dir)
5104 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
5105
5106 if (rt->netns_storage_socket[0] >= 0) {
5107 int copy;
5108
5109 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5110 if (copy < 0)
5111 return copy;
5112
5113 fprintf(f, " netns-socket-0=%i", copy);
5114 }
5115
5116 if (rt->netns_storage_socket[1] >= 0) {
5117 int copy;
5118
5119 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5120 if (copy < 0)
5121 return copy;
5122
5123 fprintf(f, " netns-socket-1=%i", copy);
5124 }
5125
5126 fputc('\n', f);
5127 }
5128
5129 return 0;
5130 }
5131
5132 int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5133 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5134 ExecRuntime *rt;
5135 int r;
5136
5137 /* This is for the migration from old (v237 or earlier) deserialization text.
5138 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5139 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5140 * so or not from the serialized text, then we always creates a new object owned by this. */
5141
5142 assert(u);
5143 assert(key);
5144 assert(value);
5145
5146 /* Manager manages ExecRuntime objects by the unit id.
5147 * So, we omit the serialized text when the unit does not have id (yet?)... */
5148 if (isempty(u->id)) {
5149 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5150 return 0;
5151 }
5152
5153 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5154 if (r < 0) {
5155 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5156 return 0;
5157 }
5158
5159 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5160 if (!rt) {
5161 r = exec_runtime_allocate(&rt_create);
5162 if (r < 0)
5163 return log_oom();
5164
5165 rt_create->id = strdup(u->id);
5166 if (!rt_create->id)
5167 return log_oom();
5168
5169 rt = rt_create;
5170 }
5171
5172 if (streq(key, "tmp-dir")) {
5173 char *copy;
5174
5175 copy = strdup(value);
5176 if (!copy)
5177 return log_oom();
5178
5179 free_and_replace(rt->tmp_dir, copy);
5180
5181 } else if (streq(key, "var-tmp-dir")) {
5182 char *copy;
5183
5184 copy = strdup(value);
5185 if (!copy)
5186 return log_oom();
5187
5188 free_and_replace(rt->var_tmp_dir, copy);
5189
5190 } else if (streq(key, "netns-socket-0")) {
5191 int fd;
5192
5193 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
5194 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
5195 return 0;
5196 }
5197
5198 safe_close(rt->netns_storage_socket[0]);
5199 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5200
5201 } else if (streq(key, "netns-socket-1")) {
5202 int fd;
5203
5204 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
5205 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
5206 return 0;
5207 }
5208
5209 safe_close(rt->netns_storage_socket[1]);
5210 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
5211 } else
5212 return 0;
5213
5214 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5215 if (rt_create) {
5216 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5217 if (r < 0) {
5218 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
5219 return 0;
5220 }
5221
5222 rt_create->manager = u->manager;
5223
5224 /* Avoid cleanup */
5225 rt_create = NULL;
5226 }
5227
5228 return 1;
5229 }
5230
5231 void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5232 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5233 int r, fd0 = -1, fd1 = -1;
5234 const char *p, *v = value;
5235 size_t n;
5236
5237 assert(m);
5238 assert(value);
5239 assert(fds);
5240
5241 n = strcspn(v, " ");
5242 id = strndupa(v, n);
5243 if (v[n] != ' ')
5244 goto finalize;
5245 p = v + n + 1;
5246
5247 v = startswith(p, "tmp-dir=");
5248 if (v) {
5249 n = strcspn(v, " ");
5250 tmp_dir = strndupa(v, n);
5251 if (v[n] != ' ')
5252 goto finalize;
5253 p = v + n + 1;
5254 }
5255
5256 v = startswith(p, "var-tmp-dir=");
5257 if (v) {
5258 n = strcspn(v, " ");
5259 var_tmp_dir = strndupa(v, n);
5260 if (v[n] != ' ')
5261 goto finalize;
5262 p = v + n + 1;
5263 }
5264
5265 v = startswith(p, "netns-socket-0=");
5266 if (v) {
5267 char *buf;
5268
5269 n = strcspn(v, " ");
5270 buf = strndupa(v, n);
5271 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5272 log_debug("Unable to process exec-runtime netns fd specification.");
5273 return;
5274 }
5275 fd0 = fdset_remove(fds, fd0);
5276 if (v[n] != ' ')
5277 goto finalize;
5278 p = v + n + 1;
5279 }
5280
5281 v = startswith(p, "netns-socket-1=");
5282 if (v) {
5283 char *buf;
5284
5285 n = strcspn(v, " ");
5286 buf = strndupa(v, n);
5287 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5288 log_debug("Unable to process exec-runtime netns fd specification.");
5289 return;
5290 }
5291 fd1 = fdset_remove(fds, fd1);
5292 }
5293
5294 finalize:
5295
5296 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
5297 if (r < 0)
5298 log_debug_errno(r, "Failed to add exec-runtime: %m");
5299 }
5300
5301 void exec_runtime_vacuum(Manager *m) {
5302 ExecRuntime *rt;
5303 Iterator i;
5304
5305 assert(m);
5306
5307 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5308
5309 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5310 if (rt->n_ref > 0)
5311 continue;
5312
5313 (void) exec_runtime_free(rt, false);
5314 }
5315 }
5316
5317 void exec_params_clear(ExecParameters *p) {
5318 if (!p)
5319 return;
5320
5321 strv_free(p->environment);
5322 }
5323
/* String names of the ExecInput values, indexed by the enum value. The lookup helpers for this table
 * are generated by the DEFINE_STRING_TABLE_LOOKUP() invocation below; exec_input_to_string(), as used
 * by exec_context_dump(), is one of them. */
5324 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
5325 [EXEC_INPUT_NULL] = "null",
5326 [EXEC_INPUT_TTY] = "tty",
5327 [EXEC_INPUT_TTY_FORCE] = "tty-force",
5328 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
5329 [EXEC_INPUT_SOCKET] = "socket",
5330 [EXEC_INPUT_NAMED_FD] = "fd",
5331 [EXEC_INPUT_DATA] = "data",
5332 [EXEC_INPUT_FILE] = "file",
5333 };
5334
5335 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5336
/* String names of the ExecOutput values, indexed by the enum value. The lookup helpers for this table
 * are generated by the DEFINE_STRING_TABLE_LOOKUP() invocation below; exec_output_to_string(), as used
 * by exec_context_dump(), is one of them. */
5337 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
5338 [EXEC_OUTPUT_INHERIT] = "inherit",
5339 [EXEC_OUTPUT_NULL] = "null",
5340 [EXEC_OUTPUT_TTY] = "tty",
5341 [EXEC_OUTPUT_SYSLOG] = "syslog",
5342 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
5343 [EXEC_OUTPUT_KMSG] = "kmsg",
5344 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
5345 [EXEC_OUTPUT_JOURNAL] = "journal",
5346 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
5347 [EXEC_OUTPUT_SOCKET] = "socket",
5348 [EXEC_OUTPUT_NAMED_FD] = "fd",
5349 [EXEC_OUTPUT_FILE] = "file",
5350 [EXEC_OUTPUT_FILE_APPEND] = "append",
5351 };
5352
5353 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
5354
/* String names of the ExecUtmpMode values, indexed by the enum value.
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP() presumably generates exec_utmp_mode_to_string() and a
 * matching from-string helper, as for the other tables in this file; confirm against the macro. */
5355 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
5356 [EXEC_UTMP_INIT] = "init",
5357 [EXEC_UTMP_LOGIN] = "login",
5358 [EXEC_UTMP_USER] = "user",
5359 };
5360
5361 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
5362
/* String names of the ExecPreserveMode values, indexed by the enum value. exec_preserve_mode_to_string(),
 * as used by exec_context_dump(), is among the helpers generated below.
 * NOTE(review): the _WITH_BOOLEAN variant presumably makes the from-string helper accept boolean
 * spellings too, with "true" mapping to EXEC_PRESERVE_YES; confirm against the macro definition. */
5363 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
5364 [EXEC_PRESERVE_NO] = "no",
5365 [EXEC_PRESERVE_YES] = "yes",
5366 [EXEC_PRESERVE_RESTART] = "restart",
5367 };
5368
5369 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
5370
/* Names of the ExecDirectoryType values, indexed by the enum value. exec_context_dump() uses them, via
 * exec_directory_type_to_string(), both as key of the path lines and, with "Mode" appended, as key of
 * the mode lines. The lookup helpers are generated by DEFINE_STRING_TABLE_LOOKUP() below. */
5371 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5372 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
5373 [EXEC_DIRECTORY_STATE] = "StateDirectory",
5374 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
5375 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
5376 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
5377 };
5378
5379 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
5380
/* A second set of names for the ExecDirectoryType values, in upper case with underscores, indexed by
 * the enum value.
 * NOTE(review): judging by the table and macro names these are environment variable names, and only
 * a file-local to-string helper is generated for them; confirm against the macro definition and the
 * users of exec_directory_env_name_to_string(). */
5381 static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
5382 [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
5383 [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
5384 [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
5385 [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
5386 [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
5387 };
5388
5389 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5390
/* String names of the ExecKeyringMode values, indexed by the enum value. The lookup helpers for this
 * table are generated by the DEFINE_STRING_TABLE_LOOKUP() invocation below;
 * exec_keyring_mode_to_string(), as used by exec_context_dump(), is one of them. */
5391 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
5392 [EXEC_KEYRING_INHERIT] = "inherit",
5393 [EXEC_KEYRING_PRIVATE] = "private",
5394 [EXEC_KEYRING_SHARED] = "shared",
5395 };
5396
5397 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);