1 /* SPDX-License-Identifier: LGPL-2.1+ */
6 #include <sys/eventfd.h>
9 #include <sys/personality.h>
10 #include <sys/prctl.h>
12 #include <sys/types.h>
18 #include <security/pam_appl.h>
22 #include <selinux/selinux.h>
30 #include <sys/apparmor.h>
33 #include "sd-messages.h"
36 #include "alloc-util.h"
38 #include "apparmor-util.h"
43 #include "capability-util.h"
44 #include "chown-recursive.h"
45 #include "cgroup-setup.h"
46 #include "cpu-set-util.h"
50 #include "errno-list.h"
52 #include "exit-status.h"
54 #include "format-util.h"
56 #include "glob-util.h"
63 #include "memory-util.h"
64 #include "missing_fs.h"
66 #include "namespace.h"
67 #include "parse-util.h"
68 #include "path-util.h"
69 #include "process-util.h"
70 #include "rlimit-util.h"
73 #include "seccomp-util.h"
75 #include "securebits-util.h"
76 #include "selinux-util.h"
77 #include "signal-util.h"
78 #include "smack-util.h"
79 #include "socket-util.h"
81 #include "stat-util.h"
82 #include "string-table.h"
83 #include "string-util.h"
85 #include "syslog-util.h"
86 #include "terminal-util.h"
87 #include "umask-util.h"
89 #include "user-util.h"
90 #include "utmp-wtmp.h"
92 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
93 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
95 #define SNDBUF_SIZE (8*1024*1024)
97 static int shift_fds(int fds
[], size_t n_fds
) {
98 int start
, restart_from
;
103 /* Modifies the fds array! (sorts it) */
113 for (i
= start
; i
< (int) n_fds
; i
++) {
116 /* Already at right index? */
120 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
127 /* Hmm, the fd we wanted isn't free? Then
128 * let's remember that and try again from here */
129 if (nfd
!= i
+3 && restart_from
< 0)
133 if (restart_from
< 0)
136 start
= restart_from
;
142 static int flags_fds(const int fds
[], size_t n_socket_fds
, size_t n_storage_fds
, bool nonblock
) {
146 n_fds
= n_socket_fds
+ n_storage_fds
;
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
153 * O_NONBLOCK only applies to socket activation though. */
155 for (i
= 0; i
< n_fds
; i
++) {
157 if (i
< n_socket_fds
) {
158 r
= fd_nonblock(fds
[i
], nonblock
);
163 /* We unconditionally drop FD_CLOEXEC from the fds,
164 * since after all we want to pass these fds to our
167 r
= fd_cloexec(fds
[i
], false);
175 static const char *exec_context_tty_path(const ExecContext
*context
) {
178 if (context
->stdio_as_fds
)
181 if (context
->tty_path
)
182 return context
->tty_path
;
184 return "/dev/console";
187 static void exec_context_tty_reset(const ExecContext
*context
, const ExecParameters
*p
) {
192 path
= exec_context_tty_path(context
);
194 if (context
->tty_vhangup
) {
195 if (p
&& p
->stdin_fd
>= 0)
196 (void) terminal_vhangup_fd(p
->stdin_fd
);
198 (void) terminal_vhangup(path
);
201 if (context
->tty_reset
) {
202 if (p
&& p
->stdin_fd
>= 0)
203 (void) reset_terminal_fd(p
->stdin_fd
, true);
205 (void) reset_terminal(path
);
208 if (context
->tty_vt_disallocate
&& path
)
209 (void) vt_disallocate(path
);
212 static bool is_terminal_input(ExecInput i
) {
215 EXEC_INPUT_TTY_FORCE
,
216 EXEC_INPUT_TTY_FAIL
);
219 static bool is_terminal_output(ExecOutput o
) {
222 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
223 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
224 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
);
227 static bool is_syslog_output(ExecOutput o
) {
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
);
233 static bool is_kmsg_output(ExecOutput o
) {
236 EXEC_OUTPUT_KMSG_AND_CONSOLE
);
239 static bool exec_context_needs_term(const ExecContext
*c
) {
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
244 if (is_terminal_input(c
->std_input
))
247 if (is_terminal_output(c
->std_output
))
250 if (is_terminal_output(c
->std_error
))
253 return !!c
->tty_path
;
256 static int open_null_as(int flags
, int nfd
) {
261 fd
= open("/dev/null", flags
|O_NOCTTY
);
265 return move_fd(fd
, nfd
, false);
268 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
269 static const union sockaddr_union sa
= {
270 .un
.sun_family
= AF_UNIX
,
271 .un
.sun_path
= "/run/systemd/journal/stdout",
273 uid_t olduid
= UID_INVALID
;
274 gid_t oldgid
= GID_INVALID
;
277 if (gid_is_valid(gid
)) {
280 if (setegid(gid
) < 0)
284 if (uid_is_valid(uid
)) {
287 if (seteuid(uid
) < 0) {
293 r
= connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0 ? -errno
: 0;
295 /* If we fail to restore the uid or gid, things will likely
296 fail later on. This should only happen if an LSM interferes. */
298 if (uid_is_valid(uid
))
299 (void) seteuid(olduid
);
302 if (gid_is_valid(gid
))
303 (void) setegid(oldgid
);
308 static int connect_logger_as(
310 const ExecContext
*context
,
311 const ExecParameters
*params
,
318 _cleanup_close_
int fd
= -1;
323 assert(output
< _EXEC_OUTPUT_MAX
);
327 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
331 r
= connect_journal_socket(fd
, uid
, gid
);
335 if (shutdown(fd
, SHUT_RD
) < 0)
338 (void) fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
348 context
->syslog_identifier
?: ident
,
349 params
->flags
& EXEC_PASS_LOG_UNIT
? unit
->id
: "",
350 context
->syslog_priority
,
351 !!context
->syslog_level_prefix
,
352 is_syslog_output(output
),
353 is_kmsg_output(output
),
354 is_terminal_output(output
)) < 0)
357 return move_fd(TAKE_FD(fd
), nfd
, false);
360 static int open_terminal_as(const char *path
, int flags
, int nfd
) {
366 fd
= open_terminal(path
, flags
| O_NOCTTY
);
370 return move_fd(fd
, nfd
, false);
373 static int acquire_path(const char *path
, int flags
, mode_t mode
) {
374 union sockaddr_union sa
= {};
375 _cleanup_close_
int fd
= -1;
380 if (IN_SET(flags
& O_ACCMODE
, O_WRONLY
, O_RDWR
))
383 fd
= open(path
, flags
|O_NOCTTY
, mode
);
387 if (errno
!= ENXIO
) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
389 if (strlen(path
) >= sizeof(sa
.un
.sun_path
)) /* Too long, can't be a UNIX socket */
392 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
394 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
398 salen
= sockaddr_un_set_path(&sa
.un
, path
);
402 if (connect(fd
, &sa
.sa
, salen
) < 0)
403 return errno
== EINVAL
? -ENXIO
: -errno
; /* Propagate initial error if we get EINVAL, i.e. we have
404 * indication that this wasn't an AF_UNIX socket after all */
406 if ((flags
& O_ACCMODE
) == O_RDONLY
)
407 r
= shutdown(fd
, SHUT_WR
);
408 else if ((flags
& O_ACCMODE
) == O_WRONLY
)
409 r
= shutdown(fd
, SHUT_RD
);
418 static int fixup_input(
419 const ExecContext
*context
,
421 bool apply_tty_stdin
) {
427 std_input
= context
->std_input
;
429 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
430 return EXEC_INPUT_NULL
;
432 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
433 return EXEC_INPUT_NULL
;
435 if (std_input
== EXEC_INPUT_DATA
&& context
->stdin_data_size
== 0)
436 return EXEC_INPUT_NULL
;
441 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
443 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
444 return EXEC_OUTPUT_INHERIT
;
449 static int setup_input(
450 const ExecContext
*context
,
451 const ExecParameters
*params
,
453 const int named_iofds
[static 3]) {
461 if (params
->stdin_fd
>= 0) {
462 if (dup2(params
->stdin_fd
, STDIN_FILENO
) < 0)
465 /* Try to make this the controlling tty, if it is a tty, and reset it */
466 if (isatty(STDIN_FILENO
)) {
467 (void) ioctl(STDIN_FILENO
, TIOCSCTTY
, context
->std_input
== EXEC_INPUT_TTY_FORCE
);
468 (void) reset_terminal_fd(STDIN_FILENO
, true);
474 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
478 case EXEC_INPUT_NULL
:
479 return open_null_as(O_RDONLY
, STDIN_FILENO
);
482 case EXEC_INPUT_TTY_FORCE
:
483 case EXEC_INPUT_TTY_FAIL
: {
486 fd
= acquire_terminal(exec_context_tty_path(context
),
487 i
== EXEC_INPUT_TTY_FAIL
? ACQUIRE_TERMINAL_TRY
:
488 i
== EXEC_INPUT_TTY_FORCE
? ACQUIRE_TERMINAL_FORCE
:
489 ACQUIRE_TERMINAL_WAIT
,
494 return move_fd(fd
, STDIN_FILENO
, false);
497 case EXEC_INPUT_SOCKET
:
498 assert(socket_fd
>= 0);
500 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
502 case EXEC_INPUT_NAMED_FD
:
503 assert(named_iofds
[STDIN_FILENO
] >= 0);
505 (void) fd_nonblock(named_iofds
[STDIN_FILENO
], false);
506 return dup2(named_iofds
[STDIN_FILENO
], STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
508 case EXEC_INPUT_DATA
: {
511 fd
= acquire_data_fd(context
->stdin_data
, context
->stdin_data_size
, 0);
515 return move_fd(fd
, STDIN_FILENO
, false);
518 case EXEC_INPUT_FILE
: {
522 assert(context
->stdio_file
[STDIN_FILENO
]);
524 rw
= (context
->std_output
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDOUT_FILENO
])) ||
525 (context
->std_error
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDERR_FILENO
]));
527 fd
= acquire_path(context
->stdio_file
[STDIN_FILENO
], rw
? O_RDWR
: O_RDONLY
, 0666 & ~context
->umask
);
531 return move_fd(fd
, STDIN_FILENO
, false);
535 assert_not_reached("Unknown input type");
539 static bool can_inherit_stderr_from_stdout(
540 const ExecContext
*context
,
546 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
549 if (e
== EXEC_OUTPUT_INHERIT
)
554 if (e
== EXEC_OUTPUT_NAMED_FD
)
555 return streq_ptr(context
->stdio_fdname
[STDOUT_FILENO
], context
->stdio_fdname
[STDERR_FILENO
]);
557 if (IN_SET(e
, EXEC_OUTPUT_FILE
, EXEC_OUTPUT_FILE_APPEND
))
558 return streq_ptr(context
->stdio_file
[STDOUT_FILENO
], context
->stdio_file
[STDERR_FILENO
]);
563 static int setup_output(
565 const ExecContext
*context
,
566 const ExecParameters
*params
,
569 const int named_iofds
[static 3],
573 dev_t
*journal_stream_dev
,
574 ino_t
*journal_stream_ino
) {
584 assert(journal_stream_dev
);
585 assert(journal_stream_ino
);
587 if (fileno
== STDOUT_FILENO
&& params
->stdout_fd
>= 0) {
589 if (dup2(params
->stdout_fd
, STDOUT_FILENO
) < 0)
592 return STDOUT_FILENO
;
595 if (fileno
== STDERR_FILENO
&& params
->stderr_fd
>= 0) {
596 if (dup2(params
->stderr_fd
, STDERR_FILENO
) < 0)
599 return STDERR_FILENO
;
602 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
603 o
= fixup_output(context
->std_output
, socket_fd
);
605 if (fileno
== STDERR_FILENO
) {
607 e
= fixup_output(context
->std_error
, socket_fd
);
609 /* This expects the input and output are already set up */
611 /* Don't change the stderr file descriptor if we inherit all
612 * the way and are not on a tty */
613 if (e
== EXEC_OUTPUT_INHERIT
&&
614 o
== EXEC_OUTPUT_INHERIT
&&
615 i
== EXEC_INPUT_NULL
&&
616 !is_terminal_input(context
->std_input
) &&
620 /* Duplicate from stdout if possible */
621 if (can_inherit_stderr_from_stdout(context
, o
, e
))
622 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
626 } else if (o
== EXEC_OUTPUT_INHERIT
) {
627 /* If input got downgraded, inherit the original value */
628 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
629 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
631 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
632 if (!IN_SET(i
, EXEC_INPUT_NULL
, EXEC_INPUT_DATA
))
633 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
635 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
639 /* We need to open /dev/null here anew, to get the right access mode. */
640 return open_null_as(O_WRONLY
, fileno
);
645 case EXEC_OUTPUT_NULL
:
646 return open_null_as(O_WRONLY
, fileno
);
648 case EXEC_OUTPUT_TTY
:
649 if (is_terminal_input(i
))
650 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
652 /* We don't reset the terminal if this is just about output */
653 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
655 case EXEC_OUTPUT_SYSLOG
:
656 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
657 case EXEC_OUTPUT_KMSG
:
658 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
659 case EXEC_OUTPUT_JOURNAL
:
660 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
661 r
= connect_logger_as(unit
, context
, params
, o
, ident
, fileno
, uid
, gid
);
663 log_unit_warning_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
664 r
= open_null_as(O_WRONLY
, fileno
);
668 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
669 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
670 * services to detect whether they are connected to the journal or not.
672 * If both stdout and stderr are connected to a stream then let's make sure to store the data
673 * about STDERR as that's usually the best way to do logging. */
675 if (fstat(fileno
, &st
) >= 0 &&
676 (*journal_stream_ino
== 0 || fileno
== STDERR_FILENO
)) {
677 *journal_stream_dev
= st
.st_dev
;
678 *journal_stream_ino
= st
.st_ino
;
683 case EXEC_OUTPUT_SOCKET
:
684 assert(socket_fd
>= 0);
686 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
688 case EXEC_OUTPUT_NAMED_FD
:
689 assert(named_iofds
[fileno
] >= 0);
691 (void) fd_nonblock(named_iofds
[fileno
], false);
692 return dup2(named_iofds
[fileno
], fileno
) < 0 ? -errno
: fileno
;
694 case EXEC_OUTPUT_FILE
:
695 case EXEC_OUTPUT_FILE_APPEND
: {
699 assert(context
->stdio_file
[fileno
]);
701 rw
= context
->std_input
== EXEC_INPUT_FILE
&&
702 streq_ptr(context
->stdio_file
[fileno
], context
->stdio_file
[STDIN_FILENO
]);
705 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
708 if (o
== EXEC_OUTPUT_FILE_APPEND
)
711 fd
= acquire_path(context
->stdio_file
[fileno
], flags
, 0666 & ~context
->umask
);
715 return move_fd(fd
, fileno
, 0);
719 assert_not_reached("Unknown error type");
723 static int chown_terminal(int fd
, uid_t uid
) {
728 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
729 if (isatty(fd
) < 1) {
730 if (IN_SET(errno
, EINVAL
, ENOTTY
))
731 return 0; /* not a tty */
736 /* This might fail. What matters are the results. */
737 r
= fchmod_and_chown(fd
, TTY_MODE
, uid
, -1);
744 static int setup_confirm_stdio(const char *vc
, int *_saved_stdin
, int *_saved_stdout
) {
745 _cleanup_close_
int fd
= -1, saved_stdin
= -1, saved_stdout
= -1;
748 assert(_saved_stdin
);
749 assert(_saved_stdout
);
751 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
755 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
756 if (saved_stdout
< 0)
759 fd
= acquire_terminal(vc
, ACQUIRE_TERMINAL_WAIT
, DEFAULT_CONFIRM_USEC
);
763 r
= chown_terminal(fd
, getuid());
767 r
= reset_terminal_fd(fd
, true);
771 r
= rearrange_stdio(fd
, fd
, STDERR_FILENO
);
776 *_saved_stdin
= saved_stdin
;
777 *_saved_stdout
= saved_stdout
;
779 saved_stdin
= saved_stdout
= -1;
784 static void write_confirm_error_fd(int err
, int fd
, const Unit
*u
) {
787 if (err
== -ETIMEDOUT
)
788 dprintf(fd
, "Confirmation question timed out for %s, assuming positive response.\n", u
->id
);
791 dprintf(fd
, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u
->id
);
795 static void write_confirm_error(int err
, const char *vc
, const Unit
*u
) {
796 _cleanup_close_
int fd
= -1;
800 fd
= open_terminal(vc
, O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
804 write_confirm_error_fd(err
, fd
, u
);
807 static int restore_confirm_stdio(int *saved_stdin
, int *saved_stdout
) {
811 assert(saved_stdout
);
815 if (*saved_stdin
>= 0)
816 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
819 if (*saved_stdout
>= 0)
820 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
823 *saved_stdin
= safe_close(*saved_stdin
);
824 *saved_stdout
= safe_close(*saved_stdout
);
830 CONFIRM_PRETEND_FAILURE
= -1,
831 CONFIRM_PRETEND_SUCCESS
= 0,
835 static int ask_for_confirmation(const char *vc
, Unit
*u
, const char *cmdline
) {
836 int saved_stdout
= -1, saved_stdin
= -1, r
;
837 _cleanup_free_
char *e
= NULL
;
840 /* For any internal errors, assume a positive response. */
841 r
= setup_confirm_stdio(vc
, &saved_stdin
, &saved_stdout
);
843 write_confirm_error(r
, vc
, u
);
844 return CONFIRM_EXECUTE
;
847 /* confirm_spawn might have been disabled while we were sleeping. */
848 if (manager_is_confirm_spawn_disabled(u
->manager
)) {
853 e
= ellipsize(cmdline
, 60, 100);
861 r
= ask_char(&c
, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e
);
863 write_confirm_error_fd(r
, STDOUT_FILENO
, u
);
870 printf("Resuming normal execution.\n");
871 manager_disable_confirm_spawn();
875 unit_dump(u
, stdout
, " ");
876 continue; /* ask again */
878 printf("Failing execution.\n");
879 r
= CONFIRM_PRETEND_FAILURE
;
882 printf(" c - continue, proceed without asking anymore\n"
883 " D - dump, show the state of the unit\n"
884 " f - fail, don't execute the command and pretend it failed\n"
886 " i - info, show a short summary of the unit\n"
887 " j - jobs, show jobs that are in progress\n"
888 " s - skip, don't execute the command and pretend it succeeded\n"
889 " y - yes, execute the command\n");
890 continue; /* ask again */
892 printf(" Description: %s\n"
895 u
->id
, u
->description
, cmdline
);
896 continue; /* ask again */
898 manager_dump_jobs(u
->manager
, stdout
, " ");
899 continue; /* ask again */
901 /* 'n' was removed in favor of 'f'. */
902 printf("Didn't understand 'n', did you mean 'f'?\n");
903 continue; /* ask again */
905 printf("Skipping execution.\n");
906 r
= CONFIRM_PRETEND_SUCCESS
;
912 assert_not_reached("Unhandled choice");
918 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
922 static int get_fixed_user(const ExecContext
*c
, const char **user
,
923 uid_t
*uid
, gid_t
*gid
,
924 const char **home
, const char **shell
) {
933 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
934 * (i.e. are "/" or "/bin/nologin"). */
937 r
= get_user_creds(&name
, uid
, gid
, home
, shell
, USER_CREDS_CLEAN
);
945 static int get_fixed_group(const ExecContext
*c
, const char **group
, gid_t
*gid
) {
955 r
= get_group_creds(&name
, gid
, 0);
963 static int get_supplementary_groups(const ExecContext
*c
, const char *user
,
964 const char *group
, gid_t gid
,
965 gid_t
**supplementary_gids
, int *ngids
) {
969 bool keep_groups
= false;
970 gid_t
*groups
= NULL
;
971 _cleanup_free_ gid_t
*l_gids
= NULL
;
976 * If user is given, then lookup GID and supplementary groups list.
977 * We avoid NSS lookups for gid=0. Also we have to initialize groups
978 * here and as early as possible so we keep the list of supplementary
979 * groups of the caller.
981 if (user
&& gid_is_valid(gid
) && gid
!= 0) {
982 /* First step, initialize groups from /etc/group */
983 if (initgroups(user
, gid
) < 0)
989 if (strv_isempty(c
->supplementary_groups
))
993 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
994 * be positive, otherwise fail.
997 ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
);
998 if (ngroups_max
<= 0)
999 return errno_or_else(EOPNOTSUPP
);
1001 l_gids
= new(gid_t
, ngroups_max
);
1007 * Lookup the list of groups that the user belongs to, we
1008 * avoid NSS lookups here too for gid=0.
1011 if (getgrouplist(user
, gid
, l_gids
, &k
) < 0)
1016 STRV_FOREACH(i
, c
->supplementary_groups
) {
1019 if (k
>= ngroups_max
)
1023 r
= get_group_creds(&g
, l_gids
+k
, 0);
1031 * Sets ngids to zero to drop all supplementary groups, happens
1032 * when we are under root and SupplementaryGroups= is empty.
1039 /* Otherwise get the final list of supplementary groups */
1040 groups
= memdup(l_gids
, sizeof(gid_t
) * k
);
1044 *supplementary_gids
= groups
;
1052 static int enforce_groups(gid_t gid
, const gid_t
*supplementary_gids
, int ngids
) {
1055 /* Handle SupplementaryGroups= if it is not empty */
1057 r
= maybe_setgroups(ngids
, supplementary_gids
);
1062 if (gid_is_valid(gid
)) {
1063 /* Then set our gids */
1064 if (setresgid(gid
, gid
, gid
) < 0)
1071 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
1074 if (!uid_is_valid(uid
))
1077 /* Sets (but doesn't look up) the uid and makes sure we keep the
1078 * capabilities while doing so. */
1080 if (context
->capability_ambient_set
!= 0) {
1082 /* First step: If we need to keep capabilities but
1083 * drop privileges we need to make sure we keep our
1084 * caps, while we drop privileges. */
1086 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
1088 if (prctl(PR_GET_SECUREBITS
) != sb
)
1089 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
1094 /* Second step: actually set the uids */
1095 if (setresuid(uid
, uid
, uid
) < 0)
1098 /* At this point we should have all necessary capabilities but
1099 are otherwise a normal user. However, the caps might have gotten
1100 corrupted due to the setresuid(), so we need to clean them up
1101 later. This is done outside of this call. */
1108 static int null_conv(
1110 const struct pam_message
**msg
,
1111 struct pam_response
**resp
,
1112 void *appdata_ptr
) {
1114 /* We don't support conversations */
1116 return PAM_CONV_ERR
;
1121 static int setup_pam(
1128 const int fds
[], size_t n_fds
) {
1132 static const struct pam_conv conv
= {
1137 _cleanup_(barrier_destroy
) Barrier barrier
= BARRIER_NULL
;
1138 pam_handle_t
*handle
= NULL
;
1140 int pam_code
= PAM_SUCCESS
, r
;
1141 char **nv
, **e
= NULL
;
1142 bool close_session
= false;
1143 pid_t pam_pid
= 0, parent_pid
;
1150 /* We set up PAM in the parent process, then fork. The child
1151 * will then stay around until killed via PR_SET_PDEATHSIG or
1152 * systemd via the cgroup logic. It will then remove the PAM
1153 * session again. The parent process will exec() the actual
1154 * daemon. We do things this way to ensure that the main PID
1155 * of the daemon is the one we initially fork()ed. */
1157 r
= barrier_create(&barrier
);
1161 if (log_get_max_level() < LOG_DEBUG
)
1162 flags
|= PAM_SILENT
;
1164 pam_code
= pam_start(name
, user
, &conv
, &handle
);
1165 if (pam_code
!= PAM_SUCCESS
) {
1171 _cleanup_free_
char *q
= NULL
;
1173 /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
1174 * out if that's the case, and read the TTY off it. */
1176 if (getttyname_malloc(STDIN_FILENO
, &q
) >= 0)
1177 tty
= strjoina("/dev/", q
);
1181 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
1182 if (pam_code
!= PAM_SUCCESS
)
1186 STRV_FOREACH(nv
, *env
) {
1187 pam_code
= pam_putenv(handle
, *nv
);
1188 if (pam_code
!= PAM_SUCCESS
)
1192 pam_code
= pam_acct_mgmt(handle
, flags
);
1193 if (pam_code
!= PAM_SUCCESS
)
1196 pam_code
= pam_setcred(handle
, PAM_ESTABLISH_CRED
| flags
);
1197 if (pam_code
!= PAM_SUCCESS
)
1198 log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle
, pam_code
));
1200 pam_code
= pam_open_session(handle
, flags
);
1201 if (pam_code
!= PAM_SUCCESS
)
1204 close_session
= true;
1206 e
= pam_getenvlist(handle
);
1208 pam_code
= PAM_BUF_ERR
;
1212 /* Block SIGTERM, so that we know that it won't get lost in
1215 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
1217 parent_pid
= getpid_cached();
1219 r
= safe_fork("(sd-pam)", 0, &pam_pid
);
1223 int sig
, ret
= EXIT_PAM
;
1225 /* The child's job is to reset the PAM session on
1227 barrier_set_role(&barrier
, BARRIER_CHILD
);
1229 /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
1230 * are open here that have been opened by PAM. */
1231 (void) close_many(fds
, n_fds
);
1233 /* Drop privileges - we don't need any to pam_close_session
1234 * and this will make PR_SET_PDEATHSIG work in most cases.
1235 * If this fails, ignore the error - but expect sd-pam threads
1236 * to fail to exit normally */
1238 r
= maybe_setgroups(0, NULL
);
1240 log_warning_errno(r
, "Failed to setgroups() in sd-pam: %m");
1241 if (setresgid(gid
, gid
, gid
) < 0)
1242 log_warning_errno(errno
, "Failed to setresgid() in sd-pam: %m");
1243 if (setresuid(uid
, uid
, uid
) < 0)
1244 log_warning_errno(errno
, "Failed to setresuid() in sd-pam: %m");
1246 (void) ignore_signals(SIGPIPE
, -1);
1248 /* Wait until our parent died. This will only work if
1249 * the above setresuid() succeeds, otherwise the kernel
1250 * will not allow unprivileged parents to kill their privileged
1251 * children this way. We rely on the control groups kill logic
1252 * to do the rest for us. */
1253 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
1256 /* Tell the parent that our setup is done. This is especially
1257 * important regarding dropping privileges. Otherwise, unit
1258 * setup might race against our setresuid(2) call.
1260 * If the parent aborted, we'll detect this below, hence ignore
1261 * return failure here. */
1262 (void) barrier_place(&barrier
);
1264 /* Check if our parent process might already have died? */
1265 if (getppid() == parent_pid
) {
1268 assert_se(sigemptyset(&ss
) >= 0);
1269 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
1272 if (sigwait(&ss
, &sig
) < 0) {
1279 assert(sig
== SIGTERM
);
1284 pam_code
= pam_setcred(handle
, PAM_DELETE_CRED
| flags
);
1285 if (pam_code
!= PAM_SUCCESS
)
1288 /* If our parent died we'll end the session */
1289 if (getppid() != parent_pid
) {
1290 pam_code
= pam_close_session(handle
, flags
);
1291 if (pam_code
!= PAM_SUCCESS
)
1298 pam_end(handle
, pam_code
| flags
);
1302 barrier_set_role(&barrier
, BARRIER_PARENT
);
1304 /* If the child was forked off successfully it will do all the
1305 * cleanups, so forget about the handle here. */
1308 /* Unblock SIGTERM again in the parent */
1309 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
1311 /* We close the log explicitly here, since the PAM modules
1312 * might have opened it, but we don't want this fd around. */
1315 /* Synchronously wait for the child to initialize. We don't care for
1316 * errors as we cannot recover. However, warn loudly if it happens. */
1317 if (!barrier_place_and_sync(&barrier
))
1318 log_error("PAM initialization failed");
1320 return strv_free_and_replace(*env
, e
);
1323 if (pam_code
!= PAM_SUCCESS
) {
1324 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
1325 r
= -EPERM
; /* PAM errors do not map to errno */
1327 log_error_errno(r
, "PAM failed: %m");
1331 pam_code
= pam_close_session(handle
, flags
);
1333 pam_end(handle
, pam_code
| flags
);
1345 static void rename_process_from_path(const char *path
) {
1346 char process_name
[11];
1350 /* This resulting string must fit in 10 chars (i.e. the length
1351 * of "/sbin/init") to look pretty in /bin/ps */
1355 rename_process("(...)");
1361 /* The end of the process name is usually more
1362 * interesting, since the first bit might just be
1368 process_name
[0] = '(';
1369 memcpy(process_name
+1, p
, l
);
1370 process_name
[1+l
] = ')';
1371 process_name
[1+l
+1] = 0;
1373 rename_process(process_name
);
1376 static bool context_has_address_families(const ExecContext
*c
) {
1379 return c
->address_families_whitelist
||
1380 !set_isempty(c
->address_families
);
1383 static bool context_has_syscall_filters(const ExecContext
*c
) {
1386 return c
->syscall_whitelist
||
1387 !hashmap_isempty(c
->syscall_filter
);
1390 static bool context_has_no_new_privileges(const ExecContext
*c
) {
1393 if (c
->no_new_privileges
)
1396 if (have_effective_cap(CAP_SYS_ADMIN
)) /* if we are privileged, we don't need NNP */
1399 /* We need NNP if we have any form of seccomp and are unprivileged */
1400 return context_has_address_families(c
) ||
1401 c
->memory_deny_write_execute
||
1402 c
->restrict_realtime
||
1403 c
->restrict_suid_sgid
||
1404 exec_context_restrict_namespaces_set(c
) ||
1406 c
->protect_kernel_tunables
||
1407 c
->protect_kernel_modules
||
1408 c
->protect_kernel_logs
||
1409 c
->private_devices
||
1410 context_has_syscall_filters(c
) ||
1411 !set_isempty(c
->syscall_archs
) ||
1412 c
->lock_personality
||
1413 c
->protect_hostname
;
1418 static bool skip_seccomp_unavailable(const Unit
* u
, const char* msg
) {
1420 if (is_seccomp_available())
1423 log_unit_debug(u
, "SECCOMP features not detected in the kernel, skipping %s", msg
);
1427 static int apply_syscall_filter(const Unit
* u
, const ExecContext
*c
, bool needs_ambient_hack
) {
1428 uint32_t negative_action
, default_action
, action
;
1434 if (!context_has_syscall_filters(c
))
1437 if (skip_seccomp_unavailable(u
, "SystemCallFilter="))
1440 negative_action
= c
->syscall_errno
== 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c
->syscall_errno
);
1442 if (c
->syscall_whitelist
) {
1443 default_action
= negative_action
;
1444 action
= SCMP_ACT_ALLOW
;
1446 default_action
= SCMP_ACT_ALLOW
;
1447 action
= negative_action
;
1450 if (needs_ambient_hack
) {
1451 r
= seccomp_filter_set_add(c
->syscall_filter
, c
->syscall_whitelist
, syscall_filter_sets
+ SYSCALL_FILTER_SET_SETUID
);
1456 return seccomp_load_syscall_filter_set_raw(default_action
, c
->syscall_filter
, action
, false);
1459 static int apply_syscall_archs(const Unit
*u
, const ExecContext
*c
) {
1463 if (set_isempty(c
->syscall_archs
))
1466 if (skip_seccomp_unavailable(u
, "SystemCallArchitectures="))
1469 return seccomp_restrict_archs(c
->syscall_archs
);
1472 static int apply_address_families(const Unit
* u
, const ExecContext
*c
) {
1476 if (!context_has_address_families(c
))
1479 if (skip_seccomp_unavailable(u
, "RestrictAddressFamilies="))
1482 return seccomp_restrict_address_families(c
->address_families
, c
->address_families_whitelist
);
1485 static int apply_memory_deny_write_execute(const Unit
* u
, const ExecContext
*c
) {
1489 if (!c
->memory_deny_write_execute
)
1492 if (skip_seccomp_unavailable(u
, "MemoryDenyWriteExecute="))
1495 return seccomp_memory_deny_write_execute();
1498 static int apply_restrict_realtime(const Unit
* u
, const ExecContext
*c
) {
1502 if (!c
->restrict_realtime
)
1505 if (skip_seccomp_unavailable(u
, "RestrictRealtime="))
1508 return seccomp_restrict_realtime();
1511 static int apply_restrict_suid_sgid(const Unit
* u
, const ExecContext
*c
) {
1515 if (!c
->restrict_suid_sgid
)
1518 if (skip_seccomp_unavailable(u
, "RestrictSUIDSGID="))
1521 return seccomp_restrict_suid_sgid();
1524 static int apply_protect_sysctl(const Unit
*u
, const ExecContext
*c
) {
1528 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1529 * let's protect even those systems where this is left on in the kernel. */
1531 if (!c
->protect_kernel_tunables
)
1534 if (skip_seccomp_unavailable(u
, "ProtectKernelTunables="))
1537 return seccomp_protect_sysctl();
1540 static int apply_protect_kernel_modules(const Unit
*u
, const ExecContext
*c
) {
1544 /* Turn off module syscalls on ProtectKernelModules=yes */
1546 if (!c
->protect_kernel_modules
)
1549 if (skip_seccomp_unavailable(u
, "ProtectKernelModules="))
1552 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_MODULE
, SCMP_ACT_ERRNO(EPERM
), false);
1555 static int apply_protect_kernel_logs(const Unit
*u
, const ExecContext
*c
) {
1559 if (!c
->protect_kernel_logs
)
1562 if (skip_seccomp_unavailable(u
, "ProtectKernelLogs="))
1565 return seccomp_protect_syslog();
1568 static int apply_protect_clock(const Unit
*u
, const ExecContext
*c
) {
1572 if (!c
->protect_clock
)
1575 if (skip_seccomp_unavailable(u
, "ProtectClock="))
1578 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_CLOCK
, SCMP_ACT_ERRNO(EPERM
), false);
1581 static int apply_private_devices(const Unit
*u
, const ExecContext
*c
) {
1585 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1587 if (!c
->private_devices
)
1590 if (skip_seccomp_unavailable(u
, "PrivateDevices="))
1593 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
, SCMP_ACT_ERRNO(EPERM
), false);
1596 static int apply_restrict_namespaces(const Unit
*u
, const ExecContext
*c
) {
1600 if (!exec_context_restrict_namespaces_set(c
))
1603 if (skip_seccomp_unavailable(u
, "RestrictNamespaces="))
1606 return seccomp_restrict_namespaces(c
->restrict_namespaces
);
1609 static int apply_lock_personality(const Unit
* u
, const ExecContext
*c
) {
1610 unsigned long personality
;
1616 if (!c
->lock_personality
)
1619 if (skip_seccomp_unavailable(u
, "LockPersonality="))
1622 personality
= c
->personality
;
1624 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1625 if (personality
== PERSONALITY_INVALID
) {
1627 r
= opinionated_personality(&personality
);
1632 return seccomp_lock_personality(personality
);
1637 static void do_idle_pipe_dance(int idle_pipe
[static 4]) {
1640 idle_pipe
[1] = safe_close(idle_pipe
[1]);
1641 idle_pipe
[2] = safe_close(idle_pipe
[2]);
1643 if (idle_pipe
[0] >= 0) {
1646 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1648 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1651 /* Signal systemd that we are bored and want to continue. */
1652 n
= write(idle_pipe
[3], "x", 1);
1654 /* Wait for systemd to react to the signal above. */
1655 (void) fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1658 idle_pipe
[0] = safe_close(idle_pipe
[0]);
1662 idle_pipe
[3] = safe_close(idle_pipe
[3]);
1665 static const char *exec_directory_env_name_to_string(ExecDirectoryType t
);
1667 static int build_environment(
1669 const ExecContext
*c
,
1670 const ExecParameters
*p
,
1673 const char *username
,
1675 dev_t journal_stream_dev
,
1676 ino_t journal_stream_ino
,
1679 _cleanup_strv_free_
char **our_env
= NULL
;
1680 ExecDirectoryType t
;
1689 our_env
= new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX
);
1694 _cleanup_free_
char *joined
= NULL
;
1696 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid_cached()) < 0)
1698 our_env
[n_env
++] = x
;
1700 if (asprintf(&x
, "LISTEN_FDS=%zu", n_fds
) < 0)
1702 our_env
[n_env
++] = x
;
1704 joined
= strv_join(p
->fd_names
, ":");
1708 x
= strjoin("LISTEN_FDNAMES=", joined
);
1711 our_env
[n_env
++] = x
;
1714 if ((p
->flags
& EXEC_SET_WATCHDOG
) && p
->watchdog_usec
> 0) {
1715 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid_cached()) < 0)
1717 our_env
[n_env
++] = x
;
1719 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, p
->watchdog_usec
) < 0)
1721 our_env
[n_env
++] = x
;
1724 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus to look up dynamic
1725 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1726 * check the database directly. */
1727 if (p
->flags
& EXEC_NSS_BYPASS_BUS
) {
1728 x
= strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1731 our_env
[n_env
++] = x
;
1735 x
= strjoin("HOME=", home
);
1739 path_simplify(x
+ 5, true);
1740 our_env
[n_env
++] = x
;
1744 x
= strjoin("LOGNAME=", username
);
1747 our_env
[n_env
++] = x
;
1749 x
= strjoin("USER=", username
);
1752 our_env
[n_env
++] = x
;
1756 x
= strjoin("SHELL=", shell
);
1760 path_simplify(x
+ 6, true);
1761 our_env
[n_env
++] = x
;
1764 if (!sd_id128_is_null(u
->invocation_id
)) {
1765 if (asprintf(&x
, "INVOCATION_ID=" SD_ID128_FORMAT_STR
, SD_ID128_FORMAT_VAL(u
->invocation_id
)) < 0)
1768 our_env
[n_env
++] = x
;
1771 if (exec_context_needs_term(c
)) {
1772 const char *tty_path
, *term
= NULL
;
1774 tty_path
= exec_context_tty_path(c
);
1776 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1777 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1778 * passes to PID 1 ends up all the way in the console login shown. */
1780 if (path_equal(tty_path
, "/dev/console") && getppid() == 1)
1781 term
= getenv("TERM");
1783 term
= default_term_for_tty(tty_path
);
1785 x
= strjoin("TERM=", term
);
1788 our_env
[n_env
++] = x
;
1791 if (journal_stream_dev
!= 0 && journal_stream_ino
!= 0) {
1792 if (asprintf(&x
, "JOURNAL_STREAM=" DEV_FMT
":" INO_FMT
, journal_stream_dev
, journal_stream_ino
) < 0)
1795 our_env
[n_env
++] = x
;
1798 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
1799 _cleanup_free_
char *pre
= NULL
, *joined
= NULL
;
1805 if (strv_isempty(c
->directories
[t
].paths
))
1808 n
= exec_directory_env_name_to_string(t
);
1812 pre
= strjoin(p
->prefix
[t
], "/");
1816 joined
= strv_join_prefix(c
->directories
[t
].paths
, ":", pre
);
1820 x
= strjoin(n
, "=", joined
);
1824 our_env
[n_env
++] = x
;
1827 our_env
[n_env
++] = NULL
;
1828 assert(n_env
<= 14 + _EXEC_DIRECTORY_TYPE_MAX
);
1830 *ret
= TAKE_PTR(our_env
);
1835 static int build_pass_environment(const ExecContext
*c
, char ***ret
) {
1836 _cleanup_strv_free_
char **pass_env
= NULL
;
1837 size_t n_env
= 0, n_bufsize
= 0;
1840 STRV_FOREACH(i
, c
->pass_environment
) {
1841 _cleanup_free_
char *x
= NULL
;
1847 x
= strjoin(*i
, "=", v
);
1851 if (!GREEDY_REALLOC(pass_env
, n_bufsize
, n_env
+ 2))
1854 pass_env
[n_env
++] = TAKE_PTR(x
);
1855 pass_env
[n_env
] = NULL
;
1858 *ret
= TAKE_PTR(pass_env
);
1863 static bool exec_needs_mount_namespace(
1864 const ExecContext
*context
,
1865 const ExecParameters
*params
,
1866 const ExecRuntime
*runtime
) {
1871 if (context
->root_image
)
1874 if (!strv_isempty(context
->read_write_paths
) ||
1875 !strv_isempty(context
->read_only_paths
) ||
1876 !strv_isempty(context
->inaccessible_paths
))
1879 if (context
->n_bind_mounts
> 0)
1882 if (context
->n_temporary_filesystems
> 0)
1885 if (!IN_SET(context
->mount_flags
, 0, MS_SHARED
))
1888 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1891 if (context
->private_devices
||
1892 context
->private_mounts
||
1893 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1894 context
->protect_home
!= PROTECT_HOME_NO
||
1895 context
->protect_kernel_tunables
||
1896 context
->protect_kernel_modules
||
1897 context
->protect_kernel_logs
||
1898 context
->protect_control_groups
)
1901 if (context
->root_directory
) {
1902 ExecDirectoryType t
;
1904 if (context
->mount_apivfs
)
1907 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
1908 if (!params
->prefix
[t
])
1911 if (!strv_isempty(context
->directories
[t
].paths
))
1916 if (context
->dynamic_user
&&
1917 (!strv_isempty(context
->directories
[EXEC_DIRECTORY_STATE
].paths
) ||
1918 !strv_isempty(context
->directories
[EXEC_DIRECTORY_CACHE
].paths
) ||
1919 !strv_isempty(context
->directories
[EXEC_DIRECTORY_LOGS
].paths
)))
1925 static int setup_private_users(uid_t ouid
, gid_t ogid
, uid_t uid
, gid_t gid
) {
1926 _cleanup_free_
char *uid_map
= NULL
, *gid_map
= NULL
;
1927 _cleanup_close_pair_
int errno_pipe
[2] = { -1, -1 };
1928 _cleanup_close_
int unshare_ready_fd
= -1;
1929 _cleanup_(sigkill_waitp
) pid_t pid
= 0;
1934 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1935 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
1936 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1937 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1938 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1939 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1940 * continues execution normally.
1941 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
1942 * does not need CAP_SETUID to write the single line mapping to itself. */
1944 /* Can only set up multiple mappings with CAP_SETUID. */
1945 if (have_effective_cap(CAP_SETUID
) && uid
!= ouid
&& uid_is_valid(uid
))
1946 r
= asprintf(&uid_map
,
1947 UID_FMT
" " UID_FMT
" 1\n" /* Map $OUID → $OUID */
1948 UID_FMT
" " UID_FMT
" 1\n", /* Map $UID → $UID */
1949 ouid
, ouid
, uid
, uid
);
1951 r
= asprintf(&uid_map
,
1952 UID_FMT
" " UID_FMT
" 1\n", /* Map $OUID → $OUID */
1958 /* Can only set up multiple mappings with CAP_SETGID. */
1959 if (have_effective_cap(CAP_SETGID
) && gid
!= ogid
&& gid_is_valid(gid
))
1960 r
= asprintf(&gid_map
,
1961 GID_FMT
" " GID_FMT
" 1\n" /* Map $OGID → $OGID */
1962 GID_FMT
" " GID_FMT
" 1\n", /* Map $GID → $GID */
1963 ogid
, ogid
, gid
, gid
);
1965 r
= asprintf(&gid_map
,
1966 GID_FMT
" " GID_FMT
" 1\n", /* Map $OGID -> $OGID */
1972 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1974 unshare_ready_fd
= eventfd(0, EFD_CLOEXEC
);
1975 if (unshare_ready_fd
< 0)
1978 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1980 if (pipe2(errno_pipe
, O_CLOEXEC
) < 0)
1983 r
= safe_fork("(sd-userns)", FORK_RESET_SIGNALS
|FORK_DEATHSIG
, &pid
);
1987 _cleanup_close_
int fd
= -1;
1991 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1992 * here, after the parent opened its own user namespace. */
1995 errno_pipe
[0] = safe_close(errno_pipe
[0]);
1997 /* Wait until the parent unshared the user namespace */
1998 if (read(unshare_ready_fd
, &c
, sizeof(c
)) < 0) {
2003 /* Disable the setgroups() system call in the child user namespace, for good. */
2004 a
= procfs_file_alloca(ppid
, "setgroups");
2005 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
2007 if (errno
!= ENOENT
) {
2012 /* If the file is missing the kernel is too old, let's continue anyway. */
2014 if (write(fd
, "deny\n", 5) < 0) {
2019 fd
= safe_close(fd
);
2022 /* First write the GID map */
2023 a
= procfs_file_alloca(ppid
, "gid_map");
2024 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
2029 if (write(fd
, gid_map
, strlen(gid_map
)) < 0) {
2033 fd
= safe_close(fd
);
2035 /* Then write the UID map */
2036 a
= procfs_file_alloca(ppid
, "uid_map");
2037 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
2042 if (write(fd
, uid_map
, strlen(uid_map
)) < 0) {
2047 _exit(EXIT_SUCCESS
);
2050 (void) write(errno_pipe
[1], &r
, sizeof(r
));
2051 _exit(EXIT_FAILURE
);
2054 errno_pipe
[1] = safe_close(errno_pipe
[1]);
2056 if (unshare(CLONE_NEWUSER
) < 0)
2059 /* Let the child know that the namespace is ready now */
2060 if (write(unshare_ready_fd
, &c
, sizeof(c
)) < 0)
2063 /* Try to read an error code from the child */
2064 n
= read(errno_pipe
[0], &r
, sizeof(r
));
2067 if (n
== sizeof(r
)) { /* an error code was sent to us */
2072 if (n
!= 0) /* on success we should have read 0 bytes */
2075 r
= wait_for_terminate_and_check("(sd-userns)", pid
, 0);
2079 if (r
!= EXIT_SUCCESS
) /* If something strange happened with the child, let's consider this fatal, too */
2085 static bool exec_directory_is_private(const ExecContext
*context
, ExecDirectoryType type
) {
2086 if (!context
->dynamic_user
)
2089 if (type
== EXEC_DIRECTORY_CONFIGURATION
)
2092 if (type
== EXEC_DIRECTORY_RUNTIME
&& context
->runtime_directory_preserve_mode
== EXEC_PRESERVE_NO
)
2098 static int setup_exec_directory(
2099 const ExecContext
*context
,
2100 const ExecParameters
*params
,
2103 ExecDirectoryType type
,
2106 static const int exit_status_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
2107 [EXEC_DIRECTORY_RUNTIME
] = EXIT_RUNTIME_DIRECTORY
,
2108 [EXEC_DIRECTORY_STATE
] = EXIT_STATE_DIRECTORY
,
2109 [EXEC_DIRECTORY_CACHE
] = EXIT_CACHE_DIRECTORY
,
2110 [EXEC_DIRECTORY_LOGS
] = EXIT_LOGS_DIRECTORY
,
2111 [EXEC_DIRECTORY_CONFIGURATION
] = EXIT_CONFIGURATION_DIRECTORY
,
2118 assert(type
>= 0 && type
< _EXEC_DIRECTORY_TYPE_MAX
);
2119 assert(exit_status
);
2121 if (!params
->prefix
[type
])
2124 if (params
->flags
& EXEC_CHOWN_DIRECTORIES
) {
2125 if (!uid_is_valid(uid
))
2127 if (!gid_is_valid(gid
))
2131 STRV_FOREACH(rt
, context
->directories
[type
].paths
) {
2132 _cleanup_free_
char *p
= NULL
, *pp
= NULL
;
2134 p
= path_join(params
->prefix
[type
], *rt
);
2140 r
= mkdir_parents_label(p
, 0755);
2144 if (exec_directory_is_private(context
, type
)) {
2145 _cleanup_free_
char *private_root
= NULL
;
2147 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2148 * case we want to avoid leaving a directory around fully accessible that is owned by
2149 * a dynamic user whose UID is later on reused. To lock this down we use the same
2150 * trick used by container managers to prohibit host users to get access to files of
2151 * the same UID in containers: we place everything inside a directory that has an
2152 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2153 * for unprivileged host code. We then use fs namespacing to make this directory
2154 * permeable for the service itself.
2156 * Specifically: for a service which wants a special directory "foo/" we first create
2157 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2158 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2159 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2160 * unprivileged host users can't look into it. Inside of the namespace of the unit
2161 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2162 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2163 * for the service and making sure it only gets access to the dirs it needs but no
2164 * others. Tricky? Yes, absolutely, but it works!
2166 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2167 * to be owned by the service itself.
2169 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2170 * for sharing files or sockets with other services. */
2172 private_root
= path_join(params
->prefix
[type
], "private");
2173 if (!private_root
) {
2178 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
2179 r
= mkdir_safe_label(private_root
, 0700, 0, 0, MKDIR_WARN_MODE
);
2183 pp
= path_join(private_root
, *rt
);
2189 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2190 r
= mkdir_parents_label(pp
, 0755);
2194 if (is_dir(p
, false) > 0 &&
2195 (laccess(pp
, F_OK
) < 0 && errno
== ENOENT
)) {
2197 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2198 * it over. Most likely the service has been upgraded from one that didn't use
2199 * DynamicUser=1, to one that does. */
2201 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2202 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2203 exec_directory_type_to_string(type
), p
, pp
);
2205 if (rename(p
, pp
) < 0) {
2210 /* Otherwise, create the actual directory for the service */
2212 r
= mkdir_label(pp
, context
->directories
[type
].mode
);
2213 if (r
< 0 && r
!= -EEXIST
)
2217 /* And link it up from the original place */
2218 r
= symlink_idempotent(pp
, p
, true);
2223 _cleanup_free_
char *target
= NULL
;
2225 if (type
!= EXEC_DIRECTORY_CONFIGURATION
&&
2226 readlink_and_make_absolute(p
, &target
) >= 0) {
2227 _cleanup_free_
char *q
= NULL
;
2229 /* This already exists and is a symlink? Interesting. Maybe it's one created
2230 * by DynamicUser=1 (see above)?
2232 * We do this for all directory types except for ConfigurationDirectory=,
2233 * since they all support the private/ symlink logic at least in some
2234 * configurations, see above. */
2236 q
= path_join(params
->prefix
[type
], "private", *rt
);
2242 if (path_equal(q
, target
)) {
2244 /* Hmm, apparently DynamicUser= was once turned on for this service,
2245 * but is no longer. Let's move the directory back up. */
2247 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2248 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2249 exec_directory_type_to_string(type
), q
, p
);
2251 if (unlink(p
) < 0) {
2256 if (rename(q
, p
) < 0) {
2263 r
= mkdir_label(p
, context
->directories
[type
].mode
);
2268 if (type
== EXEC_DIRECTORY_CONFIGURATION
) {
2271 /* Don't change the owner/access mode of the configuration directory,
2272 * as in the common case it is not written to by a service, and shall
2273 * not be writable. */
2275 if (stat(p
, &st
) < 0) {
2280 /* Still complain if the access mode doesn't match */
2281 if (((st
.st_mode
^ context
->directories
[type
].mode
) & 07777) != 0)
2282 log_warning("%s \'%s\' already exists but the mode is different. "
2283 "(File system: %o %sMode: %o)",
2284 exec_directory_type_to_string(type
), *rt
,
2285 st
.st_mode
& 07777, exec_directory_type_to_string(type
), context
->directories
[type
].mode
& 07777);
2292 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
2293 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
2294 * current UID/GID ownership.) */
2295 r
= chmod_and_chown(pp
?: p
, context
->directories
[type
].mode
, UID_INVALID
, GID_INVALID
);
2299 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2300 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2301 * assignments to exist.*/
2302 r
= path_chown_recursive(pp
?: p
, uid
, gid
, context
->dynamic_user
? 01777 : 07777);
2310 *exit_status
= exit_status_table
[type
];
2315 static int setup_smack(
2316 const ExecContext
*context
,
2317 const ExecCommand
*command
) {
2324 if (context
->smack_process_label
) {
2325 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
2329 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2331 _cleanup_free_
char *exec_label
= NULL
;
2333 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
2334 if (r
< 0 && !IN_SET(r
, -ENODATA
, -EOPNOTSUPP
))
2337 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
2347 static int compile_bind_mounts(
2348 const ExecContext
*context
,
2349 const ExecParameters
*params
,
2350 BindMount
**ret_bind_mounts
,
2351 size_t *ret_n_bind_mounts
,
2352 char ***ret_empty_directories
) {
2354 _cleanup_strv_free_
char **empty_directories
= NULL
;
2355 BindMount
*bind_mounts
;
2357 ExecDirectoryType t
;
2362 assert(ret_bind_mounts
);
2363 assert(ret_n_bind_mounts
);
2364 assert(ret_empty_directories
);
2366 n
= context
->n_bind_mounts
;
2367 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2368 if (!params
->prefix
[t
])
2371 n
+= strv_length(context
->directories
[t
].paths
);
2375 *ret_bind_mounts
= NULL
;
2376 *ret_n_bind_mounts
= 0;
2377 *ret_empty_directories
= NULL
;
2381 bind_mounts
= new(BindMount
, n
);
2385 for (i
= 0; i
< context
->n_bind_mounts
; i
++) {
2386 BindMount
*item
= context
->bind_mounts
+ i
;
2389 s
= strdup(item
->source
);
2395 d
= strdup(item
->destination
);
2402 bind_mounts
[h
++] = (BindMount
) {
2405 .read_only
= item
->read_only
,
2406 .recursive
= item
->recursive
,
2407 .ignore_enoent
= item
->ignore_enoent
,
2411 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2414 if (!params
->prefix
[t
])
2417 if (strv_isempty(context
->directories
[t
].paths
))
2420 if (exec_directory_is_private(context
, t
) &&
2421 !(context
->root_directory
|| context
->root_image
)) {
2424 /* So this is for a dynamic user, and we need to make sure the process can access its own
2425 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2426 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2428 private_root
= path_join(params
->prefix
[t
], "private");
2429 if (!private_root
) {
2434 r
= strv_consume(&empty_directories
, private_root
);
2439 STRV_FOREACH(suffix
, context
->directories
[t
].paths
) {
2442 if (exec_directory_is_private(context
, t
))
2443 s
= path_join(params
->prefix
[t
], "private", *suffix
);
2445 s
= path_join(params
->prefix
[t
], *suffix
);
2451 if (exec_directory_is_private(context
, t
) &&
2452 (context
->root_directory
|| context
->root_image
))
2453 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2454 * directory is not created on the root directory. So, let's bind-mount the directory
2455 * on the 'non-private' place. */
2456 d
= path_join(params
->prefix
[t
], *suffix
);
2465 bind_mounts
[h
++] = (BindMount
) {
2469 .nosuid
= context
->dynamic_user
, /* don't allow suid/sgid when DynamicUser= is on */
2471 .ignore_enoent
= false,
2478 *ret_bind_mounts
= bind_mounts
;
2479 *ret_n_bind_mounts
= n
;
2480 *ret_empty_directories
= TAKE_PTR(empty_directories
);
2485 bind_mount_free_many(bind_mounts
, h
);
2489 static bool insist_on_sandboxing(
2490 const ExecContext
*context
,
2491 const char *root_dir
,
2492 const char *root_image
,
2493 const BindMount
*bind_mounts
,
2494 size_t n_bind_mounts
) {
2499 assert(n_bind_mounts
== 0 || bind_mounts
);
2501 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
2502 * would alter the view on the file system beyond making things read-only or invisible, i.e. would
2503 * rearrange stuff in a way we cannot ignore gracefully. */
2505 if (context
->n_temporary_filesystems
> 0)
2508 if (root_dir
|| root_image
)
2511 if (context
->dynamic_user
)
2514 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2516 for (i
= 0; i
< n_bind_mounts
; i
++)
2517 if (!path_equal(bind_mounts
[i
].source
, bind_mounts
[i
].destination
))
2523 static int apply_mount_namespace(
2525 const ExecCommand
*command
,
2526 const ExecContext
*context
,
2527 const ExecParameters
*params
,
2528 const ExecRuntime
*runtime
,
2529 char **error_path
) {
2531 _cleanup_strv_free_
char **empty_directories
= NULL
;
2532 char *tmp
= NULL
, *var
= NULL
;
2533 const char *root_dir
= NULL
, *root_image
= NULL
;
2534 NamespaceInfo ns_info
;
2535 bool needs_sandboxing
;
2536 BindMount
*bind_mounts
= NULL
;
2537 size_t n_bind_mounts
= 0;
2542 /* The runtime struct only contains the parent of the private /tmp,
2543 * which is non-accessible to world users. Inside of it there's a /tmp
2544 * that is sticky, and that's the one we want to use here. */
2546 if (context
->private_tmp
&& runtime
) {
2547 if (runtime
->tmp_dir
)
2548 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
2549 if (runtime
->var_tmp_dir
)
2550 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
2553 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2554 root_image
= context
->root_image
;
2557 root_dir
= context
->root_directory
;
2560 r
= compile_bind_mounts(context
, params
, &bind_mounts
, &n_bind_mounts
, &empty_directories
);
2564 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
2565 if (needs_sandboxing
)
2566 ns_info
= (NamespaceInfo
) {
2567 .ignore_protect_paths
= false,
2568 .private_dev
= context
->private_devices
,
2569 .protect_control_groups
= context
->protect_control_groups
,
2570 .protect_kernel_tunables
= context
->protect_kernel_tunables
,
2571 .protect_kernel_modules
= context
->protect_kernel_modules
,
2572 .protect_kernel_logs
= context
->protect_kernel_logs
,
2573 .protect_hostname
= context
->protect_hostname
,
2574 .mount_apivfs
= context
->mount_apivfs
,
2575 .private_mounts
= context
->private_mounts
,
2577 else if (!context
->dynamic_user
&& root_dir
)
2579 * If DynamicUser=no and RootDirectory= is set then let's pass a relaxed
2580 * sandbox info, otherwise enforce it, don't ignore protected paths and
2581 * fail if we are unable to apply the sandbox inside the mount namespace.
2583 ns_info
= (NamespaceInfo
) {
2584 .ignore_protect_paths
= true,
2587 ns_info
= (NamespaceInfo
) {};
2589 if (context
->mount_flags
== MS_SHARED
)
2590 log_unit_debug(u
, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2592 r
= setup_namespace(root_dir
, root_image
,
2593 &ns_info
, context
->read_write_paths
,
2594 needs_sandboxing
? context
->read_only_paths
: NULL
,
2595 needs_sandboxing
? context
->inaccessible_paths
: NULL
,
2599 context
->temporary_filesystems
,
2600 context
->n_temporary_filesystems
,
2603 needs_sandboxing
? context
->protect_home
: PROTECT_HOME_NO
,
2604 needs_sandboxing
? context
->protect_system
: PROTECT_SYSTEM_NO
,
2605 context
->mount_flags
,
2606 DISSECT_IMAGE_DISCARD_ON_LOOP
|DISSECT_IMAGE_RELAX_VAR_CHECK
|DISSECT_IMAGE_FSCK
,
2609 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2610 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
2611 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2612 * completely different execution environment. */
2614 if (insist_on_sandboxing(
2616 root_dir
, root_image
,
2619 log_unit_debug(u
, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2620 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2621 n_bind_mounts
, context
->n_temporary_filesystems
, yes_no(root_dir
), yes_no(root_image
), yes_no(context
->dynamic_user
));
2625 log_unit_debug(u
, "Failed to set up namespace, assuming containerized execution and ignoring.");
2630 bind_mount_free_many(bind_mounts
, n_bind_mounts
);
2634 static int apply_working_directory(
2635 const ExecContext
*context
,
2636 const ExecParameters
*params
,
2643 assert(exit_status
);
2645 if (context
->working_directory_home
) {
2648 *exit_status
= EXIT_CHDIR
;
2654 } else if (context
->working_directory
)
2655 wd
= context
->working_directory
;
2659 if (params
->flags
& EXEC_APPLY_CHROOT
)
2662 d
= prefix_roota(context
->root_directory
, wd
);
2664 if (chdir(d
) < 0 && !context
->working_directory_missing_ok
) {
2665 *exit_status
= EXIT_CHDIR
;
2672 static int apply_root_directory(
2673 const ExecContext
*context
,
2674 const ExecParameters
*params
,
2675 const bool needs_mount_ns
,
2679 assert(exit_status
);
2681 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2682 if (!needs_mount_ns
&& context
->root_directory
)
2683 if (chroot(context
->root_directory
) < 0) {
2684 *exit_status
= EXIT_CHROOT
;
2692 static int setup_keyring(
2694 const ExecContext
*context
,
2695 const ExecParameters
*p
,
2696 uid_t uid
, gid_t gid
) {
2698 key_serial_t keyring
;
2707 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2708 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2709 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2710 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2711 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2712 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2714 if (context
->keyring_mode
== EXEC_KEYRING_INHERIT
)
2717 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2718 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2719 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2720 * & group is just as nasty as acquiring a reference to the user keyring. */
2722 saved_uid
= getuid();
2723 saved_gid
= getgid();
2725 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2726 if (setregid(gid
, -1) < 0)
2727 return log_unit_error_errno(u
, errno
, "Failed to change GID for user keyring: %m");
2730 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2731 if (setreuid(uid
, -1) < 0) {
2732 r
= log_unit_error_errno(u
, errno
, "Failed to change UID for user keyring: %m");
2737 keyring
= keyctl(KEYCTL_JOIN_SESSION_KEYRING
, 0, 0, 0, 0);
2738 if (keyring
== -1) {
2739 if (errno
== ENOSYS
)
2740 log_unit_debug_errno(u
, errno
, "Kernel keyring not supported, ignoring.");
2741 else if (IN_SET(errno
, EACCES
, EPERM
))
2742 log_unit_debug_errno(u
, errno
, "Kernel keyring access prohibited, ignoring.");
2743 else if (errno
== EDQUOT
)
2744 log_unit_debug_errno(u
, errno
, "Out of kernel keyrings to allocate, ignoring.");
2746 r
= log_unit_error_errno(u
, errno
, "Setting up kernel keyring failed: %m");
2751 /* When requested link the user keyring into the session keyring. */
2752 if (context
->keyring_mode
== EXEC_KEYRING_SHARED
) {
2754 if (keyctl(KEYCTL_LINK
,
2755 KEY_SPEC_USER_KEYRING
,
2756 KEY_SPEC_SESSION_KEYRING
, 0, 0) < 0) {
2757 r
= log_unit_error_errno(u
, errno
, "Failed to link user keyring into session keyring: %m");
2762 /* Restore uid/gid back */
2763 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2764 if (setreuid(saved_uid
, -1) < 0) {
2765 r
= log_unit_error_errno(u
, errno
, "Failed to change UID back for user keyring: %m");
2770 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2771 if (setregid(saved_gid
, -1) < 0)
2772 return log_unit_error_errno(u
, errno
, "Failed to change GID back for user keyring: %m");
2775 /* Populate the keyring with the invocation ID by default, as original saved_uid. */
2776 if (!sd_id128_is_null(u
->invocation_id
)) {
2779 key
= add_key("user", "invocation_id", &u
->invocation_id
, sizeof(u
->invocation_id
), KEY_SPEC_SESSION_KEYRING
);
2781 log_unit_debug_errno(u
, errno
, "Failed to add invocation ID to keyring, ignoring: %m");
2783 if (keyctl(KEYCTL_SETPERM
, key
,
2784 KEY_POS_VIEW
|KEY_POS_READ
|KEY_POS_SEARCH
|
2785 KEY_USR_VIEW
|KEY_USR_READ
|KEY_USR_SEARCH
, 0, 0) < 0)
2786 r
= log_unit_error_errno(u
, errno
, "Failed to restrict invocation ID permission: %m");
2791 /* Revert back uid & gid for the last time, and exit */
2792 /* no extra logging, as only the first already reported error matters */
2793 if (getuid() != saved_uid
)
2794 (void) setreuid(saved_uid
, -1);
2796 if (getgid() != saved_gid
)
2797 (void) setregid(saved_gid
, -1);
2802 static void append_socket_pair(int *array
, size_t *n
, const int pair
[static 2]) {
2808 array
[(*n
)++] = pair
[0];
2810 array
[(*n
)++] = pair
[1];
2813 static int close_remaining_fds(
2814 const ExecParameters
*params
,
2815 const ExecRuntime
*runtime
,
2816 const DynamicCreds
*dcreds
,
2820 const int *fds
, size_t n_fds
) {
2822 size_t n_dont_close
= 0;
2823 int dont_close
[n_fds
+ 12];
2827 if (params
->stdin_fd
>= 0)
2828 dont_close
[n_dont_close
++] = params
->stdin_fd
;
2829 if (params
->stdout_fd
>= 0)
2830 dont_close
[n_dont_close
++] = params
->stdout_fd
;
2831 if (params
->stderr_fd
>= 0)
2832 dont_close
[n_dont_close
++] = params
->stderr_fd
;
2835 dont_close
[n_dont_close
++] = socket_fd
;
2837 dont_close
[n_dont_close
++] = exec_fd
;
2839 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
2840 n_dont_close
+= n_fds
;
2844 append_socket_pair(dont_close
, &n_dont_close
, runtime
->netns_storage_socket
);
2848 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->user
->storage_socket
);
2850 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->group
->storage_socket
);
2853 if (user_lookup_fd
>= 0)
2854 dont_close
[n_dont_close
++] = user_lookup_fd
;
2856 return close_all_fds(dont_close
, n_dont_close
);
2859 static int send_user_lookup(
2867 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2868 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2871 if (user_lookup_fd
< 0)
2874 if (!uid_is_valid(uid
) && !gid_is_valid(gid
))
2877 if (writev(user_lookup_fd
,
2879 IOVEC_INIT(&uid
, sizeof(uid
)),
2880 IOVEC_INIT(&gid
, sizeof(gid
)),
2881 IOVEC_INIT_STRING(unit
->id
) }, 3) < 0)
2887 static int acquire_home(const ExecContext
*c
, uid_t uid
, const char** home
, char **buf
) {
2894 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2899 if (!c
->working_directory_home
)
2902 r
= get_home_dir(buf
);
2910 static int compile_suggested_paths(const ExecContext
*c
, const ExecParameters
*p
, char ***ret
) {
2911 _cleanup_strv_free_
char ** list
= NULL
;
2912 ExecDirectoryType t
;
2919 assert(c
->dynamic_user
);
2921 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2922 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2925 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2928 if (t
== EXEC_DIRECTORY_CONFIGURATION
)
2934 STRV_FOREACH(i
, c
->directories
[t
].paths
) {
2937 if (exec_directory_is_private(c
, t
))
2938 e
= path_join(p
->prefix
[t
], "private", *i
);
2940 e
= path_join(p
->prefix
[t
], *i
);
2944 r
= strv_consume(&list
, e
);
2950 *ret
= TAKE_PTR(list
);
2955 static char *exec_command_line(char **argv
);
2957 static int exec_parameters_get_cgroup_path(const ExecParameters
*params
, char **ret
) {
2958 bool using_subcgroup
;
2964 if (!params
->cgroup_path
)
2967 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
2968 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
2969 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
2970 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
2971 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
2972 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
2973 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
2974 * flag, which is only passed for the former statements, not for the latter. */
2976 using_subcgroup
= FLAGS_SET(params
->flags
, EXEC_CONTROL_CGROUP
|EXEC_CGROUP_DELEGATE
|EXEC_IS_CONTROL
);
2977 if (using_subcgroup
)
2978 p
= path_join(params
->cgroup_path
, ".control");
2980 p
= strdup(params
->cgroup_path
);
2985 return using_subcgroup
;
2988 static int exec_child(
2990 const ExecCommand
*command
,
2991 const ExecContext
*context
,
2992 const ExecParameters
*params
,
2993 ExecRuntime
*runtime
,
2994 DynamicCreds
*dcreds
,
2996 const int named_iofds
[static 3],
2998 size_t n_socket_fds
,
2999 size_t n_storage_fds
,
3004 _cleanup_strv_free_
char **our_env
= NULL
, **pass_env
= NULL
, **accum_env
= NULL
, **replaced_argv
= NULL
;
3005 int *fds_with_exec_fd
, n_fds_with_exec_fd
, r
, ngids
= 0, exec_fd
= -1;
3006 _cleanup_free_ gid_t
*supplementary_gids
= NULL
;
3007 const char *username
= NULL
, *groupname
= NULL
;
3008 _cleanup_free_
char *home_buffer
= NULL
;
3009 const char *home
= NULL
, *shell
= NULL
;
3010 char **final_argv
= NULL
;
3011 dev_t journal_stream_dev
= 0;
3012 ino_t journal_stream_ino
= 0;
3013 bool userns_set_up
= false;
3014 bool needs_sandboxing
, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
3015 needs_setuid
, /* Do we need to do the actual setresuid()/setresgid() calls? */
3016 needs_mount_namespace
, /* Do we need to set up a mount namespace for this kernel? */
3017 needs_ambient_hack
; /* Do we need to apply the ambient capabilities hack? */
3019 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
3020 bool use_selinux
= false;
3023 bool use_smack
= false;
3026 bool use_apparmor
= false;
3028 uid_t saved_uid
= getuid();
3029 gid_t saved_gid
= getgid();
3030 uid_t uid
= UID_INVALID
;
3031 gid_t gid
= GID_INVALID
;
3033 ExecDirectoryType dt
;
3035 _cleanup_free_ gid_t
*gids_after_pam
= NULL
;
3036 int ngids_after_pam
= 0;
3042 assert(exit_status
);
3044 rename_process_from_path(command
->path
);
3046 /* We reset exactly these signals, since they are the
3047 * only ones we set to SIG_IGN in the main daemon. All
3048 * others we leave untouched because we set them to
3049 * SIG_DFL or a valid handler initially, both of which
3050 * will be demoted to SIG_DFL. */
3051 (void) default_signals(SIGNALS_CRASH_HANDLER
,
3052 SIGNALS_IGNORE
, -1);
3054 if (context
->ignore_sigpipe
)
3055 (void) ignore_signals(SIGPIPE
, -1);
3057 r
= reset_signal_mask();
3059 *exit_status
= EXIT_SIGNAL_MASK
;
3060 return log_unit_error_errno(unit
, r
, "Failed to set process signal mask: %m");
3063 if (params
->idle_pipe
)
3064 do_idle_pipe_dance(params
->idle_pipe
);
3066 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3067 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3068 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3069 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
3072 log_set_open_when_needed(true);
3074 /* In case anything used libc syslog(), close this here, too */
3077 n_fds
= n_socket_fds
+ n_storage_fds
;
3078 r
= close_remaining_fds(params
, runtime
, dcreds
, user_lookup_fd
, socket_fd
, params
->exec_fd
, fds
, n_fds
);
3080 *exit_status
= EXIT_FDS
;
3081 return log_unit_error_errno(unit
, r
, "Failed to close unwanted file descriptors: %m");
3084 if (!context
->same_pgrp
)
3086 *exit_status
= EXIT_SETSID
;
3087 return log_unit_error_errno(unit
, errno
, "Failed to create new process session: %m");
3090 exec_context_tty_reset(context
, params
);
3092 if (unit_shall_confirm_spawn(unit
)) {
3093 const char *vc
= params
->confirm_spawn
;
3094 _cleanup_free_
char *cmdline
= NULL
;
3096 cmdline
= exec_command_line(command
->argv
);
3098 *exit_status
= EXIT_MEMORY
;
3102 r
= ask_for_confirmation(vc
, unit
, cmdline
);
3103 if (r
!= CONFIRM_EXECUTE
) {
3104 if (r
== CONFIRM_PRETEND_SUCCESS
) {
3105 *exit_status
= EXIT_SUCCESS
;
3108 *exit_status
= EXIT_CONFIRM
;
3109 log_unit_error(unit
, "Execution cancelled by the user");
3114 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3115 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3116 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3117 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3118 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3119 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit
->id
, true) != 0 ||
3120 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit
->manager
) ? "system" : "user", true) != 0) {
3121 *exit_status
= EXIT_MEMORY
;
3122 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
3125 if (context
->dynamic_user
&& dcreds
) {
3126 _cleanup_strv_free_
char **suggested_paths
= NULL
;
3128 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3129 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
3130 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3131 *exit_status
= EXIT_USER
;
3132 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
3135 r
= compile_suggested_paths(context
, params
, &suggested_paths
);
3137 *exit_status
= EXIT_MEMORY
;
3141 r
= dynamic_creds_realize(dcreds
, suggested_paths
, &uid
, &gid
);
3143 *exit_status
= EXIT_USER
;
3145 log_unit_error(unit
, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3148 return log_unit_error_errno(unit
, r
, "Failed to update dynamic user credentials: %m");
3151 if (!uid_is_valid(uid
)) {
3152 *exit_status
= EXIT_USER
;
3153 log_unit_error(unit
, "UID validation failed for \""UID_FMT
"\"", uid
);
3157 if (!gid_is_valid(gid
)) {
3158 *exit_status
= EXIT_USER
;
3159 log_unit_error(unit
, "GID validation failed for \""GID_FMT
"\"", gid
);
3164 username
= dcreds
->user
->name
;
3167 r
= get_fixed_user(context
, &username
, &uid
, &gid
, &home
, &shell
);
3169 *exit_status
= EXIT_USER
;
3170 return log_unit_error_errno(unit
, r
, "Failed to determine user credentials: %m");
3173 r
= get_fixed_group(context
, &groupname
, &gid
);
3175 *exit_status
= EXIT_GROUP
;
3176 return log_unit_error_errno(unit
, r
, "Failed to determine group credentials: %m");
3180 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3181 r
= get_supplementary_groups(context
, username
, groupname
, gid
,
3182 &supplementary_gids
, &ngids
);
3184 *exit_status
= EXIT_GROUP
;
3185 return log_unit_error_errno(unit
, r
, "Failed to determine supplementary groups: %m");
3188 r
= send_user_lookup(unit
, user_lookup_fd
, uid
, gid
);
3190 *exit_status
= EXIT_USER
;
3191 return log_unit_error_errno(unit
, r
, "Failed to send user credentials to PID1: %m");
3194 user_lookup_fd
= safe_close(user_lookup_fd
);
3196 r
= acquire_home(context
, uid
, &home
, &home_buffer
);
3198 *exit_status
= EXIT_CHDIR
;
3199 return log_unit_error_errno(unit
, r
, "Failed to determine $HOME for user: %m");
3202 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3203 * must be sure to drop O_NONBLOCK */
3205 (void) fd_nonblock(socket_fd
, false);
3207 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3208 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3209 if (params
->cgroup_path
) {
3210 _cleanup_free_
char *p
= NULL
;
3212 r
= exec_parameters_get_cgroup_path(params
, &p
);
3214 *exit_status
= EXIT_CGROUP
;
3215 return log_unit_error_errno(unit
, r
, "Failed to acquire cgroup path: %m");
3218 r
= cg_attach_everywhere(params
->cgroup_supported
, p
, 0, NULL
, NULL
);
3220 *exit_status
= EXIT_CGROUP
;
3221 return log_unit_error_errno(unit
, r
, "Failed to attach to cgroup %s: %m", p
);
3225 if (context
->network_namespace_path
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
3226 r
= open_netns_path(runtime
->netns_storage_socket
, context
->network_namespace_path
);
3228 *exit_status
= EXIT_NETWORK
;
3229 return log_unit_error_errno(unit
, r
, "Failed to open network namespace path %s: %m", context
->network_namespace_path
);
3233 r
= setup_input(context
, params
, socket_fd
, named_iofds
);
3235 *exit_status
= EXIT_STDIN
;
3236 return log_unit_error_errno(unit
, r
, "Failed to set up standard input: %m");
3239 r
= setup_output(unit
, context
, params
, STDOUT_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
3241 *exit_status
= EXIT_STDOUT
;
3242 return log_unit_error_errno(unit
, r
, "Failed to set up standard output: %m");
3245 r
= setup_output(unit
, context
, params
, STDERR_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
3247 *exit_status
= EXIT_STDERR
;
3248 return log_unit_error_errno(unit
, r
, "Failed to set up standard error output: %m");
3251 if (context
->oom_score_adjust_set
) {
3252 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3253 * prohibit write access to this file, and we shouldn't trip up over that. */
3254 r
= set_oom_score_adjust(context
->oom_score_adjust
);
3255 if (IN_SET(r
, -EPERM
, -EACCES
))
3256 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
3258 *exit_status
= EXIT_OOM_ADJUST
;
3259 return log_unit_error_errno(unit
, r
, "Failed to adjust OOM setting: %m");
3263 if (context
->nice_set
) {
3264 r
= setpriority_closest(context
->nice
);
3266 return log_unit_error_errno(unit
, r
, "Failed to set up process scheduling priority (nice level): %m");
3269 if (context
->cpu_sched_set
) {
3270 struct sched_param param
= {
3271 .sched_priority
= context
->cpu_sched_priority
,
3274 r
= sched_setscheduler(0,
3275 context
->cpu_sched_policy
|
3276 (context
->cpu_sched_reset_on_fork
?
3277 SCHED_RESET_ON_FORK
: 0),
3280 *exit_status
= EXIT_SETSCHEDULER
;
3281 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU scheduling: %m");
3285 if (context
->cpu_set
.set
)
3286 if (sched_setaffinity(0, context
->cpu_set
.allocated
, context
->cpu_set
.set
) < 0) {
3287 *exit_status
= EXIT_CPUAFFINITY
;
3288 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU affinity: %m");
3291 if (mpol_is_valid(numa_policy_get_type(&context
->numa_policy
))) {
3292 r
= apply_numa_policy(&context
->numa_policy
);
3293 if (r
== -EOPNOTSUPP
)
3294 log_unit_debug_errno(unit
, r
, "NUMA support not available, ignoring.");
3296 *exit_status
= EXIT_NUMA_POLICY
;
3297 return log_unit_error_errno(unit
, r
, "Failed to set NUMA memory policy: %m");
3301 if (context
->ioprio_set
)
3302 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
3303 *exit_status
= EXIT_IOPRIO
;
3304 return log_unit_error_errno(unit
, errno
, "Failed to set up IO scheduling priority: %m");
3307 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
3308 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
3309 *exit_status
= EXIT_TIMERSLACK
;
3310 return log_unit_error_errno(unit
, errno
, "Failed to set up timer slack: %m");
3313 if (context
->personality
!= PERSONALITY_INVALID
) {
3314 r
= safe_personality(context
->personality
);
3316 *exit_status
= EXIT_PERSONALITY
;
3317 return log_unit_error_errno(unit
, r
, "Failed to set up execution domain (personality): %m");
3321 if (context
->utmp_id
)
3322 utmp_put_init_process(context
->utmp_id
, getpid_cached(), getsid(0),
3324 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
3325 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
3329 if (uid_is_valid(uid
)) {
3330 r
= chown_terminal(STDIN_FILENO
, uid
);
3332 *exit_status
= EXIT_STDIN
;
3333 return log_unit_error_errno(unit
, r
, "Failed to change ownership of terminal: %m");
3337 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
3338 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3339 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3340 * touch a single hierarchy too. */
3341 if (params
->cgroup_path
&& context
->user
&& (params
->flags
& EXEC_CGROUP_DELEGATE
)) {
3342 r
= cg_set_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, uid
, gid
);
3344 *exit_status
= EXIT_CGROUP
;
3345 return log_unit_error_errno(unit
, r
, "Failed to adjust control group access: %m");
3349 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
3350 r
= setup_exec_directory(context
, params
, uid
, gid
, dt
, exit_status
);
3352 return log_unit_error_errno(unit
, r
, "Failed to set up special execution directory in %s: %m", params
->prefix
[dt
]);
3355 r
= build_environment(
3367 *exit_status
= EXIT_MEMORY
;
3371 r
= build_pass_environment(context
, &pass_env
);
3373 *exit_status
= EXIT_MEMORY
;
3377 accum_env
= strv_env_merge(5,
3378 params
->environment
,
3381 context
->environment
,
3385 *exit_status
= EXIT_MEMORY
;
3388 accum_env
= strv_env_clean(accum_env
);
3390 (void) umask(context
->umask
);
3392 r
= setup_keyring(unit
, context
, params
, uid
, gid
);
3394 *exit_status
= EXIT_KEYRING
;
3395 return log_unit_error_errno(unit
, r
, "Failed to set up kernel keyring: %m");
3398 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3399 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
3401 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3402 needs_ambient_hack
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && (command
->flags
& EXEC_COMMAND_AMBIENT_MAGIC
) && !ambient_capabilities_supported();
3404 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3405 if (needs_ambient_hack
)
3406 needs_setuid
= false;
3408 needs_setuid
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& (EXEC_COMMAND_FULLY_PRIVILEGED
|EXEC_COMMAND_NO_SETUID
));
3410 if (needs_sandboxing
) {
3411 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3412 * present. The actual MAC context application will happen later, as late as possible, to avoid
3413 * impacting our own code paths. */
3416 use_selinux
= mac_selinux_use();
3419 use_smack
= mac_smack_use();
3422 use_apparmor
= mac_apparmor_use();
3426 if (needs_sandboxing
) {
3429 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3430 * is set here. (See below.) */
3432 r
= setrlimit_closest_all((const struct rlimit
* const *) context
->rlimit
, &which_failed
);
3434 *exit_status
= EXIT_LIMITS
;
3435 return log_unit_error_errno(unit
, r
, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed
));
3441 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3442 * wins here. (See above.) */
3444 if (context
->pam_name
&& username
) {
3445 r
= setup_pam(context
->pam_name
, username
, uid
, gid
, context
->tty_path
, &accum_env
, fds
, n_fds
);
3447 *exit_status
= EXIT_PAM
;
3448 return log_unit_error_errno(unit
, r
, "Failed to set up PAM session: %m");
3451 ngids_after_pam
= getgroups_alloc(&gids_after_pam
);
3452 if (ngids_after_pam
< 0) {
3453 *exit_status
= EXIT_MEMORY
;
3454 return log_unit_error_errno(unit
, ngids_after_pam
, "Failed to obtain groups after setting up PAM: %m");
3459 if (needs_sandboxing
) {
3461 if (use_selinux
&& params
->selinux_context_net
&& socket_fd
>= 0) {
3462 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
3464 *exit_status
= EXIT_SELINUX_CONTEXT
;
3465 return log_unit_error_errno(unit
, r
, "Failed to determine SELinux context: %m");
3470 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3471 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3472 * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3473 if (context
->private_users
&& !have_effective_cap(CAP_SYS_ADMIN
)) {
3474 userns_set_up
= true;
3475 r
= setup_private_users(saved_uid
, saved_gid
, uid
, gid
);
3477 *exit_status
= EXIT_USER
;
3478 return log_unit_error_errno(unit
, r
, "Failed to set up user namespacing for unprivileged user: %m");
3483 if ((context
->private_network
|| context
->network_namespace_path
) && runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
3485 if (ns_type_supported(NAMESPACE_NET
)) {
3486 r
= setup_netns(runtime
->netns_storage_socket
);
3488 *exit_status
= EXIT_NETWORK
;
3489 return log_unit_error_errno(unit
, r
, "Failed to set up network namespacing: %m");
3491 } else if (context
->network_namespace_path
) {
3492 *exit_status
= EXIT_NETWORK
;
3493 return log_unit_error_errno(unit
, SYNTHETIC_ERRNO(EOPNOTSUPP
), "NetworkNamespacePath= is not supported, refusing.");
3495 log_unit_warning(unit
, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3498 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
3499 if (needs_mount_namespace
) {
3500 _cleanup_free_
char *error_path
= NULL
;
3502 r
= apply_mount_namespace(unit
, command
, context
, params
, runtime
, &error_path
);
3504 *exit_status
= EXIT_NAMESPACE
;
3505 return log_unit_error_errno(unit
, r
, "Failed to set up mount namespacing%s%s: %m",
3506 error_path
? ": " : "", strempty(error_path
));
3510 if (context
->protect_hostname
) {
3511 if (ns_type_supported(NAMESPACE_UTS
)) {
3512 if (unshare(CLONE_NEWUTS
) < 0) {
3513 if (!ERRNO_IS_NOT_SUPPORTED(errno
) && !ERRNO_IS_PRIVILEGE(errno
)) {
3514 *exit_status
= EXIT_NAMESPACE
;
3515 return log_unit_error_errno(unit
, errno
, "Failed to set up UTS namespacing: %m");
3518 log_unit_warning(unit
, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
3521 log_unit_warning(unit
, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
3523 r
= seccomp_protect_hostname();
3525 *exit_status
= EXIT_SECCOMP
;
3526 return log_unit_error_errno(unit
, r
, "Failed to apply hostname restrictions: %m");
3531 /* Drop groups as early as possible.
3532 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3533 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
3535 _cleanup_free_ gid_t
*gids_to_enforce
= NULL
;
3536 int ngids_to_enforce
= 0;
3538 ngids_to_enforce
= merge_gid_lists(supplementary_gids
,
3543 if (ngids_to_enforce
< 0) {
3544 *exit_status
= EXIT_MEMORY
;
3545 return log_unit_error_errno(unit
,
3547 "Failed to merge group lists. Group membership might be incorrect: %m");
3550 r
= enforce_groups(gid
, gids_to_enforce
, ngids_to_enforce
);
3552 *exit_status
= EXIT_GROUP
;
3553 return log_unit_error_errno(unit
, r
, "Changing group credentials failed: %m");
3557 /* If the user namespace was not set up above, try to do it now.
3558 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3559 * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
3560 * case of mount namespaces being less privileged when the mount point list is copied from a
3561 * different user namespace). */
3563 if (needs_sandboxing
&& context
->private_users
&& !userns_set_up
) {
3564 r
= setup_private_users(saved_uid
, saved_gid
, uid
, gid
);
3566 *exit_status
= EXIT_USER
;
3567 return log_unit_error_errno(unit
, r
, "Failed to set up user namespacing: %m");
3571 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3572 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3573 * however if we have it as we want to keep it open until the final execve(). */
3575 if (params
->exec_fd
>= 0) {
3576 exec_fd
= params
->exec_fd
;
3578 if (exec_fd
< 3 + (int) n_fds
) {
3581 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3582 * process we are about to execute. */
3584 moved_fd
= fcntl(exec_fd
, F_DUPFD_CLOEXEC
, 3 + (int) n_fds
);
3586 *exit_status
= EXIT_FDS
;
3587 return log_unit_error_errno(unit
, errno
, "Couldn't move exec fd up: %m");
3590 safe_close(exec_fd
);
3593 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3594 r
= fd_cloexec(exec_fd
, true);
3596 *exit_status
= EXIT_FDS
;
3597 return log_unit_error_errno(unit
, r
, "Failed to make exec fd FD_CLOEXEC: %m");
3601 fds_with_exec_fd
= newa(int, n_fds
+ 1);
3602 memcpy_safe(fds_with_exec_fd
, fds
, n_fds
* sizeof(int));
3603 fds_with_exec_fd
[n_fds
] = exec_fd
;
3604 n_fds_with_exec_fd
= n_fds
+ 1;
3606 fds_with_exec_fd
= fds
;
3607 n_fds_with_exec_fd
= n_fds
;
3610 r
= close_all_fds(fds_with_exec_fd
, n_fds_with_exec_fd
);
3612 r
= shift_fds(fds
, n_fds
);
3614 r
= flags_fds(fds
, n_socket_fds
, n_storage_fds
, context
->non_blocking
);
3616 *exit_status
= EXIT_FDS
;
3617 return log_unit_error_errno(unit
, r
, "Failed to adjust passed file descriptors: %m");
3620 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3621 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3622 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3625 secure_bits
= context
->secure_bits
;
3627 if (needs_sandboxing
) {
3630 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3631 * requested. (Note this is placed after the general resource limit initialization, see
3632 * above, in order to take precedence.) */
3633 if (context
->restrict_realtime
&& !context
->rlimit
[RLIMIT_RTPRIO
]) {
3634 if (setrlimit(RLIMIT_RTPRIO
, &RLIMIT_MAKE_CONST(0)) < 0) {
3635 *exit_status
= EXIT_LIMITS
;
3636 return log_unit_error_errno(unit
, errno
, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3641 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3642 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3644 r
= setup_smack(context
, command
);
3646 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
3647 return log_unit_error_errno(unit
, r
, "Failed to set SMACK process label: %m");
3652 bset
= context
->capability_bounding_set
;
3653 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3654 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3655 * instead of us doing that */
3656 if (needs_ambient_hack
)
3657 bset
|= (UINT64_C(1) << CAP_SETPCAP
) |
3658 (UINT64_C(1) << CAP_SETUID
) |
3659 (UINT64_C(1) << CAP_SETGID
);
3661 if (!cap_test_all(bset
)) {
3662 r
= capability_bounding_set_drop(bset
, false);
3664 *exit_status
= EXIT_CAPABILITIES
;
3665 return log_unit_error_errno(unit
, r
, "Failed to drop capabilities: %m");
3669 /* This is done before enforce_user, but ambient set
3670 * does not survive over setresuid() if keep_caps is not set. */
3671 if (!needs_ambient_hack
) {
3672 r
= capability_ambient_set_apply(context
->capability_ambient_set
, true);
3674 *exit_status
= EXIT_CAPABILITIES
;
3675 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (before UID change): %m");
3680 /* chroot to root directory first, before we lose the ability to chroot */
3681 r
= apply_root_directory(context
, params
, needs_mount_namespace
, exit_status
);
3683 return log_unit_error_errno(unit
, r
, "Chrooting to the requested root directory failed: %m");
3686 if (uid_is_valid(uid
)) {
3687 r
= enforce_user(context
, uid
);
3689 *exit_status
= EXIT_USER
;
3690 return log_unit_error_errno(unit
, r
, "Failed to change UID to " UID_FMT
": %m", uid
);
3693 if (!needs_ambient_hack
&&
3694 context
->capability_ambient_set
!= 0) {
3696 /* Fix the ambient capabilities after user change. */
3697 r
= capability_ambient_set_apply(context
->capability_ambient_set
, false);
3699 *exit_status
= EXIT_CAPABILITIES
;
3700 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (after UID change): %m");
3703 /* If we were asked to change user and ambient capabilities
3704 * were requested, we had to add keep-caps to the securebits
3705 * so that we would maintain the inherited capability set
3706 * through the setresuid(). Make sure that the bit is added
3707 * also to the context secure_bits so that we don't try to
3708 * drop the bit away next. */
3710 secure_bits
|= 1<<SECURE_KEEP_CAPS
;
3715 /* Apply working directory here, because the working directory might be on NFS and only the user running
3716 * this service might have the correct privilege to change to the working directory */
3717 r
= apply_working_directory(context
, params
, home
, exit_status
);
3719 return log_unit_error_errno(unit
, r
, "Changing to the requested working directory failed: %m");
3721 if (needs_sandboxing
) {
3722 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3723 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3724 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3725 * are restricted. */
3729 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
3732 r
= setexeccon(exec_context
);
3734 *exit_status
= EXIT_SELINUX_CONTEXT
;
3735 return log_unit_error_errno(unit
, r
, "Failed to change SELinux context to %s: %m", exec_context
);
3742 if (use_apparmor
&& context
->apparmor_profile
) {
3743 r
= aa_change_onexec(context
->apparmor_profile
);
3744 if (r
< 0 && !context
->apparmor_profile_ignore
) {
3745 *exit_status
= EXIT_APPARMOR_PROFILE
;
3746 return log_unit_error_errno(unit
, errno
, "Failed to prepare AppArmor profile change to %s: %m", context
->apparmor_profile
);
3751 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3752 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3753 if (prctl(PR_GET_SECUREBITS
) != secure_bits
)
3754 if (prctl(PR_SET_SECUREBITS
, secure_bits
) < 0) {
3755 *exit_status
= EXIT_SECUREBITS
;
3756 return log_unit_error_errno(unit
, errno
, "Failed to set process secure bits: %m");
3759 if (context_has_no_new_privileges(context
))
3760 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
3761 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
3762 return log_unit_error_errno(unit
, errno
, "Failed to disable new privileges: %m");
3766 r
= apply_address_families(unit
, context
);
3768 *exit_status
= EXIT_ADDRESS_FAMILIES
;
3769 return log_unit_error_errno(unit
, r
, "Failed to restrict address families: %m");
3772 r
= apply_memory_deny_write_execute(unit
, context
);
3774 *exit_status
= EXIT_SECCOMP
;
3775 return log_unit_error_errno(unit
, r
, "Failed to disable writing to executable memory: %m");
3778 r
= apply_restrict_realtime(unit
, context
);
3780 *exit_status
= EXIT_SECCOMP
;
3781 return log_unit_error_errno(unit
, r
, "Failed to apply realtime restrictions: %m");
3784 r
= apply_restrict_suid_sgid(unit
, context
);
3786 *exit_status
= EXIT_SECCOMP
;
3787 return log_unit_error_errno(unit
, r
, "Failed to apply SUID/SGID restrictions: %m");
3790 r
= apply_restrict_namespaces(unit
, context
);
3792 *exit_status
= EXIT_SECCOMP
;
3793 return log_unit_error_errno(unit
, r
, "Failed to apply namespace restrictions: %m");
3796 r
= apply_protect_sysctl(unit
, context
);
3798 *exit_status
= EXIT_SECCOMP
;
3799 return log_unit_error_errno(unit
, r
, "Failed to apply sysctl restrictions: %m");
3802 r
= apply_protect_kernel_modules(unit
, context
);
3804 *exit_status
= EXIT_SECCOMP
;
3805 return log_unit_error_errno(unit
, r
, "Failed to apply module loading restrictions: %m");
3808 r
= apply_protect_kernel_logs(unit
, context
);
3810 *exit_status
= EXIT_SECCOMP
;
3811 return log_unit_error_errno(unit
, r
, "Failed to apply kernel log restrictions: %m");
3814 r
= apply_protect_clock(unit
, context
);
3816 *exit_status
= EXIT_SECCOMP
;
3817 return log_unit_error_errno(unit
, r
, "Failed to apply clock restrictions: %m");
3820 r
= apply_private_devices(unit
, context
);
3822 *exit_status
= EXIT_SECCOMP
;
3823 return log_unit_error_errno(unit
, r
, "Failed to set up private devices: %m");
3826 r
= apply_syscall_archs(unit
, context
);
3828 *exit_status
= EXIT_SECCOMP
;
3829 return log_unit_error_errno(unit
, r
, "Failed to apply syscall architecture restrictions: %m");
3832 r
= apply_lock_personality(unit
, context
);
3834 *exit_status
= EXIT_SECCOMP
;
3835 return log_unit_error_errno(unit
, r
, "Failed to lock personalities: %m");
3838 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3839 * by the filter as little as possible. */
3840 r
= apply_syscall_filter(unit
, context
, needs_ambient_hack
);
3842 *exit_status
= EXIT_SECCOMP
;
3843 return log_unit_error_errno(unit
, r
, "Failed to apply system call filters: %m");
3848 if (!strv_isempty(context
->unset_environment
)) {
3851 ee
= strv_env_delete(accum_env
, 1, context
->unset_environment
);
3853 *exit_status
= EXIT_MEMORY
;
3857 strv_free_and_replace(accum_env
, ee
);
3860 if (!FLAGS_SET(command
->flags
, EXEC_COMMAND_NO_ENV_EXPAND
)) {
3861 replaced_argv
= replace_env_argv(command
->argv
, accum_env
);
3862 if (!replaced_argv
) {
3863 *exit_status
= EXIT_MEMORY
;
3866 final_argv
= replaced_argv
;
3868 final_argv
= command
->argv
;
3870 if (DEBUG_LOGGING
) {
3871 _cleanup_free_
char *line
;
3873 line
= exec_command_line(final_argv
);
3875 log_struct(LOG_DEBUG
,
3876 "EXECUTABLE=%s", command
->path
,
3877 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
3879 LOG_UNIT_INVOCATION_ID(unit
));
3885 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3886 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3888 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3889 *exit_status
= EXIT_EXEC
;
3890 return log_unit_error_errno(unit
, errno
, "Failed to enable exec_fd: %m");
3894 execve(command
->path
, final_argv
, accum_env
);
3900 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3901 * that POLLHUP on it no longer means execve() succeeded. */
3903 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3904 *exit_status
= EXIT_EXEC
;
3905 return log_unit_error_errno(unit
, errno
, "Failed to disable exec_fd: %m");
3909 if (r
== -ENOENT
&& (command
->flags
& EXEC_COMMAND_IGNORE_FAILURE
)) {
3910 log_struct_errno(LOG_INFO
, r
,
3911 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3913 LOG_UNIT_INVOCATION_ID(unit
),
3914 LOG_UNIT_MESSAGE(unit
, "Executable %s missing, skipping: %m",
3916 "EXECUTABLE=%s", command
->path
);
3920 *exit_status
= EXIT_EXEC
;
3921 return log_unit_error_errno(unit
, r
, "Failed to execute command: %m");
/* Forward declaration. exec_spawn() calls this with (unit, context, &files_env) and reports a
 * failure as "Failed to load environment files". The definition is not part of this excerpt. */
3924 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
);
/* Forward declaration. exec_spawn() calls this with a three-element named_iofds array that it
 * initialises to { -1, -1, -1 } and reports a failure as "Failed to load a named file
 * descriptor". The definition is not part of this excerpt. */
3925 static int exec_context_named_iofds(const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[static 3]);
3927 int exec_spawn(Unit
*unit
,
3928 ExecCommand
*command
,
3929 const ExecContext
*context
,
3930 const ExecParameters
*params
,
3931 ExecRuntime
*runtime
,
3932 DynamicCreds
*dcreds
,
3935 int socket_fd
, r
, named_iofds
[3] = { -1, -1, -1 }, *fds
= NULL
;
3936 _cleanup_free_
char *subcgroup_path
= NULL
;
3937 _cleanup_strv_free_
char **files_env
= NULL
;
3938 size_t n_storage_fds
= 0, n_socket_fds
= 0;
3939 _cleanup_free_
char *line
= NULL
;
3947 assert(params
->fds
|| (params
->n_socket_fds
+ params
->n_storage_fds
<= 0));
3949 if (context
->std_input
== EXEC_INPUT_SOCKET
||
3950 context
->std_output
== EXEC_OUTPUT_SOCKET
||
3951 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
3953 if (params
->n_socket_fds
> 1) {
3954 log_unit_error(unit
, "Got more than one socket.");
3958 if (params
->n_socket_fds
== 0) {
3959 log_unit_error(unit
, "Got no socket.");
3963 socket_fd
= params
->fds
[0];
3967 n_socket_fds
= params
->n_socket_fds
;
3968 n_storage_fds
= params
->n_storage_fds
;
3971 r
= exec_context_named_iofds(context
, params
, named_iofds
);
3973 return log_unit_error_errno(unit
, r
, "Failed to load a named file descriptor: %m");
3975 r
= exec_context_load_environment(unit
, context
, &files_env
);
3977 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
3979 line
= exec_command_line(command
->argv
);
3983 log_struct(LOG_DEBUG
,
3984 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
3985 "EXECUTABLE=%s", command
->path
,
3987 LOG_UNIT_INVOCATION_ID(unit
));
3989 if (params
->cgroup_path
) {
3990 r
= exec_parameters_get_cgroup_path(params
, &subcgroup_path
);
3992 return log_unit_error_errno(unit
, r
, "Failed to acquire subcgroup path: %m");
3993 if (r
> 0) { /* We are using a child cgroup */
3994 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, subcgroup_path
);
3996 return log_unit_error_errno(unit
, r
, "Failed to create control group '%s': %m", subcgroup_path
);
4002 return log_unit_error_errno(unit
, errno
, "Failed to fork: %m");
4005 int exit_status
= EXIT_SUCCESS
;
4007 r
= exec_child(unit
,
4019 unit
->manager
->user_lookup_fds
[1],
4023 const char *status
=
4024 exit_status_to_string(exit_status
,
4025 EXIT_STATUS_LIBC
| EXIT_STATUS_SYSTEMD
);
4027 log_struct_errno(LOG_ERR
, r
,
4028 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
4030 LOG_UNIT_INVOCATION_ID(unit
),
4031 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
4032 status
, command
->path
),
4033 "EXECUTABLE=%s", command
->path
);
4039 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
4041 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
4042 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
4043 * process will be killed too). */
4045 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, subcgroup_path
, pid
);
4047 exec_status_start(&command
->exec_status
, pid
);
4053 void exec_context_init(ExecContext
*c
) {
4054 ExecDirectoryType i
;
4059 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
4060 c
->cpu_sched_policy
= SCHED_OTHER
;
4061 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
4062 c
->syslog_level_prefix
= true;
4063 c
->ignore_sigpipe
= true;
4064 c
->timer_slack_nsec
= NSEC_INFINITY
;
4065 c
->personality
= PERSONALITY_INVALID
;
4066 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
4067 c
->directories
[i
].mode
= 0755;
4068 c
->timeout_clean_usec
= USEC_INFINITY
;
4069 c
->capability_bounding_set
= CAP_ALL
;
4070 assert_cc(NAMESPACE_FLAGS_INITIAL
!= NAMESPACE_FLAGS_ALL
);
4071 c
->restrict_namespaces
= NAMESPACE_FLAGS_INITIAL
;
4072 c
->log_level_max
= -1;
4073 numa_policy_reset(&c
->numa_policy
);
4076 void exec_context_done(ExecContext
*c
) {
4077 ExecDirectoryType i
;
4082 c
->environment
= strv_free(c
->environment
);
4083 c
->environment_files
= strv_free(c
->environment_files
);
4084 c
->pass_environment
= strv_free(c
->pass_environment
);
4085 c
->unset_environment
= strv_free(c
->unset_environment
);
4087 rlimit_free_all(c
->rlimit
);
4089 for (l
= 0; l
< 3; l
++) {
4090 c
->stdio_fdname
[l
] = mfree(c
->stdio_fdname
[l
]);
4091 c
->stdio_file
[l
] = mfree(c
->stdio_file
[l
]);
4094 c
->working_directory
= mfree(c
->working_directory
);
4095 c
->root_directory
= mfree(c
->root_directory
);
4096 c
->root_image
= mfree(c
->root_image
);
4097 c
->tty_path
= mfree(c
->tty_path
);
4098 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
4099 c
->user
= mfree(c
->user
);
4100 c
->group
= mfree(c
->group
);
4102 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
4104 c
->pam_name
= mfree(c
->pam_name
);
4106 c
->read_only_paths
= strv_free(c
->read_only_paths
);
4107 c
->read_write_paths
= strv_free(c
->read_write_paths
);
4108 c
->inaccessible_paths
= strv_free(c
->inaccessible_paths
);
4110 bind_mount_free_many(c
->bind_mounts
, c
->n_bind_mounts
);
4111 c
->bind_mounts
= NULL
;
4112 c
->n_bind_mounts
= 0;
4113 temporary_filesystem_free_many(c
->temporary_filesystems
, c
->n_temporary_filesystems
);
4114 c
->temporary_filesystems
= NULL
;
4115 c
->n_temporary_filesystems
= 0;
4117 cpu_set_reset(&c
->cpu_set
);
4118 numa_policy_reset(&c
->numa_policy
);
4120 c
->utmp_id
= mfree(c
->utmp_id
);
4121 c
->selinux_context
= mfree(c
->selinux_context
);
4122 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
4123 c
->smack_process_label
= mfree(c
->smack_process_label
);
4125 c
->syscall_filter
= hashmap_free(c
->syscall_filter
);
4126 c
->syscall_archs
= set_free(c
->syscall_archs
);
4127 c
->address_families
= set_free(c
->address_families
);
4129 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
4130 c
->directories
[i
].paths
= strv_free(c
->directories
[i
].paths
);
4132 c
->log_level_max
= -1;
4134 exec_context_free_log_extra_fields(c
);
4136 c
->log_ratelimit_interval_usec
= 0;
4137 c
->log_ratelimit_burst
= 0;
4139 c
->stdin_data
= mfree(c
->stdin_data
);
4140 c
->stdin_data_size
= 0;
4142 c
->network_namespace_path
= mfree(c
->network_namespace_path
);
4145 int exec_context_destroy_runtime_directory(const ExecContext
*c
, const char *runtime_prefix
) {
4150 if (!runtime_prefix
)
4153 STRV_FOREACH(i
, c
->directories
[EXEC_DIRECTORY_RUNTIME
].paths
) {
4154 _cleanup_free_
char *p
;
4156 if (exec_directory_is_private(c
, EXEC_DIRECTORY_RUNTIME
))
4157 p
= path_join(runtime_prefix
, "private", *i
);
4159 p
= path_join(runtime_prefix
, *i
);
4163 /* We execute this synchronously, since we need to be sure this is gone when we start the
4165 (void) rm_rf(p
, REMOVE_ROOT
);
4171 static void exec_command_done(ExecCommand
*c
) {
4174 c
->path
= mfree(c
->path
);
4175 c
->argv
= strv_free(c
->argv
);
4178 void exec_command_done_array(ExecCommand
*c
, size_t n
) {
4181 for (i
= 0; i
< n
; i
++)
4182 exec_command_done(c
+i
);
4185 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
4189 LIST_REMOVE(command
, c
, i
);
4190 exec_command_done(i
);
4197 void exec_command_free_array(ExecCommand
**c
, size_t n
) {
4200 for (i
= 0; i
< n
; i
++)
4201 c
[i
] = exec_command_free_list(c
[i
]);
4204 void exec_command_reset_status_array(ExecCommand
*c
, size_t n
) {
4207 for (i
= 0; i
< n
; i
++)
4208 exec_status_reset(&c
[i
].exec_status
);
4211 void exec_command_reset_status_list_array(ExecCommand
**c
, size_t n
) {
4214 for (i
= 0; i
< n
; i
++) {
4217 LIST_FOREACH(command
, z
, c
[i
])
4218 exec_status_reset(&z
->exec_status
);
/* Context passed to invalid_env() through its opaque userdata pointer. The code in this file
 * accesses its 'unit' and 'path' members. */
4222 typedef struct InvalidEnvInfo
{
4227 static void invalid_env(const char *p
, void *userdata
) {
4228 InvalidEnvInfo
*info
= userdata
;
4230 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
4233 const char* exec_context_fdname(const ExecContext
*c
, int fd_index
) {
4239 if (c
->std_input
!= EXEC_INPUT_NAMED_FD
)
4242 return c
->stdio_fdname
[STDIN_FILENO
] ?: "stdin";
4245 if (c
->std_output
!= EXEC_OUTPUT_NAMED_FD
)
4248 return c
->stdio_fdname
[STDOUT_FILENO
] ?: "stdout";
4251 if (c
->std_error
!= EXEC_OUTPUT_NAMED_FD
)
4254 return c
->stdio_fdname
[STDERR_FILENO
] ?: "stderr";
4261 static int exec_context_named_iofds(
4262 const ExecContext
*c
,
4263 const ExecParameters
*p
,
4264 int named_iofds
[static 3]) {
4267 const char* stdio_fdname
[3];
4272 assert(named_iofds
);
4274 targets
= (c
->std_input
== EXEC_INPUT_NAMED_FD
) +
4275 (c
->std_output
== EXEC_OUTPUT_NAMED_FD
) +
4276 (c
->std_error
== EXEC_OUTPUT_NAMED_FD
);
4278 for (i
= 0; i
< 3; i
++)
4279 stdio_fdname
[i
] = exec_context_fdname(c
, i
);
4281 n_fds
= p
->n_storage_fds
+ p
->n_socket_fds
;
4283 for (i
= 0; i
< n_fds
&& targets
> 0; i
++)
4284 if (named_iofds
[STDIN_FILENO
] < 0 &&
4285 c
->std_input
== EXEC_INPUT_NAMED_FD
&&
4286 stdio_fdname
[STDIN_FILENO
] &&
4287 streq(p
->fd_names
[i
], stdio_fdname
[STDIN_FILENO
])) {
4289 named_iofds
[STDIN_FILENO
] = p
->fds
[i
];
4292 } else if (named_iofds
[STDOUT_FILENO
] < 0 &&
4293 c
->std_output
== EXEC_OUTPUT_NAMED_FD
&&
4294 stdio_fdname
[STDOUT_FILENO
] &&
4295 streq(p
->fd_names
[i
], stdio_fdname
[STDOUT_FILENO
])) {
4297 named_iofds
[STDOUT_FILENO
] = p
->fds
[i
];
4300 } else if (named_iofds
[STDERR_FILENO
] < 0 &&
4301 c
->std_error
== EXEC_OUTPUT_NAMED_FD
&&
4302 stdio_fdname
[STDERR_FILENO
] &&
4303 streq(p
->fd_names
[i
], stdio_fdname
[STDERR_FILENO
])) {
4305 named_iofds
[STDERR_FILENO
] = p
->fds
[i
];
4309 return targets
== 0 ? 0 : -ENOENT
;
4312 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
) {
4313 char **i
, **r
= NULL
;
4318 STRV_FOREACH(i
, c
->environment_files
) {
4322 bool ignore
= false;
4324 _cleanup_globfree_ glob_t pglob
= {};
4333 if (!path_is_absolute(fn
)) {
4341 /* Filename supports globbing, take all matching files */
4342 k
= safe_glob(fn
, 0, &pglob
);
4351 /* When we don't match anything, -ENOENT should be returned */
4352 assert(pglob
.gl_pathc
> 0);
4354 for (n
= 0; n
< pglob
.gl_pathc
; n
++) {
4355 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], &p
);
4363 /* Log invalid environment variables with filename */
4365 InvalidEnvInfo info
= {
4367 .path
= pglob
.gl_pathv
[n
]
4370 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
4378 m
= strv_env_merge(2, r
, p
);
4394 static bool tty_may_match_dev_console(const char *tty
) {
4395 _cleanup_free_
char *resolved
= NULL
;
4400 tty
= skip_dev_prefix(tty
);
4402 /* trivial identity? */
4403 if (streq(tty
, "console"))
4406 if (resolve_dev_console(&resolved
) < 0)
4407 return true; /* if we could not resolve, assume it may */
4409 /* "tty0" means the active VC, so it may be the same sometimes */
4410 return path_equal(resolved
, tty
) || (streq(resolved
, "tty0") && tty_is_vc(tty
));
4413 static bool exec_context_may_touch_tty(const ExecContext
*ec
) {
4416 return ec
->tty_reset
||
4418 ec
->tty_vt_disallocate
||
4419 is_terminal_input(ec
->std_input
) ||
4420 is_terminal_output(ec
->std_output
) ||
4421 is_terminal_output(ec
->std_error
);
4424 bool exec_context_may_touch_console(const ExecContext
*ec
) {
4426 return exec_context_may_touch_tty(ec
) &&
4427 tty_may_match_dev_console(exec_context_tty_path(ec
));
4430 static void strv_fprintf(FILE *f
, char **l
) {
4436 fprintf(f
, " %s", *g
);
4439 void exec_context_dump(const ExecContext
*c
, FILE* f
, const char *prefix
) {
4440 char **e
, **d
, buf_clean
[FORMAT_TIMESPAN_MAX
];
4441 ExecDirectoryType dt
;
4448 prefix
= strempty(prefix
);
4452 "%sWorkingDirectory: %s\n"
4453 "%sRootDirectory: %s\n"
4454 "%sNonBlocking: %s\n"
4455 "%sPrivateTmp: %s\n"
4456 "%sPrivateDevices: %s\n"
4457 "%sProtectKernelTunables: %s\n"
4458 "%sProtectKernelModules: %s\n"
4459 "%sProtectKernelLogs: %s\n"
4460 "%sProtectClock: %s\n"
4461 "%sProtectControlGroups: %s\n"
4462 "%sPrivateNetwork: %s\n"
4463 "%sPrivateUsers: %s\n"
4464 "%sProtectHome: %s\n"
4465 "%sProtectSystem: %s\n"
4466 "%sMountAPIVFS: %s\n"
4467 "%sIgnoreSIGPIPE: %s\n"
4468 "%sMemoryDenyWriteExecute: %s\n"
4469 "%sRestrictRealtime: %s\n"
4470 "%sRestrictSUIDSGID: %s\n"
4471 "%sKeyringMode: %s\n"
4472 "%sProtectHostname: %s\n",
4474 prefix
, c
->working_directory
? c
->working_directory
: "/",
4475 prefix
, c
->root_directory
? c
->root_directory
: "/",
4476 prefix
, yes_no(c
->non_blocking
),
4477 prefix
, yes_no(c
->private_tmp
),
4478 prefix
, yes_no(c
->private_devices
),
4479 prefix
, yes_no(c
->protect_kernel_tunables
),
4480 prefix
, yes_no(c
->protect_kernel_modules
),
4481 prefix
, yes_no(c
->protect_kernel_logs
),
4482 prefix
, yes_no(c
->protect_clock
),
4483 prefix
, yes_no(c
->protect_control_groups
),
4484 prefix
, yes_no(c
->private_network
),
4485 prefix
, yes_no(c
->private_users
),
4486 prefix
, protect_home_to_string(c
->protect_home
),
4487 prefix
, protect_system_to_string(c
->protect_system
),
4488 prefix
, yes_no(c
->mount_apivfs
),
4489 prefix
, yes_no(c
->ignore_sigpipe
),
4490 prefix
, yes_no(c
->memory_deny_write_execute
),
4491 prefix
, yes_no(c
->restrict_realtime
),
4492 prefix
, yes_no(c
->restrict_suid_sgid
),
4493 prefix
, exec_keyring_mode_to_string(c
->keyring_mode
),
4494 prefix
, yes_no(c
->protect_hostname
));
4497 fprintf(f
, "%sRootImage: %s\n", prefix
, c
->root_image
);
4499 STRV_FOREACH(e
, c
->environment
)
4500 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
4502 STRV_FOREACH(e
, c
->environment_files
)
4503 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
4505 STRV_FOREACH(e
, c
->pass_environment
)
4506 fprintf(f
, "%sPassEnvironment: %s\n", prefix
, *e
);
4508 STRV_FOREACH(e
, c
->unset_environment
)
4509 fprintf(f
, "%sUnsetEnvironment: %s\n", prefix
, *e
);
4511 fprintf(f
, "%sRuntimeDirectoryPreserve: %s\n", prefix
, exec_preserve_mode_to_string(c
->runtime_directory_preserve_mode
));
4513 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
4514 fprintf(f
, "%s%sMode: %04o\n", prefix
, exec_directory_type_to_string(dt
), c
->directories
[dt
].mode
);
4516 STRV_FOREACH(d
, c
->directories
[dt
].paths
)
4517 fprintf(f
, "%s%s: %s\n", prefix
, exec_directory_type_to_string(dt
), *d
);
4521 "%sTimeoutCleanSec: %s\n",
4522 prefix
, format_timespan(buf_clean
, sizeof(buf_clean
), c
->timeout_clean_usec
, USEC_PER_SEC
));
4529 if (c
->oom_score_adjust_set
)
4531 "%sOOMScoreAdjust: %i\n",
4532 prefix
, c
->oom_score_adjust
);
4534 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
4536 fprintf(f
, "%sLimit%s: " RLIM_FMT
"\n",
4537 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
4538 fprintf(f
, "%sLimit%sSoft: " RLIM_FMT
"\n",
4539 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_cur
);
4542 if (c
->ioprio_set
) {
4543 _cleanup_free_
char *class_str
= NULL
;
4545 r
= ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
4547 fprintf(f
, "%sIOSchedulingClass: %s\n", prefix
, class_str
);
4549 fprintf(f
, "%sIOPriority: %lu\n", prefix
, IOPRIO_PRIO_DATA(c
->ioprio
));
4552 if (c
->cpu_sched_set
) {
4553 _cleanup_free_
char *policy_str
= NULL
;
4555 r
= sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
4557 fprintf(f
, "%sCPUSchedulingPolicy: %s\n", prefix
, policy_str
);
4560 "%sCPUSchedulingPriority: %i\n"
4561 "%sCPUSchedulingResetOnFork: %s\n",
4562 prefix
, c
->cpu_sched_priority
,
4563 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
4566 if (c
->cpu_set
.set
) {
4567 _cleanup_free_
char *affinity
= NULL
;
4569 affinity
= cpu_set_to_range_string(&c
->cpu_set
);
4570 fprintf(f
, "%sCPUAffinity: %s\n", prefix
, affinity
);
4573 if (mpol_is_valid(numa_policy_get_type(&c
->numa_policy
))) {
4574 _cleanup_free_
char *nodes
= NULL
;
4576 nodes
= cpu_set_to_range_string(&c
->numa_policy
.nodes
);
4577 fprintf(f
, "%sNUMAPolicy: %s\n", prefix
, mpol_to_string(numa_policy_get_type(&c
->numa_policy
)));
4578 fprintf(f
, "%sNUMAMask: %s\n", prefix
, strnull(nodes
));
4581 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
4582 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
4585 "%sStandardInput: %s\n"
4586 "%sStandardOutput: %s\n"
4587 "%sStandardError: %s\n",
4588 prefix
, exec_input_to_string(c
->std_input
),
4589 prefix
, exec_output_to_string(c
->std_output
),
4590 prefix
, exec_output_to_string(c
->std_error
));
4592 if (c
->std_input
== EXEC_INPUT_NAMED_FD
)
4593 fprintf(f
, "%sStandardInputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDIN_FILENO
]);
4594 if (c
->std_output
== EXEC_OUTPUT_NAMED_FD
)
4595 fprintf(f
, "%sStandardOutputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDOUT_FILENO
]);
4596 if (c
->std_error
== EXEC_OUTPUT_NAMED_FD
)
4597 fprintf(f
, "%sStandardErrorFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDERR_FILENO
]);
4599 if (c
->std_input
== EXEC_INPUT_FILE
)
4600 fprintf(f
, "%sStandardInputFile: %s\n", prefix
, c
->stdio_file
[STDIN_FILENO
]);
4601 if (c
->std_output
== EXEC_OUTPUT_FILE
)
4602 fprintf(f
, "%sStandardOutputFile: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4603 if (c
->std_output
== EXEC_OUTPUT_FILE_APPEND
)
4604 fprintf(f
, "%sStandardOutputFileToAppend: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4605 if (c
->std_error
== EXEC_OUTPUT_FILE
)
4606 fprintf(f
, "%sStandardErrorFile: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4607 if (c
->std_error
== EXEC_OUTPUT_FILE_APPEND
)
4608 fprintf(f
, "%sStandardErrorFileToAppend: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4614 "%sTTYVHangup: %s\n"
4615 "%sTTYVTDisallocate: %s\n",
4616 prefix
, c
->tty_path
,
4617 prefix
, yes_no(c
->tty_reset
),
4618 prefix
, yes_no(c
->tty_vhangup
),
4619 prefix
, yes_no(c
->tty_vt_disallocate
));
4621 if (IN_SET(c
->std_output
,
4624 EXEC_OUTPUT_JOURNAL
,
4625 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4626 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4627 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) ||
4628 IN_SET(c
->std_error
,
4631 EXEC_OUTPUT_JOURNAL
,
4632 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4633 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4634 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
)) {
4636 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
4638 r
= log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
4640 fprintf(f
, "%sSyslogFacility: %s\n", prefix
, fac_str
);
4642 r
= log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
4644 fprintf(f
, "%sSyslogLevel: %s\n", prefix
, lvl_str
);
4647 if (c
->log_level_max
>= 0) {
4648 _cleanup_free_
char *t
= NULL
;
4650 (void) log_level_to_string_alloc(c
->log_level_max
, &t
);
4652 fprintf(f
, "%sLogLevelMax: %s\n", prefix
, strna(t
));
4655 if (c
->log_ratelimit_interval_usec
> 0) {
4656 char buf_timespan
[FORMAT_TIMESPAN_MAX
];
4659 "%sLogRateLimitIntervalSec: %s\n",
4660 prefix
, format_timespan(buf_timespan
, sizeof(buf_timespan
), c
->log_ratelimit_interval_usec
, USEC_PER_SEC
));
4663 if (c
->log_ratelimit_burst
> 0)
4664 fprintf(f
, "%sLogRateLimitBurst: %u\n", prefix
, c
->log_ratelimit_burst
);
4666 if (c
->n_log_extra_fields
> 0) {
4669 for (j
= 0; j
< c
->n_log_extra_fields
; j
++) {
4670 fprintf(f
, "%sLogExtraFields: ", prefix
);
4671 fwrite(c
->log_extra_fields
[j
].iov_base
,
4672 1, c
->log_extra_fields
[j
].iov_len
,
4678 if (c
->secure_bits
) {
4679 _cleanup_free_
char *str
= NULL
;
4681 r
= secure_bits_to_string_alloc(c
->secure_bits
, &str
);
4683 fprintf(f
, "%sSecure Bits: %s\n", prefix
, str
);
4686 if (c
->capability_bounding_set
!= CAP_ALL
) {
4687 _cleanup_free_
char *str
= NULL
;
4689 r
= capability_set_to_string_alloc(c
->capability_bounding_set
, &str
);
4691 fprintf(f
, "%sCapabilityBoundingSet: %s\n", prefix
, str
);
4694 if (c
->capability_ambient_set
!= 0) {
4695 _cleanup_free_
char *str
= NULL
;
4697 r
= capability_set_to_string_alloc(c
->capability_ambient_set
, &str
);
4699 fprintf(f
, "%sAmbientCapabilities: %s\n", prefix
, str
);
4703 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
4705 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
4707 fprintf(f
, "%sDynamicUser: %s\n", prefix
, yes_no(c
->dynamic_user
));
4709 if (!strv_isempty(c
->supplementary_groups
)) {
4710 fprintf(f
, "%sSupplementaryGroups:", prefix
);
4711 strv_fprintf(f
, c
->supplementary_groups
);
4716 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
4718 if (!strv_isempty(c
->read_write_paths
)) {
4719 fprintf(f
, "%sReadWritePaths:", prefix
);
4720 strv_fprintf(f
, c
->read_write_paths
);
4724 if (!strv_isempty(c
->read_only_paths
)) {
4725 fprintf(f
, "%sReadOnlyPaths:", prefix
);
4726 strv_fprintf(f
, c
->read_only_paths
);
4730 if (!strv_isempty(c
->inaccessible_paths
)) {
4731 fprintf(f
, "%sInaccessiblePaths:", prefix
);
4732 strv_fprintf(f
, c
->inaccessible_paths
);
4736 if (c
->n_bind_mounts
> 0)
4737 for (i
= 0; i
< c
->n_bind_mounts
; i
++)
4738 fprintf(f
, "%s%s: %s%s:%s:%s\n", prefix
,
4739 c
->bind_mounts
[i
].read_only
? "BindReadOnlyPaths" : "BindPaths",
4740 c
->bind_mounts
[i
].ignore_enoent
? "-": "",
4741 c
->bind_mounts
[i
].source
,
4742 c
->bind_mounts
[i
].destination
,
4743 c
->bind_mounts
[i
].recursive
? "rbind" : "norbind");
4745 if (c
->n_temporary_filesystems
> 0)
4746 for (i
= 0; i
< c
->n_temporary_filesystems
; i
++) {
4747 TemporaryFileSystem
*t
= c
->temporary_filesystems
+ i
;
4749 fprintf(f
, "%sTemporaryFileSystem: %s%s%s\n", prefix
,
4751 isempty(t
->options
) ? "" : ":",
4752 strempty(t
->options
));
4757 "%sUtmpIdentifier: %s\n",
4758 prefix
, c
->utmp_id
);
4760 if (c
->selinux_context
)
4762 "%sSELinuxContext: %s%s\n",
4763 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
4765 if (c
->apparmor_profile
)
4767 "%sAppArmorProfile: %s%s\n",
4768 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
4770 if (c
->smack_process_label
)
4772 "%sSmackProcessLabel: %s%s\n",
4773 prefix
, c
->smack_process_label_ignore
? "-" : "", c
->smack_process_label
);
4775 if (c
->personality
!= PERSONALITY_INVALID
)
4777 "%sPersonality: %s\n",
4778 prefix
, strna(personality_to_string(c
->personality
)));
4781 "%sLockPersonality: %s\n",
4782 prefix
, yes_no(c
->lock_personality
));
4784 if (c
->syscall_filter
) {
4792 "%sSystemCallFilter: ",
4795 if (!c
->syscall_whitelist
)
4799 HASHMAP_FOREACH_KEY(val
, id
, c
->syscall_filter
, j
) {
4800 _cleanup_free_
char *name
= NULL
;
4801 const char *errno_name
= NULL
;
4802 int num
= PTR_TO_INT(val
);
4809 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
4810 fputs(strna(name
), f
);
4813 errno_name
= errno_to_name(num
);
4815 fprintf(f
, ":%s", errno_name
);
4817 fprintf(f
, ":%d", num
);
4825 if (c
->syscall_archs
) {
4832 "%sSystemCallArchitectures:",
4836 SET_FOREACH(id
, c
->syscall_archs
, j
)
4837 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
4842 if (exec_context_restrict_namespaces_set(c
)) {
4843 _cleanup_free_
char *s
= NULL
;
4845 r
= namespace_flags_to_string(c
->restrict_namespaces
, &s
);
4847 fprintf(f
, "%sRestrictNamespaces: %s\n",
4851 if (c
->network_namespace_path
)
4853 "%sNetworkNamespacePath: %s\n",
4854 prefix
, c
->network_namespace_path
);
4856 if (c
->syscall_errno
> 0) {
4857 const char *errno_name
;
4859 fprintf(f
, "%sSystemCallErrorNumber: ", prefix
);
4861 errno_name
= errno_to_name(c
->syscall_errno
);
4863 fprintf(f
, "%s\n", errno_name
);
4865 fprintf(f
, "%d\n", c
->syscall_errno
);
4869 bool exec_context_maintains_privileges(const ExecContext
*c
) {
4872 /* Returns true if the process forked off would run under
4873 * an unchanged UID or as root. */
4878 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
4884 int exec_context_get_effective_ioprio(const ExecContext
*c
) {
4892 p
= ioprio_get(IOPRIO_WHO_PROCESS
, 0);
4894 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 4);
4899 void exec_context_free_log_extra_fields(ExecContext
*c
) {
4904 for (l
= 0; l
< c
->n_log_extra_fields
; l
++)
4905 free(c
->log_extra_fields
[l
].iov_base
);
4906 c
->log_extra_fields
= mfree(c
->log_extra_fields
);
4907 c
->n_log_extra_fields
= 0;
4910 void exec_context_revert_tty(ExecContext
*c
) {
4915 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
4916 exec_context_tty_reset(c
, NULL
);
4918 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
4919 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
4920 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
4922 if (exec_context_may_touch_tty(c
)) {
4925 path
= exec_context_tty_path(c
);
4927 r
= chmod_and_chown(path
, TTY_MODE
, 0, TTY_GID
);
4928 if (r
< 0 && r
!= -ENOENT
)
4929 log_warning_errno(r
, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path
);
4934 int exec_context_get_clean_directories(
4940 _cleanup_strv_free_
char **l
= NULL
;
4941 ExecDirectoryType t
;
4948 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
4951 if (!FLAGS_SET(mask
, 1U << t
))
4957 STRV_FOREACH(i
, c
->directories
[t
].paths
) {
4960 j
= path_join(prefix
[t
], *i
);
4964 r
= strv_consume(&l
, j
);
4968 /* Also remove private directories unconditionally. */
4969 if (t
!= EXEC_DIRECTORY_CONFIGURATION
) {
4970 j
= path_join(prefix
[t
], "private", *i
);
4974 r
= strv_consume(&l
, j
);
4985 int exec_context_get_clean_mask(ExecContext
*c
, ExecCleanMask
*ret
) {
4986 ExecCleanMask mask
= 0;
4991 for (ExecDirectoryType t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++)
4992 if (!strv_isempty(c
->directories
[t
].paths
))
4999 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
5006 dual_timestamp_get(&s
->start_timestamp
);
5009 void exec_status_exit(ExecStatus
*s
, const ExecContext
*context
, pid_t pid
, int code
, int status
) {
5012 if (s
->pid
!= pid
) {
5018 dual_timestamp_get(&s
->exit_timestamp
);
5023 if (context
&& context
->utmp_id
)
5024 (void) utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
5027 void exec_status_reset(ExecStatus
*s
) {
5030 *s
= (ExecStatus
) {};
5033 void exec_status_dump(const ExecStatus
*s
, FILE *f
, const char *prefix
) {
5034 char buf
[FORMAT_TIMESTAMP_MAX
];
5042 prefix
= strempty(prefix
);
5045 "%sPID: "PID_FMT
"\n",
5048 if (dual_timestamp_is_set(&s
->start_timestamp
))
5050 "%sStart Timestamp: %s\n",
5051 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
5053 if (dual_timestamp_is_set(&s
->exit_timestamp
))
5055 "%sExit Timestamp: %s\n"
5057 "%sExit Status: %i\n",
5058 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
5059 prefix
, sigchld_code_to_string(s
->code
),
5063 static char *exec_command_line(char **argv
) {
5071 STRV_FOREACH(a
, argv
)
5079 STRV_FOREACH(a
, argv
) {
5086 if (strpbrk(*a
, WHITESPACE
)) {
5097 /* FIXME: this doesn't really handle arguments that have
5098 * spaces and ticks in them */
5103 static void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
5104 _cleanup_free_
char *cmd
= NULL
;
5105 const char *prefix2
;
5110 prefix
= strempty(prefix
);
5111 prefix2
= strjoina(prefix
, "\t");
5113 cmd
= exec_command_line(c
->argv
);
5115 "%sCommand Line: %s\n",
5116 prefix
, cmd
? cmd
: strerror_safe(ENOMEM
));
5118 exec_status_dump(&c
->exec_status
, f
, prefix2
);
5121 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
5124 prefix
= strempty(prefix
);
5126 LIST_FOREACH(command
, c
, c
)
5127 exec_command_dump(c
, f
, prefix
);
5130 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
5137 /* It's kind of important, that we keep the order here */
5138 LIST_FIND_TAIL(command
, *l
, end
);
5139 LIST_INSERT_AFTER(command
, *l
, end
, e
);
5144 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
5152 l
= strv_new_ap(path
, ap
);
5164 free_and_replace(c
->path
, p
);
5166 return strv_free_and_replace(c
->argv
, l
);
5169 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
5170 _cleanup_strv_free_
char **l
= NULL
;
5178 l
= strv_new_ap(path
, ap
);
5184 r
= strv_extend_strv(&c
->argv
, l
, false);
5191 static void *remove_tmpdir_thread(void *p
) {
5192 _cleanup_free_
char *path
= p
;
5194 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
5198 static ExecRuntime
* exec_runtime_free(ExecRuntime
*rt
, bool destroy
) {
5205 (void) hashmap_remove(rt
->manager
->exec_runtime_by_id
, rt
->id
);
5207 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5208 if (destroy
&& rt
->tmp_dir
) {
5209 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
5211 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
5213 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
5220 if (destroy
&& rt
->var_tmp_dir
) {
5221 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
5223 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
5225 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
5226 free(rt
->var_tmp_dir
);
5229 rt
->var_tmp_dir
= NULL
;
5232 rt
->id
= mfree(rt
->id
);
5233 rt
->tmp_dir
= mfree(rt
->tmp_dir
);
5234 rt
->var_tmp_dir
= mfree(rt
->var_tmp_dir
);
5235 safe_close_pair(rt
->netns_storage_socket
);
5239 static void exec_runtime_freep(ExecRuntime
**rt
) {
5240 (void) exec_runtime_free(*rt
, false);
5243 static int exec_runtime_allocate(ExecRuntime
**ret
) {
5248 n
= new(ExecRuntime
, 1);
5252 *n
= (ExecRuntime
) {
5253 .netns_storage_socket
= { -1, -1 },
5260 static int exec_runtime_add(
5263 const char *tmp_dir
,
5264 const char *var_tmp_dir
,
5265 const int netns_storage_socket
[2],
5266 ExecRuntime
**ret
) {
5268 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt
= NULL
;
5274 r
= hashmap_ensure_allocated(&m
->exec_runtime_by_id
, &string_hash_ops
);
5278 r
= exec_runtime_allocate(&rt
);
5282 rt
->id
= strdup(id
);
5287 rt
->tmp_dir
= strdup(tmp_dir
);
5291 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5292 assert(var_tmp_dir
);
5293 rt
->var_tmp_dir
= strdup(var_tmp_dir
);
5294 if (!rt
->var_tmp_dir
)
5298 if (netns_storage_socket
) {
5299 rt
->netns_storage_socket
[0] = netns_storage_socket
[0];
5300 rt
->netns_storage_socket
[1] = netns_storage_socket
[1];
5303 r
= hashmap_put(m
->exec_runtime_by_id
, rt
->id
, rt
);
5312 /* do not remove created ExecRuntime object when the operation succeeds. */
/* Prepares the resources an ExecRuntime carries for context 'c' and registers the object under 'id'
 * through exec_runtime_add().
 *
 *  - Nothing is needed when none of c->private_network, c->private_tmp and c->network_namespace_path is
 *    set (exec_runtime_acquire() documents a result of 0 as "not necessary to create").
 *  - c->private_tmp: setup_tmp_dirs() fills tmp_dir and var_tmp_dir.
 *  - c->private_network or c->network_namespace_path: an AF_UNIX/SOCK_DGRAM socketpair with
 *    SOCK_CLOEXEC is created in netns_storage_socket.
 *
 * The strings are _cleanup_free_ and the socket pair is _cleanup_close_pair_, so they are released
 * automatically on every exit unless disarmed. */
5317 static int exec_runtime_make(Manager
*m
, const ExecContext
*c
, const char *id
, ExecRuntime
**ret
) {
5318 _cleanup_free_
char *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
5319 _cleanup_close_pair_
int netns_storage_socket
[2] = { -1, -1 };
5326 /* It is not necessary to create ExecRuntime object. */
5327 if (!c
->private_network
&& !c
->private_tmp
&& !c
->network_namespace_path
)
5330 if (c
->private_tmp
) {
5331 r
= setup_tmp_dirs(id
, &tmp_dir
, &var_tmp_dir
);
5336 if (c
->private_network
|| c
->network_namespace_path
) {
5337 if (socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, netns_storage_socket
) < 0)
/* exec_runtime_add() strdup()s the two paths (so the local copies are still freed here) but stores
 * the raw fd numbers in the new object. */
5341 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, netns_storage_socket
, ret
);
/* Disarm _cleanup_close_pair_: the descriptors are now referenced by the registered ExecRuntime. */
5346 netns_storage_socket
[0] = netns_storage_socket
[1] = -1;
/* Returns, via 'ret', the ExecRuntime registered under 'id' in m->exec_runtime_by_id. Per the inline
 * comments, an existing object is reused with its reference count increased; otherwise
 * exec_runtime_make() is asked to build one from 'c', and a result of 0 from it means no object is
 * needed.
 * NOTE(review): how 'create' gates the creation path, and the exact return values, are in lines not
 * visible in this view - confirm before relying on them. */
5350 int exec_runtime_acquire(Manager
*m
, const ExecContext
*c
, const char *id
, bool create
, ExecRuntime
**ret
) {
5358 rt
= hashmap_get(m
->exec_runtime_by_id
, id
);
5360 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5366 /* If not found, then create a new object. */
5367 r
= exec_runtime_make(m
, c
, id
, &rt
);
5369 /* When r == 0, it is not necessary to create ExecRuntime object. */
5373 /* increment reference counter. */
/* Drops a reference on 'rt'. The function asserts that rt->n_ref is still positive and hands the object
 * to exec_runtime_free(rt, destroy); 'destroy' is forwarded unchanged and, in exec_runtime_free(),
 * gates the asynchronous removal of the temporary directories.
 * NOTE(review): the NULL check, the counter decrement and the condition under which the free is reached
 * are not visible in this view - presumably the free only happens when the count drops to zero. */
5379 ExecRuntime
*exec_runtime_unref(ExecRuntime
*rt
, bool destroy
) {
5383 assert(rt
->n_ref
> 0);
5389 return exec_runtime_free(rt
, destroy
);
/* Writes one text record per ExecRuntime stored in m->exec_runtime_by_id to 'f'.
 *
 * Record layout, fields separated by a single space and emitted in this order:
 *     exec-runtime=<id> [tmp-dir=<path>] [var-tmp-dir=<path>] [netns-socket-0=<fd>] [netns-socket-1=<fd>]
 * This is the layout exec_runtime_deserialize_one() parses (it splits on ' ' and checks the prefixes
 * in the same order).
 *
 * Socket descriptors are not written directly: each one is duplicated into 'fds' with fdset_put_dup()
 * and the number of the duplicate is what ends up in the text.
 * NOTE(review): the values are printed with plain %s and the parser splits on spaces, so an id or path
 * containing a space would presumably not round-trip - confirm whether that can occur. */
5392 int exec_runtime_serialize(const Manager
*m
, FILE *f
, FDSet
*fds
) {
5400 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
5401 fprintf(f
, "exec-runtime=%s", rt
->id
);
5404 fprintf(f
, " tmp-dir=%s", rt
->tmp_dir
);
5406 if (rt
->var_tmp_dir
)
5407 fprintf(f
, " var-tmp-dir=%s", rt
->var_tmp_dir
);
/* Only slots holding a valid descriptor (>= 0) are serialized. */
5409 if (rt
->netns_storage_socket
[0] >= 0) {
5412 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
5416 fprintf(f
, " netns-socket-0=%i", copy
);
5419 if (rt
->netns_storage_socket
[1] >= 0) {
5422 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
5426 fprintf(f
, " netns-socket-1=%i", copy
);
/* Applies one legacy per-unit key/value pair to the ExecRuntime registered under u->id.
 *
 * Keys handled: "tmp-dir", "var-tmp-dir" (value is strdup()ed and swapped in with free_and_replace())
 * and "netns-socket-0", "netns-socket-1" (value is parsed as an fd number that must be present in
 * 'fds'; the previous descriptor in that slot is closed and replaced by the one taken out of 'fds').
 *
 * The target object is looked up with hashmap_get(); a fresh one (rt_create, released automatically
 * through exec_runtime_freep unless disarmed) is allocated and later inserted into the manager's
 * hashmap when needed.
 * NOTE(review): the branch deciding between the looked-up and the newly allocated object, and all
 * return statements, are not visible in this view - confirm them against the full file. */
5435 int exec_runtime_deserialize_compat(Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
5436 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt_create
= NULL
;
5440 /* This is for the migration from old (v237 or earlier) deserialization text.
5441 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5442 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5443 * so or not from the serialized text, then we always creates a new object owned by this. */
5449 /* Manager manages ExecRuntime objects by the unit id.
5450 * So, we omit the serialized text when the unit does not have id (yet?)... */
/* NOTE(review): the condition tests the unit id (u->id) while the message below talks about an
 * "Invocation ID" - the wording looks inaccurate; confirm and adjust the message if so. */
5451 if (isempty(u
->id
)) {
5452 log_unit_debug(u
, "Invocation ID not found. Dropping runtime parameter.");
5456 r
= hashmap_ensure_allocated(&u
->manager
->exec_runtime_by_id
, &string_hash_ops
);
5458 log_unit_debug_errno(u
, r
, "Failed to allocate storage for runtime parameter: %m");
5462 rt
= hashmap_get(u
->manager
->exec_runtime_by_id
, u
->id
);
5464 r
= exec_runtime_allocate(&rt_create
);
5468 rt_create
->id
= strdup(u
->id
);
5475 if (streq(key
, "tmp-dir")) {
5478 copy
= strdup(value
);
5482 free_and_replace(rt
->tmp_dir
, copy
);
5484 } else if (streq(key
, "var-tmp-dir")) {
5487 copy
= strdup(value
);
5491 free_and_replace(rt
->var_tmp_dir
, copy
);
5493 } else if (streq(key
, "netns-socket-0")) {
/* The number is only accepted if it parses and names a descriptor that is actually in 'fds'. */
5496 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
5497 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
/* Close whatever was stored before, then take the descriptor out of 'fds' and keep it in rt. */
5501 safe_close(rt
->netns_storage_socket
[0]);
5502 rt
->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
5504 } else if (streq(key
, "netns-socket-1")) {
5507 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
5508 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
5512 safe_close(rt
->netns_storage_socket
[1]);
5513 rt
->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
5517 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5519 r
= hashmap_put(u
->manager
->exec_runtime_by_id
, rt_create
->id
, rt_create
);
5521 log_unit_debug_errno(u
, r
, "Failed to put runtime parameter to manager's storage: %m");
/* Setting the back-pointer marks the object as registered with this manager. */
5525 rt_create
->manager
= u
->manager
;
5534 void exec_runtime_deserialize_one(Manager
*m
, const char *value
, FDSet
*fds
) {
5535 char *id
= NULL
, *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
5536 int r
, fd0
= -1, fd1
= -1;
5537 const char *p
, *v
= value
;
5544 n
= strcspn(v
, " ");
5545 id
= strndupa(v
, n
);
5550 v
= startswith(p
, "tmp-dir=");
5552 n
= strcspn(v
, " ");
5553 tmp_dir
= strndupa(v
, n
);
5559 v
= startswith(p
, "var-tmp-dir=");
5561 n
= strcspn(v
, " ");
5562 var_tmp_dir
= strndupa(v
, n
);
5568 v
= startswith(p
, "netns-socket-0=");
5572 n
= strcspn(v
, " ");
5573 buf
= strndupa(v
, n
);
5574 if (safe_atoi(buf
, &fd0
) < 0 || !fdset_contains(fds
, fd0
)) {
5575 log_debug("Unable to process exec-runtime netns fd specification.");
5578 fd0
= fdset_remove(fds
, fd0
);
5584 v
= startswith(p
, "netns-socket-1=");
5588 n
= strcspn(v
, " ");
5589 buf
= strndupa(v
, n
);
5590 if (safe_atoi(buf
, &fd1
) < 0 || !fdset_contains(fds
, fd1
)) {
5591 log_debug("Unable to process exec-runtime netns fd specification.");
5594 fd1
= fdset_remove(fds
, fd1
);
5599 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, (int[]) { fd0
, fd1
}, NULL
);
5601 log_debug_errno(r
, "Failed to add exec-runtime: %m");
/* Walks m->exec_runtime_by_id and releases entries through exec_runtime_free(rt, false); with
 * destroy=false the temporary directories are not scheduled for removal. Per the inline comment only
 * unreferenced objects are meant to be freed.
 * NOTE(review): the check that skips still-referenced objects is not visible in this view - confirm.
 * exec_runtime_free() removes the entry from the same hashmap that is being iterated when rt->manager
 * is set; verify that HASHMAP_FOREACH tolerates removing the current entry. */
5604 void exec_runtime_vacuum(Manager
*m
) {
5610 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5612 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
5616 (void) exec_runtime_free(rt
, false);
5620 void exec_params_clear(ExecParameters
*p
) {
5624 strv_free(p
->environment
);
/* Names of the ExecInput values, indexed by the enum constant. The table is handed to
 * DEFINE_STRING_TABLE_LOOKUP (defined elsewhere), which presumably generates the exec_input
 * string <-> enum helpers - confirm against the macro definition. */
5627 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
5628 [EXEC_INPUT_NULL
] = "null",
5629 [EXEC_INPUT_TTY
] = "tty",
5630 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
5631 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
5632 [EXEC_INPUT_SOCKET
] = "socket",
5633 [EXEC_INPUT_NAMED_FD
] = "fd",
5634 [EXEC_INPUT_DATA
] = "data",
5635 [EXEC_INPUT_FILE
] = "file",
5638 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* Names of the ExecOutput values, indexed by the enum constant. The "+console" entries are the
 * variants whose enum names end in _AND_CONSOLE. The table is handed to DEFINE_STRING_TABLE_LOOKUP
 * (defined elsewhere), which presumably generates the exec_output string <-> enum helpers. */
5640 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
5641 [EXEC_OUTPUT_INHERIT
] = "inherit",
5642 [EXEC_OUTPUT_NULL
] = "null",
5643 [EXEC_OUTPUT_TTY
] = "tty",
5644 [EXEC_OUTPUT_SYSLOG
] = "syslog",
5645 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
5646 [EXEC_OUTPUT_KMSG
] = "kmsg",
5647 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
5648 [EXEC_OUTPUT_JOURNAL
] = "journal",
5649 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
5650 [EXEC_OUTPUT_SOCKET
] = "socket",
5651 [EXEC_OUTPUT_NAMED_FD
] = "fd",
5652 [EXEC_OUTPUT_FILE
] = "file",
5653 [EXEC_OUTPUT_FILE_APPEND
] = "append",
5656 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
/* Names of the ExecUtmpMode values, indexed by the enum constant, plus the lookup helpers that
 * DEFINE_STRING_TABLE_LOOKUP (defined elsewhere) presumably generates for exec_utmp_mode. */
5658 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
5659 [EXEC_UTMP_INIT
] = "init",
5660 [EXEC_UTMP_LOGIN
] = "login",
5661 [EXEC_UTMP_USER
] = "user",
5664 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);
/* Names of the ExecPreserveMode values, indexed by the enum constant. Unlike the neighbouring tables
 * this one uses the _WITH_BOOLEAN lookup variant with EXEC_PRESERVE_YES as extra argument.
 * NOTE(review): presumably this lets generic boolean spellings be parsed, with "true" mapping to
 * EXEC_PRESERVE_YES - confirm against the macro definition. */
5666 static const char* const exec_preserve_mode_table
[_EXEC_PRESERVE_MODE_MAX
] = {
5667 [EXEC_PRESERVE_NO
] = "no",
5668 [EXEC_PRESERVE_YES
] = "yes",
5669 [EXEC_PRESERVE_RESTART
] = "restart",
5672 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode
, ExecPreserveMode
, EXEC_PRESERVE_YES
);
5674 /* This table maps ExecDirectoryType to the setting it is configured with in the unit */
/* It is the first of three tables in this file that are indexed by ExecDirectoryType and sized by
 * _EXEC_DIRECTORY_TYPE_MAX; a new directory type needs an entry in each of them. */
5675 static const char* const exec_directory_type_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5676 [EXEC_DIRECTORY_RUNTIME
] = "RuntimeDirectory",
5677 [EXEC_DIRECTORY_STATE
] = "StateDirectory",
5678 [EXEC_DIRECTORY_CACHE
] = "CacheDirectory",
5679 [EXEC_DIRECTORY_LOGS
] = "LogsDirectory",
5680 [EXEC_DIRECTORY_CONFIGURATION
] = "ConfigurationDirectory",
5683 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type
, ExecDirectoryType
);
5685 /* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
5686 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
5687 * directories, specifically .timer units with their timestamp touch file. */
/* Same index space and size (_EXEC_DIRECTORY_TYPE_MAX) as the other ExecDirectoryType tables in this
 * file; the lookup helpers are generated under the exec_resource_type name. */
5688 static const char* const exec_resource_type_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5689 [EXEC_DIRECTORY_RUNTIME
] = "runtime",
5690 [EXEC_DIRECTORY_STATE
] = "state",
5691 [EXEC_DIRECTORY_CACHE
] = "cache",
5692 [EXEC_DIRECTORY_LOGS
] = "logs",
5693 [EXEC_DIRECTORY_CONFIGURATION
] = "configuration",
5696 DEFINE_STRING_TABLE_LOOKUP(exec_resource_type
, ExecDirectoryType
);
5698 /* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
5699 * the service payload in. */
/* Only the PRIVATE ..._TO_STRING macro variant is used here, so presumably just a file-local
 * enum -> string helper is generated and there is no parser for these names - confirm against the
 * macro definition. */
5700 static const char* const exec_directory_env_name_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5701 [EXEC_DIRECTORY_RUNTIME
] = "RUNTIME_DIRECTORY",
5702 [EXEC_DIRECTORY_STATE
] = "STATE_DIRECTORY",
5703 [EXEC_DIRECTORY_CACHE
] = "CACHE_DIRECTORY",
5704 [EXEC_DIRECTORY_LOGS
] = "LOGS_DIRECTORY",
5705 [EXEC_DIRECTORY_CONFIGURATION
] = "CONFIGURATION_DIRECTORY",
5708 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name
, ExecDirectoryType
);
/* Names of the ExecKeyringMode values, indexed by the enum constant, plus the lookup helpers that
 * DEFINE_STRING_TABLE_LOOKUP (defined elsewhere) presumably generates for exec_keyring_mode. */
5710 static const char* const exec_keyring_mode_table
[_EXEC_KEYRING_MODE_MAX
] = {
5711 [EXEC_KEYRING_INHERIT
] = "inherit",
5712 [EXEC_KEYRING_PRIVATE
] = "private",
5713 [EXEC_KEYRING_SHARED
] = "shared",
5716 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode
, ExecKeyringMode
);