1 /* SPDX-License-Identifier: LGPL-2.1+ */
10 #include <sys/capability.h>
11 #include <sys/eventfd.h>
13 #include <sys/personality.h>
14 #include <sys/prctl.h>
16 #include <sys/socket.h>
18 #include <sys/types.h>
24 #include <security/pam_appl.h>
28 #include <selinux/selinux.h>
36 #include <sys/apparmor.h>
39 #include "sd-messages.h"
42 #include "alloc-util.h"
44 #include "apparmor-util.h"
49 #include "capability-util.h"
50 #include "chown-recursive.h"
51 #include "cpu-set-util.h"
54 #include "errno-list.h"
56 #include "exit-status.h"
59 #include "format-util.h"
61 #include "glob-util.h"
70 #include "namespace.h"
71 #include "parse-util.h"
72 #include "path-util.h"
73 #include "process-util.h"
74 #include "rlimit-util.h"
77 #include "seccomp-util.h"
79 #include "securebits.h"
80 #include "securebits-util.h"
81 #include "selinux-util.h"
82 #include "signal-util.h"
83 #include "smack-util.h"
84 #include "socket-util.h"
86 #include "stat-util.h"
87 #include "string-table.h"
88 #include "string-util.h"
90 #include "syslog-util.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
94 #include "user-util.h"
96 #include "utmp-wtmp.h"
98 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
99 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
101 /* This assumes there is a 'tty' group */
102 #define TTY_MODE 0620
104 #define SNDBUF_SIZE (8*1024*1024)
106 static int shift_fds(int fds
[], size_t n_fds
) {
107 int start
, restart_from
;
112 /* Modifies the fds array! (sorts it) */
122 for (i
= start
; i
< (int) n_fds
; i
++) {
125 /* Already at right index? */
129 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
136 /* Hmm, the fd we wanted isn't free? Then
137 * let's remember that and try again from here */
138 if (nfd
!= i
+3 && restart_from
< 0)
142 if (restart_from
< 0)
145 start
= restart_from
;
151 static int flags_fds(const int fds
[], size_t n_socket_fds
, size_t n_storage_fds
, bool nonblock
) {
155 n_fds
= n_socket_fds
+ n_storage_fds
;
161 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
162 * O_NONBLOCK only applies to socket activation though. */
164 for (i
= 0; i
< n_fds
; i
++) {
166 if (i
< n_socket_fds
) {
167 r
= fd_nonblock(fds
[i
], nonblock
);
172 /* We unconditionally drop FD_CLOEXEC from the fds,
173 * since after all we want to pass these fds to our
176 r
= fd_cloexec(fds
[i
], false);
184 static const char *exec_context_tty_path(const ExecContext
*context
) {
187 if (context
->stdio_as_fds
)
190 if (context
->tty_path
)
191 return context
->tty_path
;
193 return "/dev/console";
196 static void exec_context_tty_reset(const ExecContext
*context
, const ExecParameters
*p
) {
201 path
= exec_context_tty_path(context
);
203 if (context
->tty_vhangup
) {
204 if (p
&& p
->stdin_fd
>= 0)
205 (void) terminal_vhangup_fd(p
->stdin_fd
);
207 (void) terminal_vhangup(path
);
210 if (context
->tty_reset
) {
211 if (p
&& p
->stdin_fd
>= 0)
212 (void) reset_terminal_fd(p
->stdin_fd
, true);
214 (void) reset_terminal(path
);
217 if (context
->tty_vt_disallocate
&& path
)
218 (void) vt_disallocate(path
);
221 static bool is_terminal_input(ExecInput i
) {
224 EXEC_INPUT_TTY_FORCE
,
225 EXEC_INPUT_TTY_FAIL
);
228 static bool is_terminal_output(ExecOutput o
) {
231 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
232 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
233 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
);
236 static bool is_syslog_output(ExecOutput o
) {
239 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
);
242 static bool is_kmsg_output(ExecOutput o
) {
245 EXEC_OUTPUT_KMSG_AND_CONSOLE
);
248 static bool exec_context_needs_term(const ExecContext
*c
) {
251 /* Return true if the execution context suggests we should set $TERM to something useful. */
253 if (is_terminal_input(c
->std_input
))
256 if (is_terminal_output(c
->std_output
))
259 if (is_terminal_output(c
->std_error
))
262 return !!c
->tty_path
;
265 static int open_null_as(int flags
, int nfd
) {
270 fd
= open("/dev/null", flags
|O_NOCTTY
);
274 return move_fd(fd
, nfd
, false);
277 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
278 static const union sockaddr_union sa
= {
279 .un
.sun_family
= AF_UNIX
,
280 .un
.sun_path
= "/run/systemd/journal/stdout",
282 uid_t olduid
= UID_INVALID
;
283 gid_t oldgid
= GID_INVALID
;
286 if (gid_is_valid(gid
)) {
289 if (setegid(gid
) < 0)
293 if (uid_is_valid(uid
)) {
296 if (seteuid(uid
) < 0) {
302 r
= connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0 ? -errno
: 0;
304 /* If we fail to restore the uid or gid, things will likely
305 fail later on. This should only happen if an LSM interferes. */
307 if (uid_is_valid(uid
))
308 (void) seteuid(olduid
);
311 if (gid_is_valid(gid
))
312 (void) setegid(oldgid
);
317 static int connect_logger_as(
319 const ExecContext
*context
,
320 const ExecParameters
*params
,
331 assert(output
< _EXEC_OUTPUT_MAX
);
335 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
339 r
= connect_journal_socket(fd
, uid
, gid
);
343 if (shutdown(fd
, SHUT_RD
) < 0) {
348 (void) fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
358 context
->syslog_identifier
?: ident
,
359 params
->flags
& EXEC_PASS_LOG_UNIT
? unit
->id
: "",
360 context
->syslog_priority
,
361 !!context
->syslog_level_prefix
,
362 is_syslog_output(output
),
363 is_kmsg_output(output
),
364 is_terminal_output(output
));
366 return move_fd(fd
, nfd
, false);
368 static int open_terminal_as(const char *path
, int flags
, int nfd
) {
374 fd
= open_terminal(path
, flags
| O_NOCTTY
);
378 return move_fd(fd
, nfd
, false);
381 static int acquire_path(const char *path
, int flags
, mode_t mode
) {
382 union sockaddr_union sa
= {};
383 _cleanup_close_
int fd
= -1;
388 if (IN_SET(flags
& O_ACCMODE
, O_WRONLY
, O_RDWR
))
391 fd
= open(path
, flags
|O_NOCTTY
, mode
);
395 if (errno
!= ENXIO
) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
397 if (strlen(path
) >= sizeof(sa
.un
.sun_path
)) /* Too long, can't be a UNIX socket */
400 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
402 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
406 salen
= sockaddr_un_set_path(&sa
.un
, path
);
410 if (connect(fd
, &sa
.sa
, salen
) < 0)
411 return errno
== EINVAL
? -ENXIO
: -errno
; /* Propagate initial error if we get EINVAL, i.e. we have
412 * indication that this wasn't an AF_UNIX socket after all */
414 if ((flags
& O_ACCMODE
) == O_RDONLY
)
415 r
= shutdown(fd
, SHUT_WR
);
416 else if ((flags
& O_ACCMODE
) == O_WRONLY
)
417 r
= shutdown(fd
, SHUT_RD
);
426 static int fixup_input(
427 const ExecContext
*context
,
429 bool apply_tty_stdin
) {
435 std_input
= context
->std_input
;
437 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
438 return EXEC_INPUT_NULL
;
440 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
441 return EXEC_INPUT_NULL
;
443 if (std_input
== EXEC_INPUT_DATA
&& context
->stdin_data_size
== 0)
444 return EXEC_INPUT_NULL
;
449 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
451 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
452 return EXEC_OUTPUT_INHERIT
;
457 static int setup_input(
458 const ExecContext
*context
,
459 const ExecParameters
*params
,
461 int named_iofds
[3]) {
468 if (params
->stdin_fd
>= 0) {
469 if (dup2(params
->stdin_fd
, STDIN_FILENO
) < 0)
472 /* Try to make this the controlling tty, if it is a tty, and reset it */
473 if (isatty(STDIN_FILENO
)) {
474 (void) ioctl(STDIN_FILENO
, TIOCSCTTY
, context
->std_input
== EXEC_INPUT_TTY_FORCE
);
475 (void) reset_terminal_fd(STDIN_FILENO
, true);
481 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
485 case EXEC_INPUT_NULL
:
486 return open_null_as(O_RDONLY
, STDIN_FILENO
);
489 case EXEC_INPUT_TTY_FORCE
:
490 case EXEC_INPUT_TTY_FAIL
: {
493 fd
= acquire_terminal(exec_context_tty_path(context
),
494 i
== EXEC_INPUT_TTY_FAIL
? ACQUIRE_TERMINAL_TRY
:
495 i
== EXEC_INPUT_TTY_FORCE
? ACQUIRE_TERMINAL_FORCE
:
496 ACQUIRE_TERMINAL_WAIT
,
501 return move_fd(fd
, STDIN_FILENO
, false);
504 case EXEC_INPUT_SOCKET
:
505 assert(socket_fd
>= 0);
507 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
509 case EXEC_INPUT_NAMED_FD
:
510 assert(named_iofds
[STDIN_FILENO
] >= 0);
512 (void) fd_nonblock(named_iofds
[STDIN_FILENO
], false);
513 return dup2(named_iofds
[STDIN_FILENO
], STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
515 case EXEC_INPUT_DATA
: {
518 fd
= acquire_data_fd(context
->stdin_data
, context
->stdin_data_size
, 0);
522 return move_fd(fd
, STDIN_FILENO
, false);
525 case EXEC_INPUT_FILE
: {
529 assert(context
->stdio_file
[STDIN_FILENO
]);
531 rw
= (context
->std_output
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDOUT_FILENO
])) ||
532 (context
->std_error
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDERR_FILENO
]));
534 fd
= acquire_path(context
->stdio_file
[STDIN_FILENO
], rw
? O_RDWR
: O_RDONLY
, 0666 & ~context
->umask
);
538 return move_fd(fd
, STDIN_FILENO
, false);
542 assert_not_reached("Unknown input type");
546 static int setup_output(
548 const ExecContext
*context
,
549 const ExecParameters
*params
,
556 dev_t
*journal_stream_dev
,
557 ino_t
*journal_stream_ino
) {
567 assert(journal_stream_dev
);
568 assert(journal_stream_ino
);
570 if (fileno
== STDOUT_FILENO
&& params
->stdout_fd
>= 0) {
572 if (dup2(params
->stdout_fd
, STDOUT_FILENO
) < 0)
575 return STDOUT_FILENO
;
578 if (fileno
== STDERR_FILENO
&& params
->stderr_fd
>= 0) {
579 if (dup2(params
->stderr_fd
, STDERR_FILENO
) < 0)
582 return STDERR_FILENO
;
585 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
586 o
= fixup_output(context
->std_output
, socket_fd
);
588 if (fileno
== STDERR_FILENO
) {
590 e
= fixup_output(context
->std_error
, socket_fd
);
592 /* This expects the input and output are already set up */
594 /* Don't change the stderr file descriptor if we inherit all
595 * the way and are not on a tty */
596 if (e
== EXEC_OUTPUT_INHERIT
&&
597 o
== EXEC_OUTPUT_INHERIT
&&
598 i
== EXEC_INPUT_NULL
&&
599 !is_terminal_input(context
->std_input
) &&
603 /* Duplicate from stdout if possible */
604 if ((e
== o
&& e
!= EXEC_OUTPUT_NAMED_FD
) || e
== EXEC_OUTPUT_INHERIT
)
605 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
609 } else if (o
== EXEC_OUTPUT_INHERIT
) {
610 /* If input got downgraded, inherit the original value */
611 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
612 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
614 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
615 if (!IN_SET(i
, EXEC_INPUT_NULL
, EXEC_INPUT_DATA
))
616 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
618 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
622 /* We need to open /dev/null here anew, to get the right access mode. */
623 return open_null_as(O_WRONLY
, fileno
);
628 case EXEC_OUTPUT_NULL
:
629 return open_null_as(O_WRONLY
, fileno
);
631 case EXEC_OUTPUT_TTY
:
632 if (is_terminal_input(i
))
633 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
635 /* We don't reset the terminal if this is just about output */
636 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
638 case EXEC_OUTPUT_SYSLOG
:
639 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
640 case EXEC_OUTPUT_KMSG
:
641 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
642 case EXEC_OUTPUT_JOURNAL
:
643 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
644 r
= connect_logger_as(unit
, context
, params
, o
, ident
, fileno
, uid
, gid
);
646 log_unit_warning_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
647 r
= open_null_as(O_WRONLY
, fileno
);
651 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
652 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
653 * services to detect whether they are connected to the journal or not.
655 * If both stdout and stderr are connected to a stream then let's make sure to store the data
656 * about STDERR as that's usually the best way to do logging. */
658 if (fstat(fileno
, &st
) >= 0 &&
659 (*journal_stream_ino
== 0 || fileno
== STDERR_FILENO
)) {
660 *journal_stream_dev
= st
.st_dev
;
661 *journal_stream_ino
= st
.st_ino
;
666 case EXEC_OUTPUT_SOCKET
:
667 assert(socket_fd
>= 0);
669 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
671 case EXEC_OUTPUT_NAMED_FD
:
672 assert(named_iofds
[fileno
] >= 0);
674 (void) fd_nonblock(named_iofds
[fileno
], false);
675 return dup2(named_iofds
[fileno
], fileno
) < 0 ? -errno
: fileno
;
677 case EXEC_OUTPUT_FILE
:
678 case EXEC_OUTPUT_FILE_APPEND
: {
682 assert(context
->stdio_file
[fileno
]);
684 rw
= context
->std_input
== EXEC_INPUT_FILE
&&
685 streq_ptr(context
->stdio_file
[fileno
], context
->stdio_file
[STDIN_FILENO
]);
688 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
691 if (o
== EXEC_OUTPUT_FILE_APPEND
)
694 fd
= acquire_path(context
->stdio_file
[fileno
], flags
, 0666 & ~context
->umask
);
699 return move_fd(fd
, fileno
, 0);
703 assert_not_reached("Unknown error type");
707 static int chown_terminal(int fd
, uid_t uid
) {
712 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
716 /* This might fail. What matters are the results. */
717 (void) fchown(fd
, uid
, -1);
718 (void) fchmod(fd
, TTY_MODE
);
720 if (fstat(fd
, &st
) < 0)
723 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
729 static int setup_confirm_stdio(const char *vc
, int *_saved_stdin
, int *_saved_stdout
) {
730 _cleanup_close_
int fd
= -1, saved_stdin
= -1, saved_stdout
= -1;
733 assert(_saved_stdin
);
734 assert(_saved_stdout
);
736 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
740 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
741 if (saved_stdout
< 0)
744 fd
= acquire_terminal(vc
, ACQUIRE_TERMINAL_WAIT
, DEFAULT_CONFIRM_USEC
);
748 r
= chown_terminal(fd
, getuid());
752 r
= reset_terminal_fd(fd
, true);
756 r
= rearrange_stdio(fd
, fd
, STDERR_FILENO
);
761 *_saved_stdin
= saved_stdin
;
762 *_saved_stdout
= saved_stdout
;
764 saved_stdin
= saved_stdout
= -1;
769 static void write_confirm_error_fd(int err
, int fd
, const Unit
*u
) {
772 if (err
== -ETIMEDOUT
)
773 dprintf(fd
, "Confirmation question timed out for %s, assuming positive response.\n", u
->id
);
776 dprintf(fd
, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u
->id
);
780 static void write_confirm_error(int err
, const char *vc
, const Unit
*u
) {
781 _cleanup_close_
int fd
= -1;
785 fd
= open_terminal(vc
, O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
789 write_confirm_error_fd(err
, fd
, u
);
792 static int restore_confirm_stdio(int *saved_stdin
, int *saved_stdout
) {
796 assert(saved_stdout
);
800 if (*saved_stdin
>= 0)
801 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
804 if (*saved_stdout
>= 0)
805 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
808 *saved_stdin
= safe_close(*saved_stdin
);
809 *saved_stdout
= safe_close(*saved_stdout
);
815 CONFIRM_PRETEND_FAILURE
= -1,
816 CONFIRM_PRETEND_SUCCESS
= 0,
820 static int ask_for_confirmation(const char *vc
, Unit
*u
, const char *cmdline
) {
821 int saved_stdout
= -1, saved_stdin
= -1, r
;
822 _cleanup_free_
char *e
= NULL
;
825 /* For any internal errors, assume a positive response. */
826 r
= setup_confirm_stdio(vc
, &saved_stdin
, &saved_stdout
);
828 write_confirm_error(r
, vc
, u
);
829 return CONFIRM_EXECUTE
;
832 /* confirm_spawn might have been disabled while we were sleeping. */
833 if (manager_is_confirm_spawn_disabled(u
->manager
)) {
838 e
= ellipsize(cmdline
, 60, 100);
846 r
= ask_char(&c
, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e
);
848 write_confirm_error_fd(r
, STDOUT_FILENO
, u
);
855 printf("Resuming normal execution.\n");
856 manager_disable_confirm_spawn();
860 unit_dump(u
, stdout
, " ");
861 continue; /* ask again */
863 printf("Failing execution.\n");
864 r
= CONFIRM_PRETEND_FAILURE
;
867 printf(" c - continue, proceed without asking anymore\n"
868 " D - dump, show the state of the unit\n"
869 " f - fail, don't execute the command and pretend it failed\n"
871 " i - info, show a short summary of the unit\n"
872 " j - jobs, show jobs that are in progress\n"
873 " s - skip, don't execute the command and pretend it succeeded\n"
874 " y - yes, execute the command\n");
875 continue; /* ask again */
877 printf(" Description: %s\n"
880 u
->id
, u
->description
, cmdline
);
881 continue; /* ask again */
883 manager_dump_jobs(u
->manager
, stdout
, " ");
884 continue; /* ask again */
886 /* 'n' was removed in favor of 'f'. */
887 printf("Didn't understand 'n', did you mean 'f'?\n");
888 continue; /* ask again */
890 printf("Skipping execution.\n");
891 r
= CONFIRM_PRETEND_SUCCESS
;
897 assert_not_reached("Unhandled choice");
903 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
907 static int get_fixed_user(const ExecContext
*c
, const char **user
,
908 uid_t
*uid
, gid_t
*gid
,
909 const char **home
, const char **shell
) {
918 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
919 * (i.e. are "/" or "/bin/nologin"). */
922 r
= get_user_creds(&name
, uid
, gid
, home
, shell
, USER_CREDS_CLEAN
);
930 static int get_fixed_group(const ExecContext
*c
, const char **group
, gid_t
*gid
) {
940 r
= get_group_creds(&name
, gid
, 0);
948 static int get_supplementary_groups(const ExecContext
*c
, const char *user
,
949 const char *group
, gid_t gid
,
950 gid_t
**supplementary_gids
, int *ngids
) {
954 bool keep_groups
= false;
955 gid_t
*groups
= NULL
;
956 _cleanup_free_ gid_t
*l_gids
= NULL
;
961 * If user is given, then lookup GID and supplementary groups list.
962 * We avoid NSS lookups for gid=0. Also we have to initialize groups
963 * here and as early as possible so we keep the list of supplementary
964 * groups of the caller.
966 if (user
&& gid_is_valid(gid
) && gid
!= 0) {
967 /* First step, initialize groups from /etc/group */
968 if (initgroups(user
, gid
) < 0)
974 if (strv_isempty(c
->supplementary_groups
))
978 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
979 * be positive, otherwise fail.
982 ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
);
983 if (ngroups_max
<= 0) {
987 return -EOPNOTSUPP
; /* For all other values */
990 l_gids
= new(gid_t
, ngroups_max
);
996 * Lookup the list of groups that the user belongs to, we
997 * avoid NSS lookups here too for gid=0.
1000 if (getgrouplist(user
, gid
, l_gids
, &k
) < 0)
1005 STRV_FOREACH(i
, c
->supplementary_groups
) {
1008 if (k
>= ngroups_max
)
1012 r
= get_group_creds(&g
, l_gids
+k
, 0);
1020 * Sets ngids to zero to drop all supplementary groups, happens
1021 * when we are under root and SupplementaryGroups= is empty.
1028 /* Otherwise get the final list of supplementary groups */
1029 groups
= memdup(l_gids
, sizeof(gid_t
) * k
);
1033 *supplementary_gids
= groups
;
1041 static int enforce_groups(gid_t gid
, const gid_t
*supplementary_gids
, int ngids
) {
1044 /* Handle SupplementaryGroups= if it is not empty */
1046 r
= maybe_setgroups(ngids
, supplementary_gids
);
1051 if (gid_is_valid(gid
)) {
1052 /* Then set our gids */
1053 if (setresgid(gid
, gid
, gid
) < 0)
1060 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
1063 if (!uid_is_valid(uid
))
1066 /* Sets (but doesn't look up) the uid and make sure we keep the
1067 * capabilities while doing so. */
1069 if (context
->capability_ambient_set
!= 0) {
1071 /* First step: If we need to keep capabilities but
1072 * drop privileges we need to make sure we keep our
1073 * caps, while we drop privileges. */
1075 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
1077 if (prctl(PR_GET_SECUREBITS
) != sb
)
1078 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
1083 /* Second step: actually set the uids */
1084 if (setresuid(uid
, uid
, uid
) < 0)
1087 /* At this point we should have all necessary capabilities but
1088 are otherwise a normal user. However, the caps might have got
1089 corrupted due to the setresuid() so we need to clean them up
1090 later. This is done outside of this call. */
1097 static int null_conv(
1099 const struct pam_message
**msg
,
1100 struct pam_response
**resp
,
1101 void *appdata_ptr
) {
1103 /* We don't support conversations */
1105 return PAM_CONV_ERR
;
1110 static int setup_pam(
1117 int fds
[], size_t n_fds
) {
1121 static const struct pam_conv conv
= {
1126 _cleanup_(barrier_destroy
) Barrier barrier
= BARRIER_NULL
;
1127 pam_handle_t
*handle
= NULL
;
1129 int pam_code
= PAM_SUCCESS
, r
;
1130 char **nv
, **e
= NULL
;
1131 bool close_session
= false;
1132 pid_t pam_pid
= 0, parent_pid
;
1139 /* We set up PAM in the parent process, then fork. The child
1140 * will then stay around until killed via PR_SET_PDEATHSIG or
1141 * systemd via the cgroup logic. It will then remove the PAM
1142 * session again. The parent process will exec() the actual
1143 * daemon. We do things this way to ensure that the main PID
1144 * of the daemon is the one we initially fork()ed. */
1146 r
= barrier_create(&barrier
);
1150 if (log_get_max_level() < LOG_DEBUG
)
1151 flags
|= PAM_SILENT
;
1153 pam_code
= pam_start(name
, user
, &conv
, &handle
);
1154 if (pam_code
!= PAM_SUCCESS
) {
1160 _cleanup_free_
char *q
= NULL
;
1162 /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
1163 * out if that's the case, and read the TTY off it. */
1165 if (getttyname_malloc(STDIN_FILENO
, &q
) >= 0)
1166 tty
= strjoina("/dev/", q
);
1170 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
1171 if (pam_code
!= PAM_SUCCESS
)
1175 STRV_FOREACH(nv
, *env
) {
1176 pam_code
= pam_putenv(handle
, *nv
);
1177 if (pam_code
!= PAM_SUCCESS
)
1181 pam_code
= pam_acct_mgmt(handle
, flags
);
1182 if (pam_code
!= PAM_SUCCESS
)
1185 pam_code
= pam_open_session(handle
, flags
);
1186 if (pam_code
!= PAM_SUCCESS
)
1189 close_session
= true;
1191 e
= pam_getenvlist(handle
);
1193 pam_code
= PAM_BUF_ERR
;
1197 /* Block SIGTERM, so that we know that it won't get lost in
1200 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
1202 parent_pid
= getpid_cached();
1204 r
= safe_fork("(sd-pam)", 0, &pam_pid
);
1208 int sig
, ret
= EXIT_PAM
;
1210 /* The child's job is to reset the PAM session on
1212 barrier_set_role(&barrier
, BARRIER_CHILD
);
1214 /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
1215 * are open here that have been opened by PAM. */
1216 (void) close_many(fds
, n_fds
);
1218 /* Drop privileges - we don't need any to pam_close_session
1219 * and this will make PR_SET_PDEATHSIG work in most cases.
1220 * If this fails, ignore the error - but expect sd-pam threads
1221 * to fail to exit normally */
1223 r
= maybe_setgroups(0, NULL
);
1225 log_warning_errno(r
, "Failed to setgroups() in sd-pam: %m");
1226 if (setresgid(gid
, gid
, gid
) < 0)
1227 log_warning_errno(errno
, "Failed to setresgid() in sd-pam: %m");
1228 if (setresuid(uid
, uid
, uid
) < 0)
1229 log_warning_errno(errno
, "Failed to setresuid() in sd-pam: %m");
1231 (void) ignore_signals(SIGPIPE
, -1);
1233 /* Wait until our parent died. This will only work if
1234 * the above setresuid() succeeds, otherwise the kernel
1235 * will not allow unprivileged parents to kill their privileged
1236 * children this way. We rely on the control groups kill logic
1237 * to do the rest for us. */
1238 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
1241 /* Tell the parent that our setup is done. This is especially
1242 * important regarding dropping privileges. Otherwise, unit
1243 * setup might race against our setresuid(2) call.
1245 * If the parent aborted, we'll detect this below, hence ignore
1246 * return failure here. */
1247 (void) barrier_place(&barrier
);
1249 /* Check if our parent process might already have died? */
1250 if (getppid() == parent_pid
) {
1253 assert_se(sigemptyset(&ss
) >= 0);
1254 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
1257 if (sigwait(&ss
, &sig
) < 0) {
1264 assert(sig
== SIGTERM
);
1269 /* If our parent died we'll end the session */
1270 if (getppid() != parent_pid
) {
1271 pam_code
= pam_close_session(handle
, flags
);
1272 if (pam_code
!= PAM_SUCCESS
)
1279 pam_end(handle
, pam_code
| flags
);
1283 barrier_set_role(&barrier
, BARRIER_PARENT
);
1285 /* If the child was forked off successfully it will do all the
1286 * cleanups, so forget about the handle here. */
1289 /* Unblock SIGTERM again in the parent */
1290 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
1292 /* We close the log explicitly here, since the PAM modules
1293 * might have opened it, but we don't want this fd around. */
1296 /* Synchronously wait for the child to initialize. We don't care for
1297 * errors as we cannot recover. However, warn loudly if it happens. */
1298 if (!barrier_place_and_sync(&barrier
))
1299 log_error("PAM initialization failed");
1301 return strv_free_and_replace(*env
, e
);
1304 if (pam_code
!= PAM_SUCCESS
) {
1305 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
1306 r
= -EPERM
; /* PAM errors do not map to errno */
1308 log_error_errno(r
, "PAM failed: %m");
1312 pam_code
= pam_close_session(handle
, flags
);
1314 pam_end(handle
, pam_code
| flags
);
1326 static void rename_process_from_path(const char *path
) {
1327 char process_name
[11];
1331 /* This resulting string must fit in 10 chars (i.e. the length
1332 * of "/sbin/init") to look pretty in /bin/ps */
1336 rename_process("(...)");
1342 /* The end of the process name is usually more
1343 * interesting, since the first bit might just be
1349 process_name
[0] = '(';
1350 memcpy(process_name
+1, p
, l
);
1351 process_name
[1+l
] = ')';
1352 process_name
[1+l
+1] = 0;
1354 rename_process(process_name
);
1357 static bool context_has_address_families(const ExecContext
*c
) {
1360 return c
->address_families_whitelist
||
1361 !set_isempty(c
->address_families
);
1364 static bool context_has_syscall_filters(const ExecContext
*c
) {
1367 return c
->syscall_whitelist
||
1368 !hashmap_isempty(c
->syscall_filter
);
1371 static bool context_has_no_new_privileges(const ExecContext
*c
) {
1374 if (c
->no_new_privileges
)
1377 if (have_effective_cap(CAP_SYS_ADMIN
)) /* if we are privileged, we don't need NNP */
1380 /* We need NNP if we have any form of seccomp and are unprivileged */
1381 return context_has_address_families(c
) ||
1382 c
->memory_deny_write_execute
||
1383 c
->restrict_realtime
||
1384 exec_context_restrict_namespaces_set(c
) ||
1385 c
->protect_kernel_tunables
||
1386 c
->protect_kernel_modules
||
1387 c
->private_devices
||
1388 context_has_syscall_filters(c
) ||
1389 !set_isempty(c
->syscall_archs
) ||
1390 c
->lock_personality
;
1395 static bool skip_seccomp_unavailable(const Unit
* u
, const char* msg
) {
1397 if (is_seccomp_available())
1400 log_unit_debug(u
, "SECCOMP features not detected in the kernel, skipping %s", msg
);
1404 static int apply_syscall_filter(const Unit
* u
, const ExecContext
*c
, bool needs_ambient_hack
) {
1405 uint32_t negative_action
, default_action
, action
;
1411 if (!context_has_syscall_filters(c
))
1414 if (skip_seccomp_unavailable(u
, "SystemCallFilter="))
1417 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
1419 if (c
->syscall_whitelist
) {
1420 default_action
= negative_action
;
1421 action
= SCMP_ACT_ALLOW
;
1423 default_action
= SCMP_ACT_ALLOW
;
1424 action
= negative_action
;
1427 if (needs_ambient_hack
) {
1428 r
= seccomp_filter_set_add(c
->syscall_filter
, c
->syscall_whitelist
, syscall_filter_sets
+ SYSCALL_FILTER_SET_SETUID
);
1433 return seccomp_load_syscall_filter_set_raw(default_action
, c
->syscall_filter
, action
, false);
1436 static int apply_syscall_archs(const Unit
*u
, const ExecContext
*c
) {
1440 if (set_isempty(c
->syscall_archs
))
1443 if (skip_seccomp_unavailable(u
, "SystemCallArchitectures="))
1446 return seccomp_restrict_archs(c
->syscall_archs
);
1449 static int apply_address_families(const Unit
* u
, const ExecContext
*c
) {
1453 if (!context_has_address_families(c
))
1456 if (skip_seccomp_unavailable(u
, "RestrictAddressFamilies="))
1459 return seccomp_restrict_address_families(c
->address_families
, c
->address_families_whitelist
);
1462 static int apply_memory_deny_write_execute(const Unit
* u
, const ExecContext
*c
) {
1466 if (!c
->memory_deny_write_execute
)
1469 if (skip_seccomp_unavailable(u
, "MemoryDenyWriteExecute="))
1472 return seccomp_memory_deny_write_execute();
1475 static int apply_restrict_realtime(const Unit
* u
, const ExecContext
*c
) {
1479 if (!c
->restrict_realtime
)
1482 if (skip_seccomp_unavailable(u
, "RestrictRealtime="))
1485 return seccomp_restrict_realtime();
1488 static int apply_protect_sysctl(const Unit
*u
, const ExecContext
*c
) {
1492 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1493 * let's protect even those systems where this is left on in the kernel. */
1495 if (!c
->protect_kernel_tunables
)
1498 if (skip_seccomp_unavailable(u
, "ProtectKernelTunables="))
1501 return seccomp_protect_sysctl();
1504 static int apply_protect_kernel_modules(const Unit
*u
, const ExecContext
*c
) {
1508 /* Turn off module syscalls on ProtectKernelModules=yes */
1510 if (!c
->protect_kernel_modules
)
1513 if (skip_seccomp_unavailable(u
, "ProtectKernelModules="))
1516 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_MODULE
, SCMP_ACT_ERRNO(EPERM
), false);
1519 static int apply_private_devices(const Unit
*u
, const ExecContext
*c
) {
1523 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1525 if (!c
->private_devices
)
1528 if (skip_seccomp_unavailable(u
, "PrivateDevices="))
1531 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
, SCMP_ACT_ERRNO(EPERM
), false);
1534 static int apply_restrict_namespaces(const Unit
*u
, const ExecContext
*c
) {
1538 if (!exec_context_restrict_namespaces_set(c
))
1541 if (skip_seccomp_unavailable(u
, "RestrictNamespaces="))
1544 return seccomp_restrict_namespaces(c
->restrict_namespaces
);
1547 static int apply_lock_personality(const Unit
* u
, const ExecContext
*c
) {
1548 unsigned long personality
;
1554 if (!c
->lock_personality
)
1557 if (skip_seccomp_unavailable(u
, "LockPersonality="))
1560 personality
= c
->personality
;
1562 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1563 if (personality
== PERSONALITY_INVALID
) {
1565 r
= opinionated_personality(&personality
);
1570 return seccomp_lock_personality(personality
);
1575 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1578 idle_pipe
[1] = safe_close(idle_pipe
[1]);
1579 idle_pipe
[2] = safe_close(idle_pipe
[2]);
1581 if (idle_pipe
[0] >= 0) {
1584 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1586 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1589 /* Signal systemd that we are bored and want to continue. */
1590 n
= write(idle_pipe
[3], "x", 1);
1592 /* Wait for systemd to react to the signal above. */
1593 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1596 idle_pipe
[0] = safe_close(idle_pipe
[0]);
1600 idle_pipe
[3] = safe_close(idle_pipe
[3]);
1603 static const char *exec_directory_env_name_to_string(ExecDirectoryType t
);
/* Builds the environment block that is generated for the executed process and hands it back
 * via *ret as a NULL-terminated string vector.
 *
 * Entries created here: LISTEN_PID, LISTEN_FDS, LISTEN_FDNAMES, WATCHDOG_PID, WATCHDOG_USEC,
 * SYSTEMD_NSS_BYPASS_BUS, HOME, LOGNAME, USER, SHELL, INVOCATION_ID, TERM, JOURNAL_STREAM, plus
 * one entry per ExecDirectoryType that has paths configured. The vector is allocated up front
 * with room for 14 + _EXEC_DIRECTORY_TYPE_MAX pointers; the assert() at the end checks that
 * this bound was not exceeded. Each string is owned by our_env once stored, and our_env is
 * freed automatically (_cleanup_strv_free_) unless ownership is passed on with TAKE_PTR(). */
1605 static int build_environment(
1607 const ExecContext
*c
,
1608 const ExecParameters
*p
,
1611 const char *username
,
1613 dev_t journal_stream_dev
,
1614 ino_t journal_stream_ino
,
1617 _cleanup_strv_free_
char **our_env
= NULL
;
1618 ExecDirectoryType t
;
1627 our_env
= new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX
);
1632 _cleanup_free_
char *joined
= NULL
;
1634 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid_cached()) < 0)
1636 our_env
[n_env
++] = x
;
1638 if (asprintf(&x
, "LISTEN_FDS=%zu", n_fds
) < 0)
1640 our_env
[n_env
++] = x
;
/* The fd names are passed as a single colon-separated list. */
1642 joined
= strv_join(p
->fd_names
, ":");
1646 x
= strjoin("LISTEN_FDNAMES=", joined
);
1649 our_env
[n_env
++] = x
;
1652 if ((p
->flags
& EXEC_SET_WATCHDOG
) && p
->watchdog_usec
> 0) {
1653 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid_cached()) < 0)
1655 our_env
[n_env
++] = x
;
1657 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, p
->watchdog_usec
) < 0)
1659 our_env
[n_env
++] = x
;
1662 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus to look up dynamic
1663 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1664 * check the database directly. */
1665 if (p
->flags
& EXEC_NSS_BYPASS_BUS
) {
1666 x
= strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1669 our_env
[n_env
++] = x
;
1673 x
= strappend("HOME=", home
);
1676 our_env
[n_env
++] = x
;
1680 x
= strappend("LOGNAME=", username
);
1683 our_env
[n_env
++] = x
;
1685 x
= strappend("USER=", username
);
1688 our_env
[n_env
++] = x
;
1692 x
= strappend("SHELL=", shell
);
1695 our_env
[n_env
++] = x
;
1698 if (!sd_id128_is_null(u
->invocation_id
)) {
1699 if (asprintf(&x
, "INVOCATION_ID=" SD_ID128_FORMAT_STR
, SD_ID128_FORMAT_VAL(u
->invocation_id
)) < 0)
1702 our_env
[n_env
++] = x
;
1705 if (exec_context_needs_term(c
)) {
1706 const char *tty_path
, *term
= NULL
;
1708 tty_path
= exec_context_tty_path(c
);
1710 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1711 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1712 * passes to PID 1 ends up all the way in the console login shown. */
1714 if (path_equal(tty_path
, "/dev/console") && getppid() == 1)
1715 term
= getenv("TERM");
1717 term
= default_term_for_tty(tty_path
);
1719 x
= strappend("TERM=", term
);
1722 our_env
[n_env
++] = x
;
/* JOURNAL_STREAM= is only set when both the device and the inode number are non-zero. */
1725 if (journal_stream_dev
!= 0 && journal_stream_ino
!= 0) {
1726 if (asprintf(&x
, "JOURNAL_STREAM=" DEV_FMT
":" INO_FMT
, journal_stream_dev
, journal_stream_ino
) < 0)
1729 our_env
[n_env
++] = x
;
/* One "NAME=<prefix>/<path>:<prefix>/<path>..." entry per directory type. */
1732 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
1733 _cleanup_free_
char *pre
= NULL
, *joined
= NULL
;
1739 if (strv_isempty(c
->directories
[t
].paths
))
1742 n
= exec_directory_env_name_to_string(t
);
1746 pre
= strjoin(p
->prefix
[t
], "/");
1750 joined
= strv_join_prefix(c
->directories
[t
].paths
, ":", pre
);
1754 x
= strjoin(n
, "=", joined
);
1758 our_env
[n_env
++] = x
;
1761 our_env
[n_env
++] = NULL
;
1762 assert(n_env
<= 14 + _EXEC_DIRECTORY_TYPE_MAX
);
1764 *ret
= TAKE_PTR(our_env
);
/* Builds a string vector of "NAME=value" entries for the variable names listed in
 * c->pass_environment and returns it via *ret.
 *
 * The array is grown with GREEDY_REALLOC() so that there is always room for the new entry
 * plus the terminating NULL (hence n_env + 2), and it is kept NULL-terminated after every
 * insertion. On the paths that do not reach TAKE_PTR() the vector is released by
 * _cleanup_strv_free_. */
1769 static int build_pass_environment(const ExecContext
*c
, char ***ret
) {
1770 _cleanup_strv_free_
char **pass_env
= NULL
;
1771 size_t n_env
= 0, n_bufsize
= 0;
1774 STRV_FOREACH(i
, c
->pass_environment
) {
1775 _cleanup_free_
char *x
= NULL
;
1781 x
= strjoin(*i
, "=", v
);
1785 if (!GREEDY_REALLOC(pass_env
, n_bufsize
, n_env
+ 2))
1788 pass_env
[n_env
++] = TAKE_PTR(x
);
1789 pass_env
[n_env
] = NULL
;
1792 *ret
= TAKE_PTR(pass_env
);
/* Decides whether executing with this context requires a mount namespace.
 *
 * The settings inspected, in order: RootImage=, the read-write/read-only/inaccessible path
 * lists, bind mounts, temporary file systems, mount flags, PrivateTmp= (only if the runtime
 * object carries a tmp or var-tmp directory), the Private and Protect switches, and, when a
 * root directory is configured, MountAPIVFS= and any exec directory with both a prefix and
 * configured paths. Finally DynamicUser= combined with state, cache or logs directories.
 *
 * NOTE(review): presumably each matching condition yields true and the fall-through yields
 * false; the individual return statements should be confirmed. */
1797 static bool exec_needs_mount_namespace(
1798 const ExecContext
*context
,
1799 const ExecParameters
*params
,
1800 const ExecRuntime
*runtime
) {
1805 if (context
->root_image
)
1808 if (!strv_isempty(context
->read_write_paths
) ||
1809 !strv_isempty(context
->read_only_paths
) ||
1810 !strv_isempty(context
->inaccessible_paths
))
1813 if (context
->n_bind_mounts
> 0)
1816 if (context
->n_temporary_filesystems
> 0)
1819 if (context
->mount_flags
!= 0)
1822 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1825 if (context
->private_devices
||
1826 context
->private_mounts
||
1827 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1828 context
->protect_home
!= PROTECT_HOME_NO
||
1829 context
->protect_kernel_tunables
||
1830 context
->protect_kernel_modules
||
1831 context
->protect_control_groups
)
1834 if (context
->root_directory
) {
1835 ExecDirectoryType t
;
1837 if (context
->mount_apivfs
)
1840 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
1841 if (!params
->prefix
[t
])
1844 if (!strv_isempty(context
->directories
[t
].paths
))
1849 if (context
->dynamic_user
&&
1850 (!strv_isempty(context
->directories
[EXEC_DIRECTORY_STATE
].paths
) ||
1851 !strv_isempty(context
->directories
[EXEC_DIRECTORY_CACHE
].paths
) ||
1852 !strv_isempty(context
->directories
[EXEC_DIRECTORY_LOGS
].paths
)))
/* Moves the calling process into a new user namespace (unshare(CLONE_NEWUSER)) and has a
 * forked helper process, "(sd-userns)", write the "setgroups", "gid_map" and "uid_map" procfs
 * files for it.
 *
 * The maps always contain "0 0 1"; when uid (resp. gid) is valid and non-zero a second line
 * mapping that ID to itself is added. Two channels connect the processes: an eventfd
 * (unshare_ready_fd) on which the helper blocks until the namespace exists, and a pipe
 * (errno_pipe) over which the helper sends an int error code before exiting with
 * EXIT_FAILURE. Reading zero bytes from the pipe is the success case; the helper's exit
 * status is checked as well via wait_for_terminate_and_check(). */
1858 static int setup_private_users(uid_t uid
, gid_t gid
) {
1859 _cleanup_free_
char *uid_map
= NULL
, *gid_map
= NULL
;
1860 _cleanup_close_pair_
int errno_pipe
[2] = { -1, -1 };
1861 _cleanup_close_
int unshare_ready_fd
= -1;
1862 _cleanup_(sigkill_waitp
) pid_t pid
= 0;
1867 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1868 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1869 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1870 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1871 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1872 * continues execution normally. */
1874 if (uid
!= 0 && uid_is_valid(uid
)) {
1875 r
= asprintf(&uid_map
,
1876 "0 0 1\n" /* Map root → root */
1877 UID_FMT
" " UID_FMT
" 1\n", /* Map $UID → $UID */
1882 uid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1887 if (gid
!= 0 && gid_is_valid(gid
)) {
1888 r
= asprintf(&gid_map
,
1889 "0 0 1\n" /* Map root → root */
1890 GID_FMT
" " GID_FMT
" 1\n", /* Map $GID → $GID */
1895 gid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1900 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1902 unshare_ready_fd
= eventfd(0, EFD_CLOEXEC
);
1903 if (unshare_ready_fd
< 0)
1906 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1908 if (pipe2(errno_pipe
, O_CLOEXEC
) < 0)
1911 r
= safe_fork("(sd-userns)", FORK_RESET_SIGNALS
|FORK_DEATHSIG
, &pid
);
1915 _cleanup_close_
int fd
= -1;
1919 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1920 * here, after the parent opened its own user namespace. */
1923 errno_pipe
[0] = safe_close(errno_pipe
[0]);
1925 /* Wait until the parent unshared the user namespace */
1926 if (read(unshare_ready_fd
, &c
, sizeof(c
)) < 0) {
1931 /* Disable the setgroups() system call in the child user namespace, for good. */
1932 a
= procfs_file_alloca(ppid
, "setgroups");
1933 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1935 if (errno
!= ENOENT
) {
1940 /* If the file is missing the kernel is too old, let's continue anyway. */
1942 if (write(fd
, "deny\n", 5) < 0) {
1947 fd
= safe_close(fd
);
1950 /* First write the GID map */
1951 a
= procfs_file_alloca(ppid
, "gid_map");
1952 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1957 if (write(fd
, gid_map
, strlen(gid_map
)) < 0) {
1961 fd
= safe_close(fd
);
1963 /* Then write the UID map */
1964 a
= procfs_file_alloca(ppid
, "uid_map");
1965 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1970 if (write(fd
, uid_map
, strlen(uid_map
)) < 0) {
1975 _exit(EXIT_SUCCESS
);
/* Helper failure path: pass the error code in r to the parent, then exit non-zero. */
1978 (void) write(errno_pipe
[1], &r
, sizeof(r
));
1979 _exit(EXIT_FAILURE
);
/* Parent: drop the write end so that read() below sees EOF once the helper is done. */
1982 errno_pipe
[1] = safe_close(errno_pipe
[1]);
1984 if (unshare(CLONE_NEWUSER
) < 0)
1987 /* Let the child know that the namespace is ready now */
1988 if (write(unshare_ready_fd
, &c
, sizeof(c
)) < 0)
1991 /* Try to read an error code from the child */
1992 n
= read(errno_pipe
[0], &r
, sizeof(r
));
1995 if (n
== sizeof(r
)) { /* an error code was sent to us */
2000 if (n
!= 0) /* on success we should have read 0 bytes */
2003 r
= wait_for_terminate_and_check("(sd-userns)", pid
, 0);
2007 if (r
!= EXIT_SUCCESS
) /* If something strange happened with the child, let's consider this fatal, too */
/* Creates the directories configured for one ExecDirectoryType below params->prefix[type].
 *
 * For each configured path the parents are created with mode 0755 and the directory itself
 * with context->directories[type].mode. For DynamicUser= units, except for the runtime and
 * configuration types, the directory lives below "<prefix>/private" (created 0700, owned by
 * 0:0) and the regular location becomes a symlink to it; a pre-existing regular directory is
 * renamed into the private location. Afterwards ownership of the tree is changed with
 * path_chown_recursive(), which the check on EXEC_DIRECTORY_CONFIGURATION is meant to skip
 * according to the comment next to it.
 *
 * uid and gid are validated only when EXEC_CHOWN_DIRECTORIES is set in params->flags.
 * *exit_status receives the per-type entry of exit_status_table.
 * NOTE(review): presumably this is only assigned on the failure path — confirm. */
2013 static int setup_exec_directory(
2014 const ExecContext
*context
,
2015 const ExecParameters
*params
,
2018 ExecDirectoryType type
,
2021 static const int exit_status_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
2022 [EXEC_DIRECTORY_RUNTIME
] = EXIT_RUNTIME_DIRECTORY
,
2023 [EXEC_DIRECTORY_STATE
] = EXIT_STATE_DIRECTORY
,
2024 [EXEC_DIRECTORY_CACHE
] = EXIT_CACHE_DIRECTORY
,
2025 [EXEC_DIRECTORY_LOGS
] = EXIT_LOGS_DIRECTORY
,
2026 [EXEC_DIRECTORY_CONFIGURATION
] = EXIT_CONFIGURATION_DIRECTORY
,
2033 assert(type
>= 0 && type
< _EXEC_DIRECTORY_TYPE_MAX
);
2034 assert(exit_status
);
2036 if (!params
->prefix
[type
])
2039 if (params
->flags
& EXEC_CHOWN_DIRECTORIES
) {
2040 if (!uid_is_valid(uid
))
2042 if (!gid_is_valid(gid
))
2046 STRV_FOREACH(rt
, context
->directories
[type
].paths
) {
2047 _cleanup_free_
char *p
= NULL
, *pp
= NULL
;
2049 p
= strjoin(params
->prefix
[type
], "/", *rt
);
2055 r
= mkdir_parents_label(p
, 0755);
2059 if (context
->dynamic_user
&&
2060 !IN_SET(type
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
)) {
2061 _cleanup_free_
char *private_root
= NULL
;
2063 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2064 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2065 * whose UID is later on reused. To lock this down we use the same trick used by container
2066 * managers to prohibit host users to get access to files of the same UID in containers: we
2067 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2068 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2069 * to make this directory permeable for the service itself.
2071 * Specifically: for a service which wants a special directory "foo/" we first create a
2072 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2073 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2074 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2075 * into it. Inside of the namespace of the container "private/" is replaced by a more liberally
2076 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2077 * disabling the access boundary for the service and making sure it only gets access to the
2078 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2080 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
2081 * owned by the service itself.
2082 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2083 * files or sockets with other services. */
2085 private_root
= strjoin(params
->prefix
[type
], "/private");
2086 if (!private_root
) {
2091 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
2092 r
= mkdir_safe_label(private_root
, 0700, 0, 0, MKDIR_WARN_MODE
);
2096 pp
= strjoin(private_root
, "/", *rt
);
2102 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2103 r
= mkdir_parents_label(pp
, 0755);
2107 if (is_dir(p
, false) > 0 &&
2108 (laccess(pp
, F_OK
) < 0 && errno
== ENOENT
)) {
2110 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2111 * it over. Most likely the service has been upgraded from one that didn't use
2112 * DynamicUser=1, to one that does. */
2114 if (rename(p
, pp
) < 0) {
2119 /* Otherwise, create the actual directory for the service */
2121 r
= mkdir_label(pp
, context
->directories
[type
].mode
);
2122 if (r
< 0 && r
!= -EEXIST
)
2126 /* And link it up from the original place */
2127 r
= symlink_idempotent(pp
, p
, true);
2131 /* Lock down the access mode */
2132 if (chmod(pp
, context
->directories
[type
].mode
) < 0) {
2137 r
= mkdir_label(p
, context
->directories
[type
].mode
);
2138 if (r
< 0 && r
!= -EEXIST
)
2140 if (r
== -EEXIST
&& !context
->dynamic_user
)
2144 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2145 * a service, and shall not be writable. */
2146 if (type
== EXEC_DIRECTORY_CONFIGURATION
)
2149 /* Then, change the ownership of the whole tree, if necessary */
2150 r
= path_chown_recursive(pp
?: p
, uid
, gid
);
2158 *exit_status
= exit_status_table
[type
];
/* Applies the SMACK process label to the calling process (pid 0 is passed to
 * mac_smack_apply_pid()).
 *
 * If context->smack_process_label is set, that label is applied. The code guarded by
 * SMACK_DEFAULT_PROCESS_LABEL instead reads the SMACK_ATTR_EXEC label of command->path
 * (tolerating -ENODATA and -EOPNOTSUPP from the read) and applies it, falling back to
 * SMACK_DEFAULT_PROCESS_LABEL when the executable carries no label. */
2163 static int setup_smack(
2164 const ExecContext
*context
,
2165 const ExecCommand
*command
) {
2172 if (context
->smack_process_label
) {
2173 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
2177 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2179 _cleanup_free_
char *exec_label
= NULL
;
2181 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
2182 if (r
< 0 && !IN_SET(r
, -ENODATA
, -EOPNOTSUPP
))
2185 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
/* Assembles the complete list of bind mounts for a service.
 *
 * The list consists of copies of the explicitly configured context->bind_mounts (source and
 * destination strings are duplicated), followed by one entry per configured exec directory
 * of every type that has a prefix. For DynamicUser= units the source of state/cache/logs
 * style directories is "<prefix>/private/<name>"; without RootDirectory=/RootImage= the
 * "<prefix>/private" directory is additionally queued in the empty-directories list, with
 * RootDirectory=/RootImage= the destination is the non-private "<prefix>/<name>".
 *
 * Outputs: *ret_bind_mounts / *ret_n_bind_mounts receive the array and its length,
 * *ret_empty_directories the string vector of directories collected in empty_directories.
 * All three are reset to NULL/0 on one early path; bind_mount_free_many() releases the h
 * entries filled so far (presumably the error path — confirm). */
2195 static int compile_bind_mounts(
2196 const ExecContext
*context
,
2197 const ExecParameters
*params
,
2198 BindMount
**ret_bind_mounts
,
2199 size_t *ret_n_bind_mounts
,
2200 char ***ret_empty_directories
) {
2202 _cleanup_strv_free_
char **empty_directories
= NULL
;
2203 BindMount
*bind_mounts
;
2205 ExecDirectoryType t
;
2210 assert(ret_bind_mounts
);
2211 assert(ret_n_bind_mounts
);
2212 assert(ret_empty_directories
);
/* First pass: count the entries so the array can be allocated in one go. */
2214 n
= context
->n_bind_mounts
;
2215 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2216 if (!params
->prefix
[t
])
2219 n
+= strv_length(context
->directories
[t
].paths
);
2223 *ret_bind_mounts
= NULL
;
2224 *ret_n_bind_mounts
= 0;
2225 *ret_empty_directories
= NULL
;
2229 bind_mounts
= new(BindMount
, n
);
/* Second pass, part one: duplicate the explicitly configured bind mounts. */
2233 for (i
= 0; i
< context
->n_bind_mounts
; i
++) {
2234 BindMount
*item
= context
->bind_mounts
+ i
;
2237 s
= strdup(item
->source
);
2243 d
= strdup(item
->destination
);
2250 bind_mounts
[h
++] = (BindMount
) {
2253 .read_only
= item
->read_only
,
2254 .recursive
= item
->recursive
,
2255 .ignore_enoent
= item
->ignore_enoent
,
/* Second pass, part two: add the exec directories. */
2259 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2262 if (!params
->prefix
[t
])
2265 if (strv_isempty(context
->directories
[t
].paths
))
2268 if (context
->dynamic_user
&&
2269 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
) &&
2270 !(context
->root_directory
|| context
->root_image
)) {
2273 /* So this is for a dynamic user, and we need to make sure the process can access its own
2274 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2275 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2277 private_root
= strjoin(params
->prefix
[t
], "/private");
2278 if (!private_root
) {
2283 r
= strv_consume(&empty_directories
, private_root
);
2288 STRV_FOREACH(suffix
, context
->directories
[t
].paths
) {
2291 if (context
->dynamic_user
&&
2292 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
))
2293 s
= strjoin(params
->prefix
[t
], "/private/", *suffix
);
2295 s
= strjoin(params
->prefix
[t
], "/", *suffix
);
2301 if (context
->dynamic_user
&&
2302 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
) &&
2303 (context
->root_directory
|| context
->root_image
))
2304 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2305 * directory is not created on the root directory. So, let's bind-mount the directory
2306 * on the 'non-private' place. */
2307 d
= strjoin(params
->prefix
[t
], "/", *suffix
);
2316 bind_mounts
[h
++] = (BindMount
) {
2321 .ignore_enoent
= false,
2328 *ret_bind_mounts
= bind_mounts
;
2329 *ret_n_bind_mounts
= n
;
2330 *ret_empty_directories
= TAKE_PTR(empty_directories
);
2335 bind_mount_free_many(bind_mounts
, h
);
/* Sets up the mount namespace for a command by collecting all inputs for setup_namespace().
 *
 * Inputs gathered here: the private /tmp and /var/tmp directories ("<dir>/tmp" below the
 * directories stored in the runtime object), root directory and root image (only looked at
 * when EXEC_APPLY_CHROOT is set), the bind mounts and empty directories produced by
 * compile_bind_mounts(), and a NamespaceInfo describing the protection switches.
 *
 * Full sandboxing is applied only when EXEC_APPLY_SANDBOXING is set and the command is not
 * flagged EXEC_COMMAND_FULLY_PRIVILEGED; otherwise read-only/inaccessible paths,
 * ProtectHome= and ProtectSystem= are passed in their "off" form. The bind mount array is
 * freed with bind_mount_free_many() once setup_namespace() has returned. */
2339 static int apply_mount_namespace(
2341 const ExecCommand
*command
,
2342 const ExecContext
*context
,
2343 const ExecParameters
*params
,
2344 const ExecRuntime
*runtime
) {
2346 _cleanup_strv_free_
char **empty_directories
= NULL
;
2347 char *tmp
= NULL
, *var
= NULL
;
2348 const char *root_dir
= NULL
, *root_image
= NULL
;
2349 NamespaceInfo ns_info
;
2350 bool needs_sandboxing
;
2351 BindMount
*bind_mounts
= NULL
;
2352 size_t n_bind_mounts
= 0;
2357 /* The runtime struct only contains the parent of the private /tmp,
2358 * which is non-accessible to world users. Inside of it there's a /tmp
2359 * that is sticky, and that's the one we want to use here. */
2361 if (context
->private_tmp
&& runtime
) {
2362 if (runtime
->tmp_dir
)
2363 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
2364 if (runtime
->var_tmp_dir
)
2365 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
2368 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2369 root_image
= context
->root_image
;
2372 root_dir
= context
->root_directory
;
2375 r
= compile_bind_mounts(context
, params
, &bind_mounts
, &n_bind_mounts
, &empty_directories
);
2379 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
2380 if (needs_sandboxing
)
2381 ns_info
= (NamespaceInfo
) {
2382 .ignore_protect_paths
= false,
2383 .private_dev
= context
->private_devices
,
2384 .protect_control_groups
= context
->protect_control_groups
,
2385 .protect_kernel_tunables
= context
->protect_kernel_tunables
,
2386 .protect_kernel_modules
= context
->protect_kernel_modules
,
2387 .mount_apivfs
= context
->mount_apivfs
,
2388 .private_mounts
= context
->private_mounts
,
2390 else if (!context
->dynamic_user
&& root_dir
)
2392 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2393 * sandbox info, otherwise enforce it, don't ignore protected paths and
2394 * fail if we are enable to apply the sandbox inside the mount namespace.
2396 ns_info
= (NamespaceInfo
) {
2397 .ignore_protect_paths
= true,
2400 ns_info
= (NamespaceInfo
) {};
2402 r
= setup_namespace(root_dir
, root_image
,
2403 &ns_info
, context
->read_write_paths
,
2404 needs_sandboxing
? context
->read_only_paths
: NULL
,
2405 needs_sandboxing
? context
->inaccessible_paths
: NULL
,
2409 context
->temporary_filesystems
,
2410 context
->n_temporary_filesystems
,
2413 needs_sandboxing
? context
->protect_home
: PROTECT_HOME_NO
,
2414 needs_sandboxing
? context
->protect_system
: PROTECT_SYSTEM_NO
,
2415 context
->mount_flags
,
2416 DISSECT_IMAGE_DISCARD_ON_LOOP
);
2418 bind_mount_free_many(bind_mounts
, n_bind_mounts
);
2420 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
2421 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
2422 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2423 * completely different execution environment. */
2425 if (n_bind_mounts
== 0 &&
2426 context
->n_temporary_filesystems
== 0 &&
2427 !root_dir
&& !root_image
&&
2428 !context
->dynamic_user
) {
2429 log_unit_debug(u
, "Failed to set up namespace, assuming containerized execution and ignoring.");
/* Enters the root directory (if needed) and the working directory of the service.
 *
 * The working directory is taken from context->working_directory unless
 * context->working_directory_home is set. When EXEC_APPLY_CHROOT is set, a root directory is
 * configured and no mount namespace is in use (needs_mount_ns is false), chroot() is called
 * on context->root_directory. The directory handed to chdir() is built with prefix_roota()
 * from the root directory and the working directory.
 *
 * A failing chdir() is tolerated when context->working_directory_missing_ok is set.
 * *exit_status is set to EXIT_CHROOT or EXIT_CHDIR to identify the failing step. */
2439 static int apply_working_directory(
2440 const ExecContext
*context
,
2441 const ExecParameters
*params
,
2443 const bool needs_mount_ns
,
2449 assert(exit_status
);
2451 if (context
->working_directory_home
) {
2454 *exit_status
= EXIT_CHDIR
;
2460 } else if (context
->working_directory
)
2461 wd
= context
->working_directory
;
2465 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2466 if (!needs_mount_ns
&& context
->root_directory
)
2467 if (chroot(context
->root_directory
) < 0) {
2468 *exit_status
= EXIT_CHROOT
;
2474 d
= prefix_roota(context
->root_directory
, wd
);
2476 if (chdir(d
) < 0 && !context
->working_directory_missing_ok
) {
2477 *exit_status
= EXIT_CHDIR
;
/* Joins a fresh session keyring for the service and optionally links the user keyring into it.
 *
 * Nothing is set up unless EXEC_NEW_KEYRING is set in p->flags, and
 * context->keyring_mode == EXEC_KEYRING_INHERIT is checked before any keyring call as well.
 * Otherwise the real GID/UID are temporarily switched to gid/uid with
 * setregid()/setreuid() (only when valid and different from the current ones), the session
 * keyring is joined with KEYCTL_JOIN_SESSION_KEYRING, and for EXEC_KEYRING_SHARED the user
 * keyring is linked into it. ENOSYS, EACCES, EPERM and EDQUOT from the join are only logged
 * at debug level. After the IDs have been switched back, the unit's invocation ID (if
 * non-null) is stored as user key "invocation_id" and restricted to view/read/search for
 * possessor and user. The final block restores the saved IDs once more without logging. */
2484 static int setup_keyring(
2486 const ExecContext
*context
,
2487 const ExecParameters
*p
,
2488 uid_t uid
, gid_t gid
) {
2490 key_serial_t keyring
;
2499 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2500 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2501 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2502 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2503 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2504 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2506 if (!(p
->flags
& EXEC_NEW_KEYRING
))
2509 if (context
->keyring_mode
== EXEC_KEYRING_INHERIT
)
2512 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2513 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2514 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2515 * & group is just as nasty as acquiring a reference to the user keyring. */
2517 saved_uid
= getuid();
2518 saved_gid
= getgid();
2520 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2521 if (setregid(gid
, -1) < 0)
2522 return log_unit_error_errno(u
, errno
, "Failed to change GID for user keyring: %m");
2525 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2526 if (setreuid(uid
, -1) < 0) {
2527 r
= log_unit_error_errno(u
, errno
, "Failed to change UID for user keyring: %m");
2532 keyring
= keyctl(KEYCTL_JOIN_SESSION_KEYRING
, 0, 0, 0, 0);
2533 if (keyring
== -1) {
2534 if (errno
== ENOSYS
)
2535 log_unit_debug_errno(u
, errno
, "Kernel keyring not supported, ignoring.");
2536 else if (IN_SET(errno
, EACCES
, EPERM
))
2537 log_unit_debug_errno(u
, errno
, "Kernel keyring access prohibited, ignoring.");
2538 else if (errno
== EDQUOT
)
2539 log_unit_debug_errno(u
, errno
, "Out of kernel keyrings to allocate, ignoring.");
2541 r
= log_unit_error_errno(u
, errno
, "Setting up kernel keyring failed: %m");
2546 /* When requested link the user keyring into the session keyring. */
2547 if (context
->keyring_mode
== EXEC_KEYRING_SHARED
) {
2549 if (keyctl(KEYCTL_LINK
,
2550 KEY_SPEC_USER_KEYRING
,
2551 KEY_SPEC_SESSION_KEYRING
, 0, 0) < 0) {
2552 r
= log_unit_error_errno(u
, errno
, "Failed to link user keyring into session keyring: %m");
2557 /* Restore uid/gid back */
2558 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2559 if (setreuid(saved_uid
, -1) < 0) {
2560 r
= log_unit_error_errno(u
, errno
, "Failed to change UID back for user keyring: %m");
2565 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2566 if (setregid(saved_gid
, -1) < 0)
2567 return log_unit_error_errno(u
, errno
, "Failed to change GID back for user keyring: %m");
2570 /* Populate the keyring with the invocation ID by default, as original saved_uid. */
2571 if (!sd_id128_is_null(u
->invocation_id
)) {
2574 key
= add_key("user", "invocation_id", &u
->invocation_id
, sizeof(u
->invocation_id
), KEY_SPEC_SESSION_KEYRING
);
2576 log_unit_debug_errno(u
, errno
, "Failed to add invocation ID to keyring, ignoring: %m");
2578 if (keyctl(KEYCTL_SETPERM
, key
,
2579 KEY_POS_VIEW
|KEY_POS_READ
|KEY_POS_SEARCH
|
2580 KEY_USR_VIEW
|KEY_USR_READ
|KEY_USR_SEARCH
, 0, 0) < 0)
2581 r
= log_unit_error_errno(u
, errno
, "Failed to restrict invocation ID permission: %m");
2586 /* Revert back uid & gid for the last time, and exit */
2587 /* no extra logging, as only the first already reported error matters */
2588 if (getuid() != saved_uid
)
2589 (void) setreuid(saved_uid
, -1);
2591 if (getgid() != saved_gid
)
2592 (void) setregid(saved_gid
, -1);
/* Appends the two fds of a socket pair to array[], advancing the element counter *n for
 * each fd stored. The caller must provide room for two more entries. */
2597 static void append_socket_pair(int *array
, size_t *n
, const int pair
[2]) {
2605 array
[(*n
)++] = pair
[0];
2607 array
[(*n
)++] = pair
[1];
/* Closes every open file descriptor except those the executed process still needs.
 *
 * The keep-list is collected in dont_close[] and handed to close_all_fds(), whose result is
 * returned. It may contain: stdin/stdout/stderr from params (3), socket_fd (1), exec_fd (1),
 * the n_fds entries of fds[], the network namespace storage socket pair of the runtime (2),
 * the storage socket pairs of the dynamic user and group (2 + 2) and user_lookup_fd (1).
 * That adds up to n_fds + 12, which is the size dont_close[] is declared with. */
2610 static int close_remaining_fds(
2611 const ExecParameters
*params
,
2612 const ExecRuntime
*runtime
,
2613 const DynamicCreds
*dcreds
,
2617 int *fds
, size_t n_fds
) {
2619 size_t n_dont_close
= 0;
2620 int dont_close
[n_fds
+ 12];
2624 if (params
->stdin_fd
>= 0)
2625 dont_close
[n_dont_close
++] = params
->stdin_fd
;
2626 if (params
->stdout_fd
>= 0)
2627 dont_close
[n_dont_close
++] = params
->stdout_fd
;
2628 if (params
->stderr_fd
>= 0)
2629 dont_close
[n_dont_close
++] = params
->stderr_fd
;
2632 dont_close
[n_dont_close
++] = socket_fd
;
2634 dont_close
[n_dont_close
++] = exec_fd
;
2636 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
2637 n_dont_close
+= n_fds
;
2641 append_socket_pair(dont_close
, &n_dont_close
, runtime
->netns_storage_socket
);
2645 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->user
->storage_socket
);
2647 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->group
->storage_socket
);
2650 if (user_lookup_fd
>= 0)
2651 dont_close
[n_dont_close
++] = user_lookup_fd
;
2653 return close_all_fds(dont_close
, n_dont_close
);
/* Reports the resolved credentials of a unit on user_lookup_fd.
 *
 * The message is written with a single writev() of three iovecs: the raw uid, the raw gid
 * and the unit id string (unit->id). Two checks precede the write: user_lookup_fd < 0, and
 * both uid and gid being invalid. */
2656 static int send_user_lookup(
2664 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2665 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2668 if (user_lookup_fd
< 0)
2671 if (!uid_is_valid(uid
) && !gid_is_valid(gid
))
2674 if (writev(user_lookup_fd
,
2676 IOVEC_INIT(&uid
, sizeof(uid
)),
2677 IOVEC_INIT(&gid
, sizeof(gid
)),
2678 IOVEC_INIT_STRING(unit
->id
) }, 3) < 0)
/* Determines the home directory to use when WorkingDirectory=~ is configured
 * (c->working_directory_home). The lookup goes through get_home_dir(), which stores its
 * result in *buf.
 * NOTE(review): presumably *home is pointed at *buf afterwards and the caller frees *buf —
 * confirm against the full function. */
2684 static int acquire_home(const ExecContext
*c
, uid_t uid
, const char** home
, char **buf
) {
2691 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2696 if (!c
->working_directory_home
)
2700 /* Hardcode /root as home directory for UID 0 */
2705 r
= get_home_dir(buf
);
/* Builds a list of exec directory paths for a DynamicUser= context (c->dynamic_user is
 * asserted) and returns it via *ret.
 *
 * For every directory type and every configured path, an entry "<prefix>/<name>" is created
 * for runtime directories and "<prefix>/private/<name>" for the other types; the loop
 * carries a check on EXEC_DIRECTORY_CONFIGURATION (presumably skipping that type — confirm).
 * Each entry is handed to strv_consume(), so the list owns it; the list itself is freed by
 * _cleanup_strv_free_ unless passed on with TAKE_PTR(). */
2713 static int compile_suggested_paths(const ExecContext
*c
, const ExecParameters
*p
, char ***ret
) {
2714 _cleanup_strv_free_
char ** list
= NULL
;
2715 ExecDirectoryType t
;
2722 assert(c
->dynamic_user
);
2724 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2725 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2728 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2731 if (t
== EXEC_DIRECTORY_CONFIGURATION
)
2737 STRV_FOREACH(i
, c
->directories
[t
].paths
) {
2740 if (t
== EXEC_DIRECTORY_RUNTIME
)
2741 e
= strjoin(p
->prefix
[t
], "/", *i
);
2743 e
= strjoin(p
->prefix
[t
], "/private/", *i
);
2747 r
= strv_consume(&list
, e
);
2753 *ret
= TAKE_PTR(list
);
/* Forward declaration: turns an argument vector into a single string. exec_child() below
 * passes the result to ask_for_confirmation(). */
2758 static char *exec_command_line(char **argv
);
2760 static int exec_child(
2762 const ExecCommand
*command
,
2763 const ExecContext
*context
,
2764 const ExecParameters
*params
,
2765 ExecRuntime
*runtime
,
2766 DynamicCreds
*dcreds
,
2770 size_t n_socket_fds
,
2771 size_t n_storage_fds
,
2776 _cleanup_strv_free_
char **our_env
= NULL
, **pass_env
= NULL
, **accum_env
= NULL
, **final_argv
= NULL
;
2777 int *fds_with_exec_fd
, n_fds_with_exec_fd
, r
, ngids
= 0, exec_fd
= -1;
2778 _cleanup_free_ gid_t
*supplementary_gids
= NULL
;
2779 const char *username
= NULL
, *groupname
= NULL
;
2780 _cleanup_free_
char *home_buffer
= NULL
;
2781 const char *home
= NULL
, *shell
= NULL
;
2782 dev_t journal_stream_dev
= 0;
2783 ino_t journal_stream_ino
= 0;
2784 bool needs_sandboxing
, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2785 needs_setuid
, /* Do we need to do the actual setresuid()/setresgid() calls? */
2786 needs_mount_namespace
, /* Do we need to set up a mount namespace for this kernel? */
2787 needs_ambient_hack
; /* Do we need to apply the ambient capabilities hack? */
2789 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
2790 bool use_selinux
= false;
2793 bool use_smack
= false;
2796 bool use_apparmor
= false;
2798 uid_t uid
= UID_INVALID
;
2799 gid_t gid
= GID_INVALID
;
2801 ExecDirectoryType dt
;
2808 assert(exit_status
);
2810 rename_process_from_path(command
->path
);
2812 /* We reset exactly these signals, since they are the
2813 * only ones we set to SIG_IGN in the main daemon. All
2814 * others we leave untouched because we set them to
2815 * SIG_DFL or a valid handler initially, both of which
2816 * will be demoted to SIG_DFL. */
2817 (void) default_signals(SIGNALS_CRASH_HANDLER
,
2818 SIGNALS_IGNORE
, -1);
2820 if (context
->ignore_sigpipe
)
2821 (void) ignore_signals(SIGPIPE
, -1);
2823 r
= reset_signal_mask();
2825 *exit_status
= EXIT_SIGNAL_MASK
;
2826 return log_unit_error_errno(unit
, r
, "Failed to set process signal mask: %m");
2829 if (params
->idle_pipe
)
2830 do_idle_pipe_dance(params
->idle_pipe
);
2832 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2833 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2834 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2835 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2838 log_set_open_when_needed(true);
2840 /* In case anything used libc syslog(), close this here, too */
2843 n_fds
= n_socket_fds
+ n_storage_fds
;
2844 r
= close_remaining_fds(params
, runtime
, dcreds
, user_lookup_fd
, socket_fd
, params
->exec_fd
, fds
, n_fds
);
2846 *exit_status
= EXIT_FDS
;
2847 return log_unit_error_errno(unit
, r
, "Failed to close unwanted file descriptors: %m");
2850 if (!context
->same_pgrp
)
2852 *exit_status
= EXIT_SETSID
;
2853 return log_unit_error_errno(unit
, errno
, "Failed to create new process session: %m");
2856 exec_context_tty_reset(context
, params
);
2858 if (unit_shall_confirm_spawn(unit
)) {
2859 const char *vc
= params
->confirm_spawn
;
2860 _cleanup_free_
char *cmdline
= NULL
;
2862 cmdline
= exec_command_line(command
->argv
);
2864 *exit_status
= EXIT_MEMORY
;
2868 r
= ask_for_confirmation(vc
, unit
, cmdline
);
2869 if (r
!= CONFIRM_EXECUTE
) {
2870 if (r
== CONFIRM_PRETEND_SUCCESS
) {
2871 *exit_status
= EXIT_SUCCESS
;
2874 *exit_status
= EXIT_CONFIRM
;
2875 log_unit_error(unit
, "Execution cancelled by the user");
2880 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2881 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2882 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2883 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2884 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2885 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit
->id
, true) != 0 ||
2886 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit
->manager
) ? "system" : "user", true) != 0) {
2887 *exit_status
= EXIT_MEMORY
;
2888 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
2891 if (context
->dynamic_user
&& dcreds
) {
2892 _cleanup_strv_free_
char **suggested_paths
= NULL
;
2894 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2895 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
2896 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2897 *exit_status
= EXIT_USER
;
2898 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
2901 r
= compile_suggested_paths(context
, params
, &suggested_paths
);
2903 *exit_status
= EXIT_MEMORY
;
2907 r
= dynamic_creds_realize(dcreds
, suggested_paths
, &uid
, &gid
);
2909 *exit_status
= EXIT_USER
;
2911 log_unit_error(unit
, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2914 return log_unit_error_errno(unit
, r
, "Failed to update dynamic user credentials: %m");
2917 if (!uid_is_valid(uid
)) {
2918 *exit_status
= EXIT_USER
;
2919 log_unit_error(unit
, "UID validation failed for \""UID_FMT
"\"", uid
);
2923 if (!gid_is_valid(gid
)) {
2924 *exit_status
= EXIT_USER
;
2925 log_unit_error(unit
, "GID validation failed for \""GID_FMT
"\"", gid
);
2930 username
= dcreds
->user
->name
;
2933 r
= get_fixed_user(context
, &username
, &uid
, &gid
, &home
, &shell
);
2935 *exit_status
= EXIT_USER
;
2936 return log_unit_error_errno(unit
, r
, "Failed to determine user credentials: %m");
2939 r
= get_fixed_group(context
, &groupname
, &gid
);
2941 *exit_status
= EXIT_GROUP
;
2942 return log_unit_error_errno(unit
, r
, "Failed to determine group credentials: %m");
2946 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2947 r
= get_supplementary_groups(context
, username
, groupname
, gid
,
2948 &supplementary_gids
, &ngids
);
2950 *exit_status
= EXIT_GROUP
;
2951 return log_unit_error_errno(unit
, r
, "Failed to determine supplementary groups: %m");
2954 r
= send_user_lookup(unit
, user_lookup_fd
, uid
, gid
);
2956 *exit_status
= EXIT_USER
;
2957 return log_unit_error_errno(unit
, r
, "Failed to send user credentials to PID1: %m");
2960 user_lookup_fd
= safe_close(user_lookup_fd
);
2962 r
= acquire_home(context
, uid
, &home
, &home_buffer
);
2964 *exit_status
= EXIT_CHDIR
;
2965 return log_unit_error_errno(unit
, r
, "Failed to determine $HOME for user: %m");
2968 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2969 * must make sure to drop O_NONBLOCK */
2971 (void) fd_nonblock(socket_fd
, false);
2973 r
= setup_input(context
, params
, socket_fd
, named_iofds
);
2975 *exit_status
= EXIT_STDIN
;
2976 return log_unit_error_errno(unit
, r
, "Failed to set up standard input: %m");
2979 r
= setup_output(unit
, context
, params
, STDOUT_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2981 *exit_status
= EXIT_STDOUT
;
2982 return log_unit_error_errno(unit
, r
, "Failed to set up standard output: %m");
2985 r
= setup_output(unit
, context
, params
, STDERR_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2987 *exit_status
= EXIT_STDERR
;
2988 return log_unit_error_errno(unit
, r
, "Failed to set up standard error output: %m");
2991 if (params
->cgroup_path
) {
2992 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
2994 *exit_status
= EXIT_CGROUP
;
2995 return log_unit_error_errno(unit
, r
, "Failed to attach to cgroup %s: %m", params
->cgroup_path
);
2999 if (context
->oom_score_adjust_set
) {
3000 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3001 * prohibit write access to this file, and we shouldn't trip up over that. */
3002 r
= set_oom_score_adjust(context
->oom_score_adjust
);
3003 if (IN_SET(r
, -EPERM
, -EACCES
))
3004 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
3006 *exit_status
= EXIT_OOM_ADJUST
;
3007 return log_unit_error_errno(unit
, r
, "Failed to adjust OOM setting: %m");
3011 if (context
->nice_set
)
3012 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
3013 *exit_status
= EXIT_NICE
;
3014 return log_unit_error_errno(unit
, errno
, "Failed to set up process scheduling priority (nice level): %m");
3017 if (context
->cpu_sched_set
) {
3018 struct sched_param param
= {
3019 .sched_priority
= context
->cpu_sched_priority
,
3022 r
= sched_setscheduler(0,
3023 context
->cpu_sched_policy
|
3024 (context
->cpu_sched_reset_on_fork
?
3025 SCHED_RESET_ON_FORK
: 0),
3028 *exit_status
= EXIT_SETSCHEDULER
;
3029 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU scheduling: %m");
3033 if (context
->cpuset
)
3034 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
3035 *exit_status
= EXIT_CPUAFFINITY
;
3036 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU affinity: %m");
3039 if (context
->ioprio_set
)
3040 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
3041 *exit_status
= EXIT_IOPRIO
;
3042 return log_unit_error_errno(unit
, errno
, "Failed to set up IO scheduling priority: %m");
3045 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
3046 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
3047 *exit_status
= EXIT_TIMERSLACK
;
3048 return log_unit_error_errno(unit
, errno
, "Failed to set up timer slack: %m");
3051 if (context
->personality
!= PERSONALITY_INVALID
) {
3052 r
= safe_personality(context
->personality
);
3054 *exit_status
= EXIT_PERSONALITY
;
3055 return log_unit_error_errno(unit
, r
, "Failed to set up execution domain (personality): %m");
3059 if (context
->utmp_id
)
3060 utmp_put_init_process(context
->utmp_id
, getpid_cached(), getsid(0),
3062 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
3063 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
3067 if (context
->user
) {
3068 r
= chown_terminal(STDIN_FILENO
, uid
);
3070 *exit_status
= EXIT_STDIN
;
3071 return log_unit_error_errno(unit
, r
, "Failed to change ownership of terminal: %m");
3075 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3076 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3077 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3078 * touch a single hierarchy too. */
3079 if (params
->cgroup_path
&& context
->user
&& (params
->flags
& EXEC_CGROUP_DELEGATE
)) {
3080 r
= cg_set_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, uid
, gid
);
3082 *exit_status
= EXIT_CGROUP
;
3083 return log_unit_error_errno(unit
, r
, "Failed to adjust control group access: %m");
3087 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
3088 r
= setup_exec_directory(context
, params
, uid
, gid
, dt
, exit_status
);
3090 return log_unit_error_errno(unit
, r
, "Failed to set up special execution directory in %s: %m", params
->prefix
[dt
]);
3093 r
= build_environment(
3105 *exit_status
= EXIT_MEMORY
;
3109 r
= build_pass_environment(context
, &pass_env
);
3111 *exit_status
= EXIT_MEMORY
;
3115 accum_env
= strv_env_merge(5,
3116 params
->environment
,
3119 context
->environment
,
3123 *exit_status
= EXIT_MEMORY
;
3126 accum_env
= strv_env_clean(accum_env
);
3128 (void) umask(context
->umask
);
3130 r
= setup_keyring(unit
, context
, params
, uid
, gid
);
3132 *exit_status
= EXIT_KEYRING
;
3133 return log_unit_error_errno(unit
, r
, "Failed to set up kernel keyring: %m");
3136 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3137 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
3139 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3140 needs_ambient_hack
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && (command
->flags
& EXEC_COMMAND_AMBIENT_MAGIC
) && !ambient_capabilities_supported();
3142 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3143 if (needs_ambient_hack
)
3144 needs_setuid
= false;
3146 needs_setuid
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& (EXEC_COMMAND_FULLY_PRIVILEGED
|EXEC_COMMAND_NO_SETUID
));
3148 if (needs_sandboxing
) {
3149 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3150 * present. The actual MAC context application will happen later, as late as possible, to avoid
3151 * impacting our own code paths. */
3154 use_selinux
= mac_selinux_use();
3157 use_smack
= mac_smack_use();
3160 use_apparmor
= mac_apparmor_use();
3165 if (context
->pam_name
&& username
) {
3166 r
= setup_pam(context
->pam_name
, username
, uid
, gid
, context
->tty_path
, &accum_env
, fds
, n_fds
);
3168 *exit_status
= EXIT_PAM
;
3169 return log_unit_error_errno(unit
, r
, "Failed to set up PAM session: %m");
3174 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
3175 if (ns_type_supported(NAMESPACE_NET
)) {
3176 r
= setup_netns(runtime
->netns_storage_socket
);
3178 *exit_status
= EXIT_NETWORK
;
3179 return log_unit_error_errno(unit
, r
, "Failed to set up network namespacing: %m");
3182 log_unit_warning(unit
, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3185 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
3186 if (needs_mount_namespace
) {
3187 r
= apply_mount_namespace(unit
, command
, context
, params
, runtime
);
3189 *exit_status
= EXIT_NAMESPACE
;
3190 return log_unit_error_errno(unit
, r
, "Failed to set up mount namespacing: %m");
3194 /* Apply just after mount namespace setup */
3195 r
= apply_working_directory(context
, params
, home
, needs_mount_namespace
, exit_status
);
3197 return log_unit_error_errno(unit
, r
, "Changing to the requested working directory failed: %m");
3199 /* Drop groups as early as possible */
3201 r
= enforce_groups(gid
, supplementary_gids
, ngids
);
3203 *exit_status
= EXIT_GROUP
;
3204 return log_unit_error_errno(unit
, r
, "Changing group credentials failed: %m");
3208 if (needs_sandboxing
) {
3210 if (use_selinux
&& params
->selinux_context_net
&& socket_fd
>= 0) {
3211 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
3213 *exit_status
= EXIT_SELINUX_CONTEXT
;
3214 return log_unit_error_errno(unit
, r
, "Failed to determine SELinux context: %m");
3219 if (context
->private_users
) {
3220 r
= setup_private_users(uid
, gid
);
3222 *exit_status
= EXIT_USER
;
3223 return log_unit_error_errno(unit
, r
, "Failed to set up user namespacing: %m");
3228 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3229 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3230 * however if we have it as we want to keep it open until the final execve(). */
3232 if (params
->exec_fd
>= 0) {
3233 exec_fd
= params
->exec_fd
;
3235 if (exec_fd
< 3 + (int) n_fds
) {
3238 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3239 * process we are about to execute. */
3241 moved_fd
= fcntl(exec_fd
, F_DUPFD_CLOEXEC
, 3 + (int) n_fds
);
3243 *exit_status
= EXIT_FDS
;
3244 return log_unit_error_errno(unit
, errno
, "Couldn't move exec fd up: %m");
3247 safe_close(exec_fd
);
3250 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3251 r
= fd_cloexec(exec_fd
, true);
3253 *exit_status
= EXIT_FDS
;
3254 return log_unit_error_errno(unit
, r
, "Failed to make exec fd FD_CLOEXEC: %m");
3258 fds_with_exec_fd
= newa(int, n_fds
+ 1);
3259 memcpy_safe(fds_with_exec_fd
, fds
, n_fds
* sizeof(int));
3260 fds_with_exec_fd
[n_fds
] = exec_fd
;
3261 n_fds_with_exec_fd
= n_fds
+ 1;
3263 fds_with_exec_fd
= fds
;
3264 n_fds_with_exec_fd
= n_fds
;
3267 r
= close_all_fds(fds_with_exec_fd
, n_fds_with_exec_fd
);
3269 r
= shift_fds(fds
, n_fds
);
3271 r
= flags_fds(fds
, n_socket_fds
, n_storage_fds
, context
->non_blocking
);
3273 *exit_status
= EXIT_FDS
;
3274 return log_unit_error_errno(unit
, r
, "Failed to adjust passed file descriptors: %m");
3277 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3278 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3279 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3282 secure_bits
= context
->secure_bits
;
3284 if (needs_sandboxing
) {
3288 r
= setrlimit_closest_all((const struct rlimit
* const *) context
->rlimit
, &which_failed
);
3290 *exit_status
= EXIT_LIMITS
;
3291 return log_unit_error_errno(unit
, r
, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed
));
3294 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3295 if (context
->restrict_realtime
&& !context
->rlimit
[RLIMIT_RTPRIO
]) {
3296 if (setrlimit(RLIMIT_RTPRIO
, &RLIMIT_MAKE_CONST(0)) < 0) {
3297 *exit_status
= EXIT_LIMITS
;
3298 return log_unit_error_errno(unit
, errno
, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3303 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3304 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3306 r
= setup_smack(context
, command
);
3308 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
3309 return log_unit_error_errno(unit
, r
, "Failed to set SMACK process label: %m");
3314 bset
= context
->capability_bounding_set
;
3315 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3316 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3317 * instead of us doing that */
3318 if (needs_ambient_hack
)
3319 bset
|= (UINT64_C(1) << CAP_SETPCAP
) |
3320 (UINT64_C(1) << CAP_SETUID
) |
3321 (UINT64_C(1) << CAP_SETGID
);
3323 if (!cap_test_all(bset
)) {
3324 r
= capability_bounding_set_drop(bset
, false);
3326 *exit_status
= EXIT_CAPABILITIES
;
3327 return log_unit_error_errno(unit
, r
, "Failed to drop capabilities: %m");
3331 /* This is done before enforce_user, but ambient set
3332 * does not survive over setresuid() if keep_caps is not set. */
3333 if (!needs_ambient_hack
&&
3334 context
->capability_ambient_set
!= 0) {
3335 r
= capability_ambient_set_apply(context
->capability_ambient_set
, true);
3337 *exit_status
= EXIT_CAPABILITIES
;
3338 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (before UID change): %m");
3344 if (context
->user
) {
3345 r
= enforce_user(context
, uid
);
3347 *exit_status
= EXIT_USER
;
3348 return log_unit_error_errno(unit
, r
, "Failed to change UID to " UID_FMT
": %m", uid
);
3351 if (!needs_ambient_hack
&&
3352 context
->capability_ambient_set
!= 0) {
3354 /* Fix the ambient capabilities after user change. */
3355 r
= capability_ambient_set_apply(context
->capability_ambient_set
, false);
3357 *exit_status
= EXIT_CAPABILITIES
;
3358 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (after UID change): %m");
3361 /* If we were asked to change user and ambient capabilities
3362 * were requested, we had to add keep-caps to the securebits
3363 * so that we would maintain the inherited capability set
3364 * through the setresuid(). Make sure that the bit is added
3365 * also to the context secure_bits so that we don't try to
3366 * drop the bit away next. */
3368 secure_bits
|= 1<<SECURE_KEEP_CAPS
;
3373 if (needs_sandboxing
) {
3374 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3375 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3376 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3377 * are restricted. */
3381 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
3384 r
= setexeccon(exec_context
);
3386 *exit_status
= EXIT_SELINUX_CONTEXT
;
3387 return log_unit_error_errno(unit
, r
, "Failed to change SELinux context to %s: %m", exec_context
);
3394 if (use_apparmor
&& context
->apparmor_profile
) {
3395 r
= aa_change_onexec(context
->apparmor_profile
);
3396 if (r
< 0 && !context
->apparmor_profile_ignore
) {
3397 *exit_status
= EXIT_APPARMOR_PROFILE
;
3398 return log_unit_error_errno(unit
, errno
, "Failed to prepare AppArmor profile change to %s: %m", context
->apparmor_profile
);
3403 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3404 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3405 if (prctl(PR_GET_SECUREBITS
) != secure_bits
)
3406 if (prctl(PR_SET_SECUREBITS
, secure_bits
) < 0) {
3407 *exit_status
= EXIT_SECUREBITS
;
3408 return log_unit_error_errno(unit
, errno
, "Failed to set process secure bits: %m");
3411 if (context_has_no_new_privileges(context
))
3412 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
3413 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
3414 return log_unit_error_errno(unit
, errno
, "Failed to disable new privileges: %m");
3418 r
= apply_address_families(unit
, context
);
3420 *exit_status
= EXIT_ADDRESS_FAMILIES
;
3421 return log_unit_error_errno(unit
, r
, "Failed to restrict address families: %m");
3424 r
= apply_memory_deny_write_execute(unit
, context
);
3426 *exit_status
= EXIT_SECCOMP
;
3427 return log_unit_error_errno(unit
, r
, "Failed to disable writing to executable memory: %m");
3430 r
= apply_restrict_realtime(unit
, context
);
3432 *exit_status
= EXIT_SECCOMP
;
3433 return log_unit_error_errno(unit
, r
, "Failed to apply realtime restrictions: %m");
3436 r
= apply_restrict_namespaces(unit
, context
);
3438 *exit_status
= EXIT_SECCOMP
;
3439 return log_unit_error_errno(unit
, r
, "Failed to apply namespace restrictions: %m");
3442 r
= apply_protect_sysctl(unit
, context
);
3444 *exit_status
= EXIT_SECCOMP
;
3445 return log_unit_error_errno(unit
, r
, "Failed to apply sysctl restrictions: %m");
3448 r
= apply_protect_kernel_modules(unit
, context
);
3450 *exit_status
= EXIT_SECCOMP
;
3451 return log_unit_error_errno(unit
, r
, "Failed to apply module loading restrictions: %m");
3454 r
= apply_private_devices(unit
, context
);
3456 *exit_status
= EXIT_SECCOMP
;
3457 return log_unit_error_errno(unit
, r
, "Failed to set up private devices: %m");
3460 r
= apply_syscall_archs(unit
, context
);
3462 *exit_status
= EXIT_SECCOMP
;
3463 return log_unit_error_errno(unit
, r
, "Failed to apply syscall architecture restrictions: %m");
3466 r
= apply_lock_personality(unit
, context
);
3468 *exit_status
= EXIT_SECCOMP
;
3469 return log_unit_error_errno(unit
, r
, "Failed to lock personalities: %m");
3472 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3473 * by the filter as little as possible. */
3474 r
= apply_syscall_filter(unit
, context
, needs_ambient_hack
);
3476 *exit_status
= EXIT_SECCOMP
;
3477 return log_unit_error_errno(unit
, r
, "Failed to apply system call filters: %m");
3482 if (!strv_isempty(context
->unset_environment
)) {
3485 ee
= strv_env_delete(accum_env
, 1, context
->unset_environment
);
3487 *exit_status
= EXIT_MEMORY
;
3491 strv_free_and_replace(accum_env
, ee
);
3494 final_argv
= replace_env_argv(command
->argv
, accum_env
);
3496 *exit_status
= EXIT_MEMORY
;
3500 if (DEBUG_LOGGING
) {
3501 _cleanup_free_
char *line
;
3503 line
= exec_command_line(final_argv
);
3505 log_struct(LOG_DEBUG
,
3506 "EXECUTABLE=%s", command
->path
,
3507 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
3509 LOG_UNIT_INVOCATION_ID(unit
));
3515 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3516 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3518 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3519 *exit_status
= EXIT_EXEC
;
3520 return log_unit_error_errno(unit
, errno
, "Failed to enable exec_fd: %m");
3524 execve(command
->path
, final_argv
, accum_env
);
3530 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3531 * that POLLHUP on it no longer means execve() succeeded. */
3533 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3534 *exit_status
= EXIT_EXEC
;
3535 return log_unit_error_errno(unit
, errno
, "Failed to disable exec_fd: %m");
3539 if (r
== -ENOENT
&& (command
->flags
& EXEC_COMMAND_IGNORE_FAILURE
)) {
3540 log_struct_errno(LOG_INFO
, r
,
3541 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3543 LOG_UNIT_INVOCATION_ID(unit
),
3544 LOG_UNIT_MESSAGE(unit
, "Executable %s missing, skipping: %m",
3546 "EXECUTABLE=%s", command
->path
);
3550 *exit_status
= EXIT_EXEC
;
3551 return log_unit_error_errno(unit
, r
, "Failed to execute command: %m");
/* Forward declarations: both helpers are defined further down in this file but are already called from
 * exec_spawn() below. */
3554 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
);
3555 static int exec_context_named_iofds(const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[3]);
/* Spawns `command` on behalf of `unit`.
 *
 * Steps visible in this excerpt (several statements, including the fork call itself, the remaining
 * parameters and the return statements of the success path, are not visible here):
 *   - asserts that params->fds is set whenever any socket or storage fds are announced;
 *   - if stdin, stdout or stderr is configured as a socket, requires exactly one socket fd and uses
 *     params->fds[0] as socket_fd;
 *   - resolves named stdio fds via exec_context_named_iofds() and loads the environment files via
 *     exec_context_load_environment(); a failure of either is logged and returned;
 *   - logs the command line at debug level, then runs exec_child();
 *   - logs the new pid, attaches it to params->cgroup_path if one is set (result ignored) and records
 *     the start in command->exec_status.
 */
3557 int exec_spawn(Unit
*unit
,
3558 ExecCommand
*command
,
3559 const ExecContext
*context
,
3560 const ExecParameters
*params
,
3561 ExecRuntime
*runtime
,
3562 DynamicCreds
*dcreds
,
/* named_iofds starts out as { -1, -1, -1 }; exec_context_named_iofds() treats a negative slot as not yet resolved. */
3565 int socket_fd
, r
, named_iofds
[3] = { -1, -1, -1 }, *fds
= NULL
;
3566 _cleanup_strv_free_
char **files_env
= NULL
;
3567 size_t n_storage_fds
= 0, n_socket_fds
= 0;
3568 _cleanup_free_
char *line
= NULL
;
3576 assert(params
->fds
|| (params
->n_socket_fds
+ params
->n_storage_fds
<= 0));
/* Socket-backed stdio: more than one or zero socket fds is logged as an error; otherwise the first fd is used. */
3578 if (context
->std_input
== EXEC_INPUT_SOCKET
||
3579 context
->std_output
== EXEC_OUTPUT_SOCKET
||
3580 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
3582 if (params
->n_socket_fds
> 1) {
3583 log_unit_error(unit
, "Got more than one socket.");
3587 if (params
->n_socket_fds
== 0) {
3588 log_unit_error(unit
, "Got no socket.");
3592 socket_fd
= params
->fds
[0];
3596 n_socket_fds
= params
->n_socket_fds
;
3597 n_storage_fds
= params
->n_storage_fds
;
3600 r
= exec_context_named_iofds(context
, params
, named_iofds
);
3602 return log_unit_error_errno(unit
, r
, "Failed to load a named file descriptor: %m");
3604 r
= exec_context_load_environment(unit
, context
, &files_env
);
3606 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
3608 line
= exec_command_line(command
->argv
);
3612 log_struct(LOG_DEBUG
,
3613 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
3614 "EXECUTABLE=%s", command
->path
,
3616 LOG_UNIT_INVOCATION_ID(unit
));
3620 return log_unit_error_errno(unit
, errno
, "Failed to fork: %m");
/* NOTE(review): the lines from here to the spawn-failure log presumably form the child branch of the fork
 * (the fork call and the branch condition are not visible in this excerpt): confirm against the full file.
 * exec_child() reports the failing step through exit_status, which is turned into text for the log below. */
3623 int exit_status
= EXIT_SUCCESS
;
3625 r
= exec_child(unit
,
3637 unit
->manager
->user_lookup_fds
[1],
3641 log_struct_errno(LOG_ERR
, r
,
3642 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3644 LOG_UNIT_INVOCATION_ID(unit
),
3645 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
3646 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
3648 "EXECUTABLE=%s", command
->path
);
3653 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
3655 /* We add the new process to the cgroup both in the child (so
3656 * that we can be sure that no user code is ever executed
3657 * outside of the cgroup) and in the parent (so that we can be
3658 * sure that when we kill the cgroup the process will be
3660 if (params
->cgroup_path
)
3661 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
3663 exec_status_start(&command
->exec_status
, pid
);
/* Fills an ExecContext with its default settings. Only the assignments visible in this excerpt are
 * described; any statements between the declaration and the first assignment are not visible here. */
3669 void exec_context_init(ExecContext
*c
) {
3670 ExecDirectoryType i
;
/* Default I/O priority: class IOPRIO_CLASS_BE with priority data 0. */
3675 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
3676 c
->cpu_sched_policy
= SCHED_OTHER
;
3677 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
3678 c
->syslog_level_prefix
= true;
3679 c
->ignore_sigpipe
= true;
/* NSEC_INFINITY and PERSONALITY_INVALID serve as the not-configured markers: exec_child() only applies
 * the timer slack and the personality when the fields differ from these values. */
3680 c
->timer_slack_nsec
= NSEC_INFINITY
;
3681 c
->personality
= PERSONALITY_INVALID
;
/* Every exec directory type starts with mode 0755. */
3682 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3683 c
->directories
[i
].mode
= 0755;
3684 c
->capability_bounding_set
= CAP_ALL
;
/* NOTE(review): assert_cc() is presumably a build-time assertion; it requires the initial namespace
 * flags value to be distinct from NAMESPACE_FLAGS_ALL. */
3685 assert_cc(NAMESPACE_FLAGS_INITIAL
!= NAMESPACE_FLAGS_ALL
);
3686 c
->restrict_namespaces
= NAMESPACE_FLAGS_INITIAL
;
3687 c
->log_level_max
= -1;
/* Releases the resources held by an ExecContext. Each owned field is handed to its matching free helper
 * (strv_free, mfree, set_free, hashmap_free, cpu_set_mfree, ...) and then overwritten with the helper's
 * return value, or reset explicitly to NULL / 0 where the helper returns nothing. log_level_max is put
 * back to -1, the same value exec_context_init() assigns. The context structure itself is not freed by
 * any statement visible here. The declaration of the loop counter `l` is not visible in this excerpt. */
3690 void exec_context_done(ExecContext
*c
) {
3691 ExecDirectoryType i
;
/* Environment related string vectors. */
3696 c
->environment
= strv_free(c
->environment
);
3697 c
->environment_files
= strv_free(c
->environment_files
);
3698 c
->pass_environment
= strv_free(c
->pass_environment
);
3699 c
->unset_environment
= strv_free(c
->unset_environment
);
3701 rlimit_free_all(c
->rlimit
);
/* One fd name and one file name per stdio slot (three slots). */
3703 for (l
= 0; l
< 3; l
++) {
3704 c
->stdio_fdname
[l
] = mfree(c
->stdio_fdname
[l
]);
3705 c
->stdio_file
[l
] = mfree(c
->stdio_file
[l
]);
3708 c
->working_directory
= mfree(c
->working_directory
);
3709 c
->root_directory
= mfree(c
->root_directory
);
3710 c
->root_image
= mfree(c
->root_image
);
3711 c
->tty_path
= mfree(c
->tty_path
);
3712 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
3713 c
->user
= mfree(c
->user
);
3714 c
->group
= mfree(c
->group
);
3716 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
3718 c
->pam_name
= mfree(c
->pam_name
);
3720 c
->read_only_paths
= strv_free(c
->read_only_paths
);
3721 c
->read_write_paths
= strv_free(c
->read_write_paths
);
3722 c
->inaccessible_paths
= strv_free(c
->inaccessible_paths
);
/* These two free helpers take array plus count; pointer and count are cleared by hand afterwards. */
3724 bind_mount_free_many(c
->bind_mounts
, c
->n_bind_mounts
);
3725 c
->bind_mounts
= NULL
;
3726 c
->n_bind_mounts
= 0;
3727 temporary_filesystem_free_many(c
->temporary_filesystems
, c
->n_temporary_filesystems
);
3728 c
->temporary_filesystems
= NULL
;
3729 c
->n_temporary_filesystems
= 0;
3731 c
->cpuset
= cpu_set_mfree(c
->cpuset
);
3733 c
->utmp_id
= mfree(c
->utmp_id
);
3734 c
->selinux_context
= mfree(c
->selinux_context
);
3735 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
3736 c
->smack_process_label
= mfree(c
->smack_process_label
);
3738 c
->syscall_filter
= hashmap_free(c
->syscall_filter
);
3739 c
->syscall_archs
= set_free(c
->syscall_archs
);
3740 c
->address_families
= set_free(c
->address_families
);
/* Only the path lists of the exec directories are released here; the mode field is left as is. */
3742 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3743 c
->directories
[i
].paths
= strv_free(c
->directories
[i
].paths
);
3745 c
->log_level_max
= -1;
3747 exec_context_free_log_extra_fields(c
);
3749 c
->stdin_data
= mfree(c
->stdin_data
);
3750 c
->stdin_data_size
= 0;
/* Removes the runtime directories configured in `c`. For every entry of
 * c->directories[EXEC_DIRECTORY_RUNTIME].paths the path runtime_prefix + "/" + entry is built with
 * strjoin() and removed with rm_rf(..., REMOVE_ROOT); the rm_rf() result is explicitly discarded.
 * There is a check for a NULL runtime_prefix up front; what it returns, the handling of a failed
 * strjoin() and the final return value are not visible in this excerpt. */
3753 int exec_context_destroy_runtime_directory(const ExecContext
*c
, const char *runtime_prefix
) {
3758 if (!runtime_prefix
)
3761 STRV_FOREACH(i
, c
->directories
[EXEC_DIRECTORY_RUNTIME
].paths
) {
/* p is assigned by strjoin() right below, before anything else visible here reads it. */
3762 _cleanup_free_
char *p
;
3764 p
= strjoin(runtime_prefix
, "/", *i
);
3768 /* We execute this synchronously, since we need to be sure this is gone when we start the service
3770 (void) rm_rf(p
, REMOVE_ROOT
);
/* Releases the path string and the argv vector of a single ExecCommand and stores the values returned
 * by mfree() / strv_free() back into the fields. The ExecCommand structure itself is not freed here. */
3776 static void exec_command_done(ExecCommand
*c
) {
3779 c
->path
= mfree(c
->path
);
3780 c
->argv
= strv_free(c
->argv
);
/* Calls exec_command_done() on each of the `n` ExecCommand elements of the array `c`, in index order.
 * The declaration of the counter `i` is not visible in this excerpt. */
3783 void exec_command_done_array(ExecCommand
*c
, size_t n
) {
3786 for (i
= 0; i
< n
; i
++)
3787 exec_command_done(c
+i
);
/* Tears down a linked list of ExecCommand entries: each entry `i` is unlinked from the list headed by
 * `c` with LIST_REMOVE() and its contents are released with exec_command_done().
 * NOTE(review): the loop header, the release of the entry itself and the return value are not visible
 * in this excerpt; exec_command_free_array() stores the return value back into its slot, so it is
 * presumably NULL: confirm against the full file. */
3790 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
3794 LIST_REMOVE(command
, c
, i
);
3795 exec_command_done(i
);
/* Treats `c` as an array of `n` list heads: every list is released with exec_command_free_list() and
 * the slot is overwritten with that function's return value. */
3802 void exec_command_free_array(ExecCommand
**c
, size_t n
) {
3805 for (i
= 0; i
< n
; i
++)
3806 c
[i
] = exec_command_free_list(c
[i
]);
/* Calls exec_status_reset() on the exec_status member of each of the `n` ExecCommand elements of the
 * flat array `c`. */
3809 void exec_command_reset_status_array(ExecCommand
*c
, size_t n
) {
3812 for (i
= 0; i
< n
; i
++)
3813 exec_status_reset(&c
[i
].exec_status
);
/* Like exec_command_reset_status_array(), but `c` is an array of `n` list heads: for every slot the
 * whole list is walked with LIST_FOREACH() and exec_status_reset() is called on each entry. */
3816 void exec_command_reset_status_list_array(ExecCommand
**c
, size_t n
) {
3819 for (i
= 0; i
< n
; i
++) {
3822 LIST_FOREACH(command
, z
, c
[i
])
3823 exec_status_reset(&z
->exec_status
);
/* Context handed as userdata to invalid_env(). The member list is not visible in this excerpt; the
 * members referenced elsewhere in this file are `unit` and `path`. */
3827 typedef struct InvalidEnvInfo
{
/* Callback passed to strv_env_clean_with_callback() by exec_context_load_environment(). `p` is the
 * rejected assignment and `userdata` points to an InvalidEnvInfo; the assignment is logged at error
 * level for info->unit together with info->path. */
3832 static void invalid_env(const char *p
, void *userdata
) {
3833 InvalidEnvInfo
*info
= userdata
;
3835 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
/* Returns the fd name to look up for one of the three stdio slots. Visible here: each slot has a
 * check whether the corresponding stream is configured as a named fd (EXEC_INPUT_NAMED_FD /
 * EXEC_OUTPUT_NAMED_FD); if it is, the configured c->stdio_fdname[] entry is returned, with the
 * literal stdin / stdout / stderr as fallback when no name was configured.
 * NOTE(review): the dispatch on fd_index and the value returned when the stream is not a named fd are
 * not visible in this excerpt; exec_context_named_iofds() NULL-checks the result, so it is presumably
 * NULL: confirm against the full file. */
3838 const char* exec_context_fdname(const ExecContext
*c
, int fd_index
) {
3844 if (c
->std_input
!= EXEC_INPUT_NAMED_FD
)
3847 return c
->stdio_fdname
[STDIN_FILENO
] ?: "stdin";
3850 if (c
->std_output
!= EXEC_OUTPUT_NAMED_FD
)
3853 return c
->stdio_fdname
[STDOUT_FILENO
] ?: "stdout";
3856 if (c
->std_error
!= EXEC_OUTPUT_NAMED_FD
)
3859 return c
->stdio_fdname
[STDERR_FILENO
] ?: "stderr";
/* Resolves the stdio streams that are configured as named fds to actual fd numbers.
 *
 * c           - context; std_input / std_output / std_error select which slots need resolving.
 * p           - parameters; p->fd_names[i] is compared against the wanted name and p->fds[i] is the
 *               fd taken on a match. n_storage_fds + n_socket_fds entries are scanned.
 * named_iofds - in/out array indexed by STDIN_FILENO / STDOUT_FILENO / STDERR_FILENO. Only slots that
 *               are still negative are filled (exec_spawn() passes it in as { -1, -1, -1 }).
 *
 * Returns 0 if `targets` is 0 at the end, -ENOENT otherwise.
 * NOTE(review): the statements that decrement `targets` are not visible in this excerpt; presumably
 * each successful assignment decrements it: confirm against the full file. */
3866 static int exec_context_named_iofds(const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[3]) {
3868 const char* stdio_fdname
[3];
/* Number of stdio streams that are configured as named fds (0..3). */
3874 targets
= (c
->std_input
== EXEC_INPUT_NAMED_FD
) +
3875 (c
->std_output
== EXEC_OUTPUT_NAMED_FD
) +
3876 (c
->std_error
== EXEC_OUTPUT_NAMED_FD
);
/* Look up the wanted name for each of the three slots once, up front. */
3878 for (i
= 0; i
< 3; i
++)
3879 stdio_fdname
[i
] = exec_context_fdname(c
, i
);
3881 n_fds
= p
->n_storage_fds
+ p
->n_socket_fds
;
/* The scan stops early once targets drops to 0. Because of the else-if chain a single passed fd is
 * assigned to at most one slot per iteration, with stdin checked first, then stdout, then stderr. */
3883 for (i
= 0; i
< n_fds
&& targets
> 0; i
++)
3884 if (named_iofds
[STDIN_FILENO
] < 0 &&
3885 c
->std_input
== EXEC_INPUT_NAMED_FD
&&
3886 stdio_fdname
[STDIN_FILENO
] &&
3887 streq(p
->fd_names
[i
], stdio_fdname
[STDIN_FILENO
])) {
3889 named_iofds
[STDIN_FILENO
] = p
->fds
[i
];
3892 } else if (named_iofds
[STDOUT_FILENO
] < 0 &&
3893 c
->std_output
== EXEC_OUTPUT_NAMED_FD
&&
3894 stdio_fdname
[STDOUT_FILENO
] &&
3895 streq(p
->fd_names
[i
], stdio_fdname
[STDOUT_FILENO
])) {
3897 named_iofds
[STDOUT_FILENO
] = p
->fds
[i
];
3900 } else if (named_iofds
[STDERR_FILENO
] < 0 &&
3901 c
->std_error
== EXEC_OUTPUT_NAMED_FD
&&
3902 stdio_fdname
[STDERR_FILENO
] &&
3903 streq(p
->fd_names
[i
], stdio_fdname
[STDERR_FILENO
])) {
3905 named_iofds
[STDERR_FILENO
] = p
->fds
[i
];
3909 return targets
== 0 ? 0 : -ENOENT
;
/* Loads the environment files listed in c->environment_files and accumulates their assignments.
 *
 * Visible in this excerpt, per list entry: the file name is checked for being absolute, expanded with
 * safe_glob(), and every match is read with load_env_file(). The loaded assignments are cleaned with
 * strv_env_clean_with_callback(), which reports rejected entries through invalid_env() together with
 * the file they came from, and are then merged into the accumulated vector `r` via strv_env_merge().
 * NOTE(review): the error handling, the meaning of `ignore`, how `fn` is derived from the list entry
 * and how the result is handed back through `l` are not visible in this excerpt. */
3912 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
) {
3913 char **i
, **r
= NULL
;
3918 STRV_FOREACH(i
, c
->environment_files
) {
3922 bool ignore
= false;
3924 _cleanup_globfree_ glob_t pglob
= {};
3933 if (!path_is_absolute(fn
)) {
3941 /* Filename supports globbing, take all matching files */
3942 k
= safe_glob(fn
, 0, &pglob
);
3951 /* When we don't match anything, -ENOENT should be returned */
3952 assert(pglob
.gl_pathc
> 0);
3954 for (n
= 0; n
< pglob
.gl_pathc
; n
++) {
3955 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
3963 /* Log invalid environment variables with filename */
3965 InvalidEnvInfo info
= {
3967 .path
= pglob
.gl_pathv
[n
]
3970 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
/* Merge what has been accumulated so far (r) with the assignments of the current file (p). */
3978 m
= strv_env_merge(2, r
, p
);
/* Conservative check whether `tty` might refer to the same device as the console.
 *
 * The name is first passed through skip_dev_prefix(). If resolving the console with
 * resolve_dev_console() fails, true is returned (assume it may match). Otherwise the result is true
 * if the resolved name equals `tty`, or if the console resolves to tty0 and tty_is_vc(tty) holds.
 * NOTE(review): the result of the direct comparison against the literal console is not visible in
 * this excerpt (presumably true), nor is any handling of a NULL `tty`. */
3994 static bool tty_may_match_dev_console(const char *tty
) {
3995 _cleanup_free_
char *resolved
= NULL
;
4000 tty
= skip_dev_prefix(tty
);
4002 /* trivial identity? */
4003 if (streq(tty
, "console"))
4006 if (resolve_dev_console(&resolved
) < 0)
4007 return true; /* if we could not resolve, assume it may */
4009 /* "tty0" means the active VC, so it may be the same sometimes */
4010 return streq(resolved
, tty
) || (streq(resolved
, "tty0") && tty_is_vc(tty
));
4013 bool exec_context_may_touch_console(const ExecContext
*ec
) {
4015 return (ec
->tty_reset
||
4017 ec
->tty_vt_disallocate
||
4018 is_terminal_input(ec
->std_input
) ||
4019 is_terminal_output(ec
->std_output
) ||
4020 is_terminal_output(ec
->std_error
)) &&
4021 tty_may_match_dev_console(exec_context_tty_path(ec
));
4024 static void strv_fprintf(FILE *f
, char **l
) {
4030 fprintf(f
, " %s", *g
);
4033 void exec_context_dump(const ExecContext
*c
, FILE* f
, const char *prefix
) {
4034 ExecDirectoryType dt
;
4042 prefix
= strempty(prefix
);
4046 "%sWorkingDirectory: %s\n"
4047 "%sRootDirectory: %s\n"
4048 "%sNonBlocking: %s\n"
4049 "%sPrivateTmp: %s\n"
4050 "%sPrivateDevices: %s\n"
4051 "%sProtectKernelTunables: %s\n"
4052 "%sProtectKernelModules: %s\n"
4053 "%sProtectControlGroups: %s\n"
4054 "%sPrivateNetwork: %s\n"
4055 "%sPrivateUsers: %s\n"
4056 "%sProtectHome: %s\n"
4057 "%sProtectSystem: %s\n"
4058 "%sMountAPIVFS: %s\n"
4059 "%sIgnoreSIGPIPE: %s\n"
4060 "%sMemoryDenyWriteExecute: %s\n"
4061 "%sRestrictRealtime: %s\n"
4062 "%sKeyringMode: %s\n",
4064 prefix
, c
->working_directory
? c
->working_directory
: "/",
4065 prefix
, c
->root_directory
? c
->root_directory
: "/",
4066 prefix
, yes_no(c
->non_blocking
),
4067 prefix
, yes_no(c
->private_tmp
),
4068 prefix
, yes_no(c
->private_devices
),
4069 prefix
, yes_no(c
->protect_kernel_tunables
),
4070 prefix
, yes_no(c
->protect_kernel_modules
),
4071 prefix
, yes_no(c
->protect_control_groups
),
4072 prefix
, yes_no(c
->private_network
),
4073 prefix
, yes_no(c
->private_users
),
4074 prefix
, protect_home_to_string(c
->protect_home
),
4075 prefix
, protect_system_to_string(c
->protect_system
),
4076 prefix
, yes_no(c
->mount_apivfs
),
4077 prefix
, yes_no(c
->ignore_sigpipe
),
4078 prefix
, yes_no(c
->memory_deny_write_execute
),
4079 prefix
, yes_no(c
->restrict_realtime
),
4080 prefix
, exec_keyring_mode_to_string(c
->keyring_mode
));
4083 fprintf(f
, "%sRootImage: %s\n", prefix
, c
->root_image
);
4085 STRV_FOREACH(e
, c
->environment
)
4086 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
4088 STRV_FOREACH(e
, c
->environment_files
)
4089 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
4091 STRV_FOREACH(e
, c
->pass_environment
)
4092 fprintf(f
, "%sPassEnvironment: %s\n", prefix
, *e
);
4094 STRV_FOREACH(e
, c
->unset_environment
)
4095 fprintf(f
, "%sUnsetEnvironment: %s\n", prefix
, *e
);
4097 fprintf(f
, "%sRuntimeDirectoryPreserve: %s\n", prefix
, exec_preserve_mode_to_string(c
->runtime_directory_preserve_mode
));
4099 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
4100 fprintf(f
, "%s%sMode: %04o\n", prefix
, exec_directory_type_to_string(dt
), c
->directories
[dt
].mode
);
4102 STRV_FOREACH(d
, c
->directories
[dt
].paths
)
4103 fprintf(f
, "%s%s: %s\n", prefix
, exec_directory_type_to_string(dt
), *d
);
4111 if (c
->oom_score_adjust_set
)
4113 "%sOOMScoreAdjust: %i\n",
4114 prefix
, c
->oom_score_adjust
);
4116 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
4118 fprintf(f
, "%sLimit%s: " RLIM_FMT
"\n",
4119 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
4120 fprintf(f
, "%sLimit%sSoft: " RLIM_FMT
"\n",
4121 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_cur
);
4124 if (c
->ioprio_set
) {
4125 _cleanup_free_
char *class_str
= NULL
;
4127 r
= ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
4129 fprintf(f
, "%sIOSchedulingClass: %s\n", prefix
, class_str
);
4131 fprintf(f
, "%sIOPriority: %lu\n", prefix
, IOPRIO_PRIO_DATA(c
->ioprio
));
4134 if (c
->cpu_sched_set
) {
4135 _cleanup_free_
char *policy_str
= NULL
;
4137 r
= sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
4139 fprintf(f
, "%sCPUSchedulingPolicy: %s\n", prefix
, policy_str
);
4142 "%sCPUSchedulingPriority: %i\n"
4143 "%sCPUSchedulingResetOnFork: %s\n",
4144 prefix
, c
->cpu_sched_priority
,
4145 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
4149 fprintf(f
, "%sCPUAffinity:", prefix
);
4150 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
4151 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
4152 fprintf(f
, " %u", i
);
4156 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
4157 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
4160 "%sStandardInput: %s\n"
4161 "%sStandardOutput: %s\n"
4162 "%sStandardError: %s\n",
4163 prefix
, exec_input_to_string(c
->std_input
),
4164 prefix
, exec_output_to_string(c
->std_output
),
4165 prefix
, exec_output_to_string(c
->std_error
));
4167 if (c
->std_input
== EXEC_INPUT_NAMED_FD
)
4168 fprintf(f
, "%sStandardInputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDIN_FILENO
]);
4169 if (c
->std_output
== EXEC_OUTPUT_NAMED_FD
)
4170 fprintf(f
, "%sStandardOutputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDOUT_FILENO
]);
4171 if (c
->std_error
== EXEC_OUTPUT_NAMED_FD
)
4172 fprintf(f
, "%sStandardErrorFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDERR_FILENO
]);
4174 if (c
->std_input
== EXEC_INPUT_FILE
)
4175 fprintf(f
, "%sStandardInputFile: %s\n", prefix
, c
->stdio_file
[STDIN_FILENO
]);
4176 if (c
->std_output
== EXEC_OUTPUT_FILE
)
4177 fprintf(f
, "%sStandardOutputFile: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4178 if (c
->std_output
== EXEC_OUTPUT_FILE_APPEND
)
4179 fprintf(f
, "%sStandardOutputFileToAppend: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4180 if (c
->std_error
== EXEC_OUTPUT_FILE
)
4181 fprintf(f
, "%sStandardErrorFile: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4182 if (c
->std_error
== EXEC_OUTPUT_FILE_APPEND
)
4183 fprintf(f
, "%sStandardErrorFileToAppend: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4189 "%sTTYVHangup: %s\n"
4190 "%sTTYVTDisallocate: %s\n",
4191 prefix
, c
->tty_path
,
4192 prefix
, yes_no(c
->tty_reset
),
4193 prefix
, yes_no(c
->tty_vhangup
),
4194 prefix
, yes_no(c
->tty_vt_disallocate
));
4196 if (IN_SET(c
->std_output
,
4199 EXEC_OUTPUT_JOURNAL
,
4200 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4201 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4202 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) ||
4203 IN_SET(c
->std_error
,
4206 EXEC_OUTPUT_JOURNAL
,
4207 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4208 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4209 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
)) {
4211 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
4213 r
= log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
4215 fprintf(f
, "%sSyslogFacility: %s\n", prefix
, fac_str
);
4217 r
= log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
4219 fprintf(f
, "%sSyslogLevel: %s\n", prefix
, lvl_str
);
4222 if (c
->log_level_max
>= 0) {
4223 _cleanup_free_
char *t
= NULL
;
4225 (void) log_level_to_string_alloc(c
->log_level_max
, &t
);
4227 fprintf(f
, "%sLogLevelMax: %s\n", prefix
, strna(t
));
4230 if (c
->n_log_extra_fields
> 0) {
4233 for (j
= 0; j
< c
->n_log_extra_fields
; j
++) {
4234 fprintf(f
, "%sLogExtraFields: ", prefix
);
4235 fwrite(c
->log_extra_fields
[j
].iov_base
,
4236 1, c
->log_extra_fields
[j
].iov_len
,
4242 if (c
->secure_bits
) {
4243 _cleanup_free_
char *str
= NULL
;
4245 r
= secure_bits_to_string_alloc(c
->secure_bits
, &str
);
4247 fprintf(f
, "%sSecure Bits: %s\n", prefix
, str
);
4250 if (c
->capability_bounding_set
!= CAP_ALL
) {
4251 _cleanup_free_
char *str
= NULL
;
4253 r
= capability_set_to_string_alloc(c
->capability_bounding_set
, &str
);
4255 fprintf(f
, "%sCapabilityBoundingSet: %s\n", prefix
, str
);
4258 if (c
->capability_ambient_set
!= 0) {
4259 _cleanup_free_
char *str
= NULL
;
4261 r
= capability_set_to_string_alloc(c
->capability_ambient_set
, &str
);
4263 fprintf(f
, "%sAmbientCapabilities: %s\n", prefix
, str
);
4267 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
4269 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
4271 fprintf(f
, "%sDynamicUser: %s\n", prefix
, yes_no(c
->dynamic_user
));
4273 if (!strv_isempty(c
->supplementary_groups
)) {
4274 fprintf(f
, "%sSupplementaryGroups:", prefix
);
4275 strv_fprintf(f
, c
->supplementary_groups
);
4280 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
4282 if (!strv_isempty(c
->read_write_paths
)) {
4283 fprintf(f
, "%sReadWritePaths:", prefix
);
4284 strv_fprintf(f
, c
->read_write_paths
);
4288 if (!strv_isempty(c
->read_only_paths
)) {
4289 fprintf(f
, "%sReadOnlyPaths:", prefix
);
4290 strv_fprintf(f
, c
->read_only_paths
);
4294 if (!strv_isempty(c
->inaccessible_paths
)) {
4295 fprintf(f
, "%sInaccessiblePaths:", prefix
);
4296 strv_fprintf(f
, c
->inaccessible_paths
);
4300 if (c
->n_bind_mounts
> 0)
4301 for (i
= 0; i
< c
->n_bind_mounts
; i
++)
4302 fprintf(f
, "%s%s: %s%s:%s:%s\n", prefix
,
4303 c
->bind_mounts
[i
].read_only
? "BindReadOnlyPaths" : "BindPaths",
4304 c
->bind_mounts
[i
].ignore_enoent
? "-": "",
4305 c
->bind_mounts
[i
].source
,
4306 c
->bind_mounts
[i
].destination
,
4307 c
->bind_mounts
[i
].recursive
? "rbind" : "norbind");
4309 if (c
->n_temporary_filesystems
> 0)
4310 for (i
= 0; i
< c
->n_temporary_filesystems
; i
++) {
4311 TemporaryFileSystem
*t
= c
->temporary_filesystems
+ i
;
4313 fprintf(f
, "%sTemporaryFileSystem: %s%s%s\n", prefix
,
4315 isempty(t
->options
) ? "" : ":",
4316 strempty(t
->options
));
4321 "%sUtmpIdentifier: %s\n",
4322 prefix
, c
->utmp_id
);
4324 if (c
->selinux_context
)
4326 "%sSELinuxContext: %s%s\n",
4327 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
4329 if (c
->apparmor_profile
)
4331 "%sAppArmorProfile: %s%s\n",
4332 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
4334 if (c
->smack_process_label
)
4336 "%sSmackProcessLabel: %s%s\n",
4337 prefix
, c
->smack_process_label_ignore
? "-" : "", c
->smack_process_label
);
4339 if (c
->personality
!= PERSONALITY_INVALID
)
4341 "%sPersonality: %s\n",
4342 prefix
, strna(personality_to_string(c
->personality
)));
4345 "%sLockPersonality: %s\n",
4346 prefix
, yes_no(c
->lock_personality
));
4348 if (c
->syscall_filter
) {
4356 "%sSystemCallFilter: ",
4359 if (!c
->syscall_whitelist
)
4363 HASHMAP_FOREACH_KEY(val
, id
, c
->syscall_filter
, j
) {
4364 _cleanup_free_
char *name
= NULL
;
4365 const char *errno_name
= NULL
;
4366 int num
= PTR_TO_INT(val
);
4373 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
4374 fputs(strna(name
), f
);
4377 errno_name
= errno_to_name(num
);
4379 fprintf(f
, ":%s", errno_name
);
4381 fprintf(f
, ":%d", num
);
4389 if (c
->syscall_archs
) {
4396 "%sSystemCallArchitectures:",
4400 SET_FOREACH(id
, c
->syscall_archs
, j
)
4401 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
4406 if (exec_context_restrict_namespaces_set(c
)) {
4407 _cleanup_free_
char *s
= NULL
;
4409 r
= namespace_flags_to_string(c
->restrict_namespaces
, &s
);
4411 fprintf(f
, "%sRestrictNamespaces: %s\n",
4415 if (c
->syscall_errno
> 0) {
4416 const char *errno_name
;
4418 fprintf(f
, "%sSystemCallErrorNumber: ", prefix
);
4420 errno_name
= errno_to_name(c
->syscall_errno
);
4422 fprintf(f
, "%s\n", errno_name
);
4424 fprintf(f
, "%d\n", c
->syscall_errno
);
4427 if (c
->apparmor_profile
)
4429 "%sAppArmorProfile: %s%s\n",
4430 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
4433 bool exec_context_maintains_privileges(const ExecContext
*c
) {
4436 /* Returns true if the process forked off would run under
4437 * an unchanged UID or as root. */
4442 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
4448 int exec_context_get_effective_ioprio(const ExecContext
*c
) {
4456 p
= ioprio_get(IOPRIO_WHO_PROCESS
, 0);
4458 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 4);
4463 void exec_context_free_log_extra_fields(ExecContext
*c
) {
4468 for (l
= 0; l
< c
->n_log_extra_fields
; l
++)
4469 free(c
->log_extra_fields
[l
].iov_base
);
4470 c
->log_extra_fields
= mfree(c
->log_extra_fields
);
4471 c
->n_log_extra_fields
= 0;
4474 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
4481 dual_timestamp_get(&s
->start_timestamp
);
4484 void exec_status_exit(ExecStatus
*s
, const ExecContext
*context
, pid_t pid
, int code
, int status
) {
4487 if (s
->pid
!= pid
) {
4493 dual_timestamp_get(&s
->exit_timestamp
);
4499 if (context
->utmp_id
)
4500 (void) utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
4502 exec_context_tty_reset(context
, NULL
);
4506 void exec_status_reset(ExecStatus
*s
) {
4509 *s
= (ExecStatus
) {};
4512 void exec_status_dump(const ExecStatus
*s
, FILE *f
, const char *prefix
) {
4513 char buf
[FORMAT_TIMESTAMP_MAX
];
4521 prefix
= strempty(prefix
);
4524 "%sPID: "PID_FMT
"\n",
4527 if (dual_timestamp_is_set(&s
->start_timestamp
))
4529 "%sStart Timestamp: %s\n",
4530 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
4532 if (dual_timestamp_is_set(&s
->exit_timestamp
))
4534 "%sExit Timestamp: %s\n"
4536 "%sExit Status: %i\n",
4537 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
4538 prefix
, sigchld_code_to_string(s
->code
),
4542 static char *exec_command_line(char **argv
) {
4550 STRV_FOREACH(a
, argv
)
4558 STRV_FOREACH(a
, argv
) {
4565 if (strpbrk(*a
, WHITESPACE
)) {
4576 /* FIXME: this doesn't really handle arguments that have
4577 * spaces and ticks in them */
4582 static void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4583 _cleanup_free_
char *cmd
= NULL
;
4584 const char *prefix2
;
4589 prefix
= strempty(prefix
);
4590 prefix2
= strjoina(prefix
, "\t");
4592 cmd
= exec_command_line(c
->argv
);
4594 "%sCommand Line: %s\n",
4595 prefix
, cmd
? cmd
: strerror(ENOMEM
));
4597 exec_status_dump(&c
->exec_status
, f
, prefix2
);
4600 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4603 prefix
= strempty(prefix
);
4605 LIST_FOREACH(command
, c
, c
)
4606 exec_command_dump(c
, f
, prefix
);
4609 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
4616 /* It's kind of important, that we keep the order here */
4617 LIST_FIND_TAIL(command
, *l
, end
);
4618 LIST_INSERT_AFTER(command
, *l
, end
, e
);
4623 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
4631 l
= strv_new_ap(path
, ap
);
4646 return strv_free_and_replace(c
->argv
, l
);
4649 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
4650 _cleanup_strv_free_
char **l
= NULL
;
4658 l
= strv_new_ap(path
, ap
);
4664 r
= strv_extend_strv(&c
->argv
, l
, false);
4671 static void *remove_tmpdir_thread(void *p
) {
4672 _cleanup_free_
char *path
= p
;
4674 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
4678 static ExecRuntime
* exec_runtime_free(ExecRuntime
*rt
, bool destroy
) {
4685 (void) hashmap_remove(rt
->manager
->exec_runtime_by_id
, rt
->id
);
4687 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4688 if (destroy
&& rt
->tmp_dir
) {
4689 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
4691 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
4693 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
4700 if (destroy
&& rt
->var_tmp_dir
) {
4701 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
4703 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
4705 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
4706 free(rt
->var_tmp_dir
);
4709 rt
->var_tmp_dir
= NULL
;
4712 rt
->id
= mfree(rt
->id
);
4713 rt
->tmp_dir
= mfree(rt
->tmp_dir
);
4714 rt
->var_tmp_dir
= mfree(rt
->var_tmp_dir
);
4715 safe_close_pair(rt
->netns_storage_socket
);
4719 static void exec_runtime_freep(ExecRuntime
**rt
) {
4721 (void) exec_runtime_free(*rt
, false);
4724 static int exec_runtime_allocate(ExecRuntime
**rt
) {
4727 *rt
= new0(ExecRuntime
, 1);
4731 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
4735 static int exec_runtime_add(
4738 const char *tmp_dir
,
4739 const char *var_tmp_dir
,
4740 const int netns_storage_socket
[2],
4741 ExecRuntime
**ret
) {
4743 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt
= NULL
;
4749 r
= hashmap_ensure_allocated(&m
->exec_runtime_by_id
, &string_hash_ops
);
4753 r
= exec_runtime_allocate(&rt
);
4757 rt
->id
= strdup(id
);
4762 rt
->tmp_dir
= strdup(tmp_dir
);
4766 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4767 assert(var_tmp_dir
);
4768 rt
->var_tmp_dir
= strdup(var_tmp_dir
);
4769 if (!rt
->var_tmp_dir
)
4773 if (netns_storage_socket
) {
4774 rt
->netns_storage_socket
[0] = netns_storage_socket
[0];
4775 rt
->netns_storage_socket
[1] = netns_storage_socket
[1];
4778 r
= hashmap_put(m
->exec_runtime_by_id
, rt
->id
, rt
);
4787 /* do not remove created ExecRuntime object when the operation succeeds. */
4792 static int exec_runtime_make(Manager
*m
, const ExecContext
*c
, const char *id
, ExecRuntime
**ret
) {
4793 _cleanup_free_
char *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
4794 _cleanup_close_pair_
int netns_storage_socket
[2] = {-1, -1};
4801 /* It is not necessary to create ExecRuntime object. */
4802 if (!c
->private_network
&& !c
->private_tmp
)
4805 if (c
->private_tmp
) {
4806 r
= setup_tmp_dirs(id
, &tmp_dir
, &var_tmp_dir
);
4811 if (c
->private_network
) {
4812 if (socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, netns_storage_socket
) < 0)
4816 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, netns_storage_socket
, ret
);
4821 netns_storage_socket
[0] = -1;
4822 netns_storage_socket
[1] = -1;
4826 int exec_runtime_acquire(Manager
*m
, const ExecContext
*c
, const char *id
, bool create
, ExecRuntime
**ret
) {
4834 rt
= hashmap_get(m
->exec_runtime_by_id
, id
);
4836 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4842 /* If not found, then create a new object. */
4843 r
= exec_runtime_make(m
, c
, id
, &rt
);
4845 /* When r == 0, it is not necessary to create ExecRuntime object. */
4849 /* increment reference counter. */
4855 ExecRuntime
*exec_runtime_unref(ExecRuntime
*rt
, bool destroy
) {
4859 assert(rt
->n_ref
> 0);
4865 return exec_runtime_free(rt
, destroy
);
4868 int exec_runtime_serialize(const Manager
*m
, FILE *f
, FDSet
*fds
) {
4876 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
4877 fprintf(f
, "exec-runtime=%s", rt
->id
);
4880 fprintf(f
, " tmp-dir=%s", rt
->tmp_dir
);
4882 if (rt
->var_tmp_dir
)
4883 fprintf(f
, " var-tmp-dir=%s", rt
->var_tmp_dir
);
4885 if (rt
->netns_storage_socket
[0] >= 0) {
4888 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
4892 fprintf(f
, " netns-socket-0=%i", copy
);
4895 if (rt
->netns_storage_socket
[1] >= 0) {
4898 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
4902 fprintf(f
, " netns-socket-1=%i", copy
);
4911 int exec_runtime_deserialize_compat(Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
4912 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt_create
= NULL
;
4916 /* This is for the migration from old (v237 or earlier) deserialization text.
4917 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4918 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4919 * so or not from the serialized text, then we always creates a new object owned by this. */
4925 /* Manager manages ExecRuntime objects by the unit id.
4926 * So, we omit the serialized text when the unit does not have id (yet?)... */
4927 if (isempty(u
->id
)) {
4928 log_unit_debug(u
, "Invocation ID not found. Dropping runtime parameter.");
4932 r
= hashmap_ensure_allocated(&u
->manager
->exec_runtime_by_id
, &string_hash_ops
);
4934 log_unit_debug_errno(u
, r
, "Failed to allocate storage for runtime parameter: %m");
4938 rt
= hashmap_get(u
->manager
->exec_runtime_by_id
, u
->id
);
4940 r
= exec_runtime_allocate(&rt_create
);
4944 rt_create
->id
= strdup(u
->id
);
4951 if (streq(key
, "tmp-dir")) {
4954 copy
= strdup(value
);
4958 free_and_replace(rt
->tmp_dir
, copy
);
4960 } else if (streq(key
, "var-tmp-dir")) {
4963 copy
= strdup(value
);
4967 free_and_replace(rt
->var_tmp_dir
, copy
);
4969 } else if (streq(key
, "netns-socket-0")) {
4972 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
4973 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4977 safe_close(rt
->netns_storage_socket
[0]);
4978 rt
->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
4980 } else if (streq(key
, "netns-socket-1")) {
4983 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
4984 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4988 safe_close(rt
->netns_storage_socket
[1]);
4989 rt
->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
4993 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
4995 r
= hashmap_put(u
->manager
->exec_runtime_by_id
, rt_create
->id
, rt_create
);
4997 log_unit_debug_errno(u
, r
, "Failed to put runtime parameter to manager's storage: %m");
5001 rt_create
->manager
= u
->manager
;
5010 void exec_runtime_deserialize_one(Manager
*m
, const char *value
, FDSet
*fds
) {
5011 char *id
= NULL
, *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
5012 int r
, fd0
= -1, fd1
= -1;
5013 const char *p
, *v
= value
;
5020 n
= strcspn(v
, " ");
5021 id
= strndupa(v
, n
);
5026 v
= startswith(p
, "tmp-dir=");
5028 n
= strcspn(v
, " ");
5029 tmp_dir
= strndupa(v
, n
);
5035 v
= startswith(p
, "var-tmp-dir=");
5037 n
= strcspn(v
, " ");
5038 var_tmp_dir
= strndupa(v
, n
);
5044 v
= startswith(p
, "netns-socket-0=");
5048 n
= strcspn(v
, " ");
5049 buf
= strndupa(v
, n
);
5050 if (safe_atoi(buf
, &fd0
) < 0 || !fdset_contains(fds
, fd0
)) {
5051 log_debug("Unable to process exec-runtime netns fd specification.");
5054 fd0
= fdset_remove(fds
, fd0
);
5060 v
= startswith(p
, "netns-socket-1=");
5064 n
= strcspn(v
, " ");
5065 buf
= strndupa(v
, n
);
5066 if (safe_atoi(buf
, &fd1
) < 0 || !fdset_contains(fds
, fd1
)) {
5067 log_debug("Unable to process exec-runtime netns fd specification.");
5070 fd1
= fdset_remove(fds
, fd1
);
5075 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, (int[]) { fd0
, fd1
}, NULL
);
5077 log_debug_errno(r
, "Failed to add exec-runtime: %m");
5080 void exec_runtime_vacuum(Manager
*m
) {
5086 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5088 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
5092 (void) exec_runtime_free(rt
, false);
5096 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
5097 [EXEC_INPUT_NULL
] = "null",
5098 [EXEC_INPUT_TTY
] = "tty",
5099 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
5100 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
5101 [EXEC_INPUT_SOCKET
] = "socket",
5102 [EXEC_INPUT_NAMED_FD
] = "fd",
5103 [EXEC_INPUT_DATA
] = "data",
5104 [EXEC_INPUT_FILE
] = "file",
5107 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
5109 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
5110 [EXEC_OUTPUT_INHERIT
] = "inherit",
5111 [EXEC_OUTPUT_NULL
] = "null",
5112 [EXEC_OUTPUT_TTY
] = "tty",
5113 [EXEC_OUTPUT_SYSLOG
] = "syslog",
5114 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
5115 [EXEC_OUTPUT_KMSG
] = "kmsg",
5116 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
5117 [EXEC_OUTPUT_JOURNAL
] = "journal",
5118 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
5119 [EXEC_OUTPUT_SOCKET
] = "socket",
5120 [EXEC_OUTPUT_NAMED_FD
] = "fd",
5121 [EXEC_OUTPUT_FILE
] = "file",
5122 [EXEC_OUTPUT_FILE_APPEND
] = "append",
5125 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
5127 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
5128 [EXEC_UTMP_INIT
] = "init",
5129 [EXEC_UTMP_LOGIN
] = "login",
5130 [EXEC_UTMP_USER
] = "user",
5133 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);
5135 static const char* const exec_preserve_mode_table
[_EXEC_PRESERVE_MODE_MAX
] = {
5136 [EXEC_PRESERVE_NO
] = "no",
5137 [EXEC_PRESERVE_YES
] = "yes",
5138 [EXEC_PRESERVE_RESTART
] = "restart",
5141 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode
, ExecPreserveMode
, EXEC_PRESERVE_YES
);
5143 static const char* const exec_directory_type_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5144 [EXEC_DIRECTORY_RUNTIME
] = "RuntimeDirectory",
5145 [EXEC_DIRECTORY_STATE
] = "StateDirectory",
5146 [EXEC_DIRECTORY_CACHE
] = "CacheDirectory",
5147 [EXEC_DIRECTORY_LOGS
] = "LogsDirectory",
5148 [EXEC_DIRECTORY_CONFIGURATION
] = "ConfigurationDirectory",
5151 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type
, ExecDirectoryType
);
5153 static const char* const exec_directory_env_name_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5154 [EXEC_DIRECTORY_RUNTIME
] = "RUNTIME_DIRECTORY",
5155 [EXEC_DIRECTORY_STATE
] = "STATE_DIRECTORY",
5156 [EXEC_DIRECTORY_CACHE
] = "CACHE_DIRECTORY",
5157 [EXEC_DIRECTORY_LOGS
] = "LOGS_DIRECTORY",
5158 [EXEC_DIRECTORY_CONFIGURATION
] = "CONFIGURATION_DIRECTORY",
5161 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name
, ExecDirectoryType
);
5163 static const char* const exec_keyring_mode_table
[_EXEC_KEYRING_MODE_MAX
] = {
5164 [EXEC_KEYRING_INHERIT
] = "inherit",
5165 [EXEC_KEYRING_PRIVATE
] = "private",
5166 [EXEC_KEYRING_SHARED
] = "shared",
5169 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode
, ExecKeyringMode
);