1 /* SPDX-License-Identifier: LGPL-2.1+ */
10 #include <sys/capability.h>
11 #include <sys/eventfd.h>
13 #include <sys/personality.h>
14 #include <sys/prctl.h>
16 #include <sys/socket.h>
18 #include <sys/types.h>
24 #include <security/pam_appl.h>
28 #include <selinux/selinux.h>
36 #include <sys/apparmor.h>
39 #include "sd-messages.h"
42 #include "alloc-util.h"
44 #include "apparmor-util.h"
49 #include "capability-util.h"
50 #include "chown-recursive.h"
51 #include "cpu-set-util.h"
54 #include "errno-list.h"
56 #include "exit-status.h"
59 #include "format-util.h"
61 #include "glob-util.h"
70 #include "namespace.h"
71 #include "parse-util.h"
72 #include "path-util.h"
73 #include "process-util.h"
74 #include "rlimit-util.h"
77 #include "seccomp-util.h"
79 #include "securebits.h"
80 #include "securebits-util.h"
81 #include "selinux-util.h"
82 #include "signal-util.h"
83 #include "smack-util.h"
84 #include "socket-util.h"
86 #include "stat-util.h"
87 #include "string-table.h"
88 #include "string-util.h"
90 #include "syslog-util.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
94 #include "user-util.h"
96 #include "utmp-wtmp.h"
98 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
99 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
101 /* This assumes there is a 'tty' group */
102 #define TTY_MODE 0620
104 #define SNDBUF_SIZE (8*1024*1024)
106 static int shift_fds(int fds
[], size_t n_fds
) {
107 int start
, restart_from
;
112 /* Modifies the fds array! (sorts it) */
122 for (i
= start
; i
< (int) n_fds
; i
++) {
125 /* Already at right index? */
129 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
136 /* Hmm, the fd we wanted isn't free? Then
137 * let's remember that and try again from here */
138 if (nfd
!= i
+3 && restart_from
< 0)
142 if (restart_from
< 0)
145 start
= restart_from
;
151 static int flags_fds(const int fds
[], size_t n_socket_fds
, size_t n_storage_fds
, bool nonblock
) {
155 n_fds
= n_socket_fds
+ n_storage_fds
;
161 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
162 * O_NONBLOCK only applies to socket activation though. */
164 for (i
= 0; i
< n_fds
; i
++) {
166 if (i
< n_socket_fds
) {
167 r
= fd_nonblock(fds
[i
], nonblock
);
172 /* We unconditionally drop FD_CLOEXEC from the fds,
173 * since after all we want to pass these fds to our
176 r
= fd_cloexec(fds
[i
], false);
184 static const char *exec_context_tty_path(const ExecContext
*context
) {
187 if (context
->stdio_as_fds
)
190 if (context
->tty_path
)
191 return context
->tty_path
;
193 return "/dev/console";
196 static void exec_context_tty_reset(const ExecContext
*context
, const ExecParameters
*p
) {
201 path
= exec_context_tty_path(context
);
203 if (context
->tty_vhangup
) {
204 if (p
&& p
->stdin_fd
>= 0)
205 (void) terminal_vhangup_fd(p
->stdin_fd
);
207 (void) terminal_vhangup(path
);
210 if (context
->tty_reset
) {
211 if (p
&& p
->stdin_fd
>= 0)
212 (void) reset_terminal_fd(p
->stdin_fd
, true);
214 (void) reset_terminal(path
);
217 if (context
->tty_vt_disallocate
&& path
)
218 (void) vt_disallocate(path
);
221 static bool is_terminal_input(ExecInput i
) {
224 EXEC_INPUT_TTY_FORCE
,
225 EXEC_INPUT_TTY_FAIL
);
228 static bool is_terminal_output(ExecOutput o
) {
231 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
232 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
233 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
);
236 static bool is_syslog_output(ExecOutput o
) {
239 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
);
242 static bool is_kmsg_output(ExecOutput o
) {
245 EXEC_OUTPUT_KMSG_AND_CONSOLE
);
248 static bool exec_context_needs_term(const ExecContext
*c
) {
251 /* Return true if the execution context suggests we should set $TERM to something useful. */
253 if (is_terminal_input(c
->std_input
))
256 if (is_terminal_output(c
->std_output
))
259 if (is_terminal_output(c
->std_error
))
262 return !!c
->tty_path
;
265 static int open_null_as(int flags
, int nfd
) {
270 fd
= open("/dev/null", flags
|O_NOCTTY
);
274 return move_fd(fd
, nfd
, false);
277 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
278 static const union sockaddr_union sa
= {
279 .un
.sun_family
= AF_UNIX
,
280 .un
.sun_path
= "/run/systemd/journal/stdout",
282 uid_t olduid
= UID_INVALID
;
283 gid_t oldgid
= GID_INVALID
;
286 if (gid_is_valid(gid
)) {
289 if (setegid(gid
) < 0)
293 if (uid_is_valid(uid
)) {
296 if (seteuid(uid
) < 0) {
302 r
= connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0 ? -errno
: 0;
304 /* If we fail to restore the uid or gid, things will likely
305 fail later on. This should only happen if an LSM interferes. */
307 if (uid_is_valid(uid
))
308 (void) seteuid(olduid
);
311 if (gid_is_valid(gid
))
312 (void) setegid(oldgid
);
317 static int connect_logger_as(
319 const ExecContext
*context
,
320 const ExecParameters
*params
,
331 assert(output
< _EXEC_OUTPUT_MAX
);
335 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
339 r
= connect_journal_socket(fd
, uid
, gid
);
343 if (shutdown(fd
, SHUT_RD
) < 0) {
348 (void) fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
358 context
->syslog_identifier
?: ident
,
359 params
->flags
& EXEC_PASS_LOG_UNIT
? unit
->id
: "",
360 context
->syslog_priority
,
361 !!context
->syslog_level_prefix
,
362 is_syslog_output(output
),
363 is_kmsg_output(output
),
364 is_terminal_output(output
));
366 return move_fd(fd
, nfd
, false);
368 static int open_terminal_as(const char *path
, int flags
, int nfd
) {
374 fd
= open_terminal(path
, flags
| O_NOCTTY
);
378 return move_fd(fd
, nfd
, false);
381 static int acquire_path(const char *path
, int flags
, mode_t mode
) {
382 union sockaddr_union sa
= {
383 .sa
.sa_family
= AF_UNIX
,
389 if (IN_SET(flags
& O_ACCMODE
, O_WRONLY
, O_RDWR
))
392 fd
= open(path
, flags
|O_NOCTTY
, mode
);
396 if (errno
!= ENXIO
) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
398 if (strlen(path
) > sizeof(sa
.un
.sun_path
)) /* Too long, can't be a UNIX socket */
401 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
403 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
407 strncpy(sa
.un
.sun_path
, path
, sizeof(sa
.un
.sun_path
));
408 if (connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0) {
410 return errno
== EINVAL
? -ENXIO
: -errno
; /* Propagate initial error if we get EINVAL, i.e. we have
411 * indication that this wasn't an AF_UNIX socket after all */
414 if ((flags
& O_ACCMODE
) == O_RDONLY
)
415 r
= shutdown(fd
, SHUT_WR
);
416 else if ((flags
& O_ACCMODE
) == O_WRONLY
)
417 r
= shutdown(fd
, SHUT_RD
);
428 static int fixup_input(
429 const ExecContext
*context
,
431 bool apply_tty_stdin
) {
437 std_input
= context
->std_input
;
439 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
440 return EXEC_INPUT_NULL
;
442 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
443 return EXEC_INPUT_NULL
;
445 if (std_input
== EXEC_INPUT_DATA
&& context
->stdin_data_size
== 0)
446 return EXEC_INPUT_NULL
;
451 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
453 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
454 return EXEC_OUTPUT_INHERIT
;
459 static int setup_input(
460 const ExecContext
*context
,
461 const ExecParameters
*params
,
463 int named_iofds
[3]) {
470 if (params
->stdin_fd
>= 0) {
471 if (dup2(params
->stdin_fd
, STDIN_FILENO
) < 0)
474 /* Try to make this the controlling tty, if it is a tty, and reset it */
475 if (isatty(STDIN_FILENO
)) {
476 (void) ioctl(STDIN_FILENO
, TIOCSCTTY
, context
->std_input
== EXEC_INPUT_TTY_FORCE
);
477 (void) reset_terminal_fd(STDIN_FILENO
, true);
483 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
487 case EXEC_INPUT_NULL
:
488 return open_null_as(O_RDONLY
, STDIN_FILENO
);
491 case EXEC_INPUT_TTY_FORCE
:
492 case EXEC_INPUT_TTY_FAIL
: {
495 fd
= acquire_terminal(exec_context_tty_path(context
),
496 i
== EXEC_INPUT_TTY_FAIL
? ACQUIRE_TERMINAL_TRY
:
497 i
== EXEC_INPUT_TTY_FORCE
? ACQUIRE_TERMINAL_FORCE
:
498 ACQUIRE_TERMINAL_WAIT
,
503 return move_fd(fd
, STDIN_FILENO
, false);
506 case EXEC_INPUT_SOCKET
:
507 assert(socket_fd
>= 0);
509 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
511 case EXEC_INPUT_NAMED_FD
:
512 assert(named_iofds
[STDIN_FILENO
] >= 0);
514 (void) fd_nonblock(named_iofds
[STDIN_FILENO
], false);
515 return dup2(named_iofds
[STDIN_FILENO
], STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
517 case EXEC_INPUT_DATA
: {
520 fd
= acquire_data_fd(context
->stdin_data
, context
->stdin_data_size
, 0);
524 return move_fd(fd
, STDIN_FILENO
, false);
527 case EXEC_INPUT_FILE
: {
531 assert(context
->stdio_file
[STDIN_FILENO
]);
533 rw
= (context
->std_output
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDOUT_FILENO
])) ||
534 (context
->std_error
== EXEC_OUTPUT_FILE
&& streq_ptr(context
->stdio_file
[STDIN_FILENO
], context
->stdio_file
[STDERR_FILENO
]));
536 fd
= acquire_path(context
->stdio_file
[STDIN_FILENO
], rw
? O_RDWR
: O_RDONLY
, 0666 & ~context
->umask
);
540 return move_fd(fd
, STDIN_FILENO
, false);
544 assert_not_reached("Unknown input type");
548 static int setup_output(
550 const ExecContext
*context
,
551 const ExecParameters
*params
,
558 dev_t
*journal_stream_dev
,
559 ino_t
*journal_stream_ino
) {
569 assert(journal_stream_dev
);
570 assert(journal_stream_ino
);
572 if (fileno
== STDOUT_FILENO
&& params
->stdout_fd
>= 0) {
574 if (dup2(params
->stdout_fd
, STDOUT_FILENO
) < 0)
577 return STDOUT_FILENO
;
580 if (fileno
== STDERR_FILENO
&& params
->stderr_fd
>= 0) {
581 if (dup2(params
->stderr_fd
, STDERR_FILENO
) < 0)
584 return STDERR_FILENO
;
587 i
= fixup_input(context
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
588 o
= fixup_output(context
->std_output
, socket_fd
);
590 if (fileno
== STDERR_FILENO
) {
592 e
= fixup_output(context
->std_error
, socket_fd
);
594 /* This expects the input and output are already set up */
596 /* Don't change the stderr file descriptor if we inherit all
597 * the way and are not on a tty */
598 if (e
== EXEC_OUTPUT_INHERIT
&&
599 o
== EXEC_OUTPUT_INHERIT
&&
600 i
== EXEC_INPUT_NULL
&&
601 !is_terminal_input(context
->std_input
) &&
605 /* Duplicate from stdout if possible */
606 if ((e
== o
&& e
!= EXEC_OUTPUT_NAMED_FD
) || e
== EXEC_OUTPUT_INHERIT
)
607 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
611 } else if (o
== EXEC_OUTPUT_INHERIT
) {
612 /* If input got downgraded, inherit the original value */
613 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
614 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
616 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
617 if (!IN_SET(i
, EXEC_INPUT_NULL
, EXEC_INPUT_DATA
))
618 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
620 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
624 /* We need to open /dev/null here anew, to get the right access mode. */
625 return open_null_as(O_WRONLY
, fileno
);
630 case EXEC_OUTPUT_NULL
:
631 return open_null_as(O_WRONLY
, fileno
);
633 case EXEC_OUTPUT_TTY
:
634 if (is_terminal_input(i
))
635 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
637 /* We don't reset the terminal if this is just about output */
638 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
640 case EXEC_OUTPUT_SYSLOG
:
641 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
642 case EXEC_OUTPUT_KMSG
:
643 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
644 case EXEC_OUTPUT_JOURNAL
:
645 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
646 r
= connect_logger_as(unit
, context
, params
, o
, ident
, fileno
, uid
, gid
);
648 log_unit_warning_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
649 r
= open_null_as(O_WRONLY
, fileno
);
653 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
654 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
655 * services to detect whether they are connected to the journal or not.
657 * If both stdout and stderr are connected to a stream then let's make sure to store the data
658 * about STDERR as that's usually the best way to do logging. */
660 if (fstat(fileno
, &st
) >= 0 &&
661 (*journal_stream_ino
== 0 || fileno
== STDERR_FILENO
)) {
662 *journal_stream_dev
= st
.st_dev
;
663 *journal_stream_ino
= st
.st_ino
;
668 case EXEC_OUTPUT_SOCKET
:
669 assert(socket_fd
>= 0);
671 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
673 case EXEC_OUTPUT_NAMED_FD
:
674 assert(named_iofds
[fileno
] >= 0);
676 (void) fd_nonblock(named_iofds
[fileno
], false);
677 return dup2(named_iofds
[fileno
], fileno
) < 0 ? -errno
: fileno
;
679 case EXEC_OUTPUT_FILE
:
680 case EXEC_OUTPUT_FILE_APPEND
: {
684 assert(context
->stdio_file
[fileno
]);
686 rw
= context
->std_input
== EXEC_INPUT_FILE
&&
687 streq_ptr(context
->stdio_file
[fileno
], context
->stdio_file
[STDIN_FILENO
]);
690 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
693 if (o
== EXEC_OUTPUT_FILE_APPEND
)
696 fd
= acquire_path(context
->stdio_file
[fileno
], flags
, 0666 & ~context
->umask
);
701 return move_fd(fd
, fileno
, 0);
705 assert_not_reached("Unknown error type");
709 static int chown_terminal(int fd
, uid_t uid
) {
714 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
718 /* This might fail. What matters are the results. */
719 (void) fchown(fd
, uid
, -1);
720 (void) fchmod(fd
, TTY_MODE
);
722 if (fstat(fd
, &st
) < 0)
725 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
731 static int setup_confirm_stdio(const char *vc
, int *_saved_stdin
, int *_saved_stdout
) {
732 _cleanup_close_
int fd
= -1, saved_stdin
= -1, saved_stdout
= -1;
735 assert(_saved_stdin
);
736 assert(_saved_stdout
);
738 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
742 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
743 if (saved_stdout
< 0)
746 fd
= acquire_terminal(vc
, ACQUIRE_TERMINAL_WAIT
, DEFAULT_CONFIRM_USEC
);
750 r
= chown_terminal(fd
, getuid());
754 r
= reset_terminal_fd(fd
, true);
758 r
= rearrange_stdio(fd
, fd
, STDERR_FILENO
);
763 *_saved_stdin
= saved_stdin
;
764 *_saved_stdout
= saved_stdout
;
766 saved_stdin
= saved_stdout
= -1;
771 static void write_confirm_error_fd(int err
, int fd
, const Unit
*u
) {
774 if (err
== -ETIMEDOUT
)
775 dprintf(fd
, "Confirmation question timed out for %s, assuming positive response.\n", u
->id
);
778 dprintf(fd
, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u
->id
);
782 static void write_confirm_error(int err
, const char *vc
, const Unit
*u
) {
783 _cleanup_close_
int fd
= -1;
787 fd
= open_terminal(vc
, O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
791 write_confirm_error_fd(err
, fd
, u
);
794 static int restore_confirm_stdio(int *saved_stdin
, int *saved_stdout
) {
798 assert(saved_stdout
);
802 if (*saved_stdin
>= 0)
803 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
806 if (*saved_stdout
>= 0)
807 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
810 *saved_stdin
= safe_close(*saved_stdin
);
811 *saved_stdout
= safe_close(*saved_stdout
);
817 CONFIRM_PRETEND_FAILURE
= -1,
818 CONFIRM_PRETEND_SUCCESS
= 0,
822 static int ask_for_confirmation(const char *vc
, Unit
*u
, const char *cmdline
) {
823 int saved_stdout
= -1, saved_stdin
= -1, r
;
824 _cleanup_free_
char *e
= NULL
;
827 /* For any internal errors, assume a positive response. */
828 r
= setup_confirm_stdio(vc
, &saved_stdin
, &saved_stdout
);
830 write_confirm_error(r
, vc
, u
);
831 return CONFIRM_EXECUTE
;
834 /* confirm_spawn might have been disabled while we were sleeping. */
835 if (manager_is_confirm_spawn_disabled(u
->manager
)) {
840 e
= ellipsize(cmdline
, 60, 100);
848 r
= ask_char(&c
, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e
);
850 write_confirm_error_fd(r
, STDOUT_FILENO
, u
);
857 printf("Resuming normal execution.\n");
858 manager_disable_confirm_spawn();
862 unit_dump(u
, stdout
, " ");
863 continue; /* ask again */
865 printf("Failing execution.\n");
866 r
= CONFIRM_PRETEND_FAILURE
;
869 printf(" c - continue, proceed without asking anymore\n"
870 " D - dump, show the state of the unit\n"
871 " f - fail, don't execute the command and pretend it failed\n"
873 " i - info, show a short summary of the unit\n"
874 " j - jobs, show jobs that are in progress\n"
875 " s - skip, don't execute the command and pretend it succeeded\n"
876 " y - yes, execute the command\n");
877 continue; /* ask again */
879 printf(" Description: %s\n"
882 u
->id
, u
->description
, cmdline
);
883 continue; /* ask again */
885 manager_dump_jobs(u
->manager
, stdout
, " ");
886 continue; /* ask again */
888 /* 'n' was removed in favor of 'f'. */
889 printf("Didn't understand 'n', did you mean 'f'?\n");
890 continue; /* ask again */
892 printf("Skipping execution.\n");
893 r
= CONFIRM_PRETEND_SUCCESS
;
899 assert_not_reached("Unhandled choice");
905 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
909 static int get_fixed_user(const ExecContext
*c
, const char **user
,
910 uid_t
*uid
, gid_t
*gid
,
911 const char **home
, const char **shell
) {
920 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
921 * (i.e. are "/" or "/bin/nologin"). */
924 r
= get_user_creds_clean(&name
, uid
, gid
, home
, shell
);
932 static int get_fixed_group(const ExecContext
*c
, const char **group
, gid_t
*gid
) {
942 r
= get_group_creds(&name
, gid
);
950 static int get_supplementary_groups(const ExecContext
*c
, const char *user
,
951 const char *group
, gid_t gid
,
952 gid_t
**supplementary_gids
, int *ngids
) {
956 bool keep_groups
= false;
957 gid_t
*groups
= NULL
;
958 _cleanup_free_ gid_t
*l_gids
= NULL
;
963 * If user is given, then look up the GID and supplementary groups list.
964 * We avoid NSS lookups for gid=0. Also we have to initialize groups
965 * here and as early as possible so we keep the list of supplementary
966 * groups of the caller.
968 if (user
&& gid_is_valid(gid
) && gid
!= 0) {
969 /* First step, initialize groups from /etc/group */
970 if (initgroups(user
, gid
) < 0)
976 if (strv_isempty(c
->supplementary_groups
))
980 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
981 * be positive, otherwise fail.
984 ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
);
985 if (ngroups_max
<= 0) {
989 return -EOPNOTSUPP
; /* For all other values */
992 l_gids
= new(gid_t
, ngroups_max
);
998 * Lookup the list of groups that the user belongs to, we
999 * avoid NSS lookups here too for gid=0.
1002 if (getgrouplist(user
, gid
, l_gids
, &k
) < 0)
1007 STRV_FOREACH(i
, c
->supplementary_groups
) {
1010 if (k
>= ngroups_max
)
1014 r
= get_group_creds(&g
, l_gids
+k
);
1022 * Sets ngids to zero to drop all supplementary groups, happens
1023 * when we are under root and SupplementaryGroups= is empty.
1030 /* Otherwise get the final list of supplementary groups */
1031 groups
= memdup(l_gids
, sizeof(gid_t
) * k
);
1035 *supplementary_gids
= groups
;
1043 static int enforce_groups(gid_t gid
, const gid_t
*supplementary_gids
, int ngids
) {
1046 /* Handle SupplementaryGroups= if it is not empty */
1048 r
= maybe_setgroups(ngids
, supplementary_gids
);
1053 if (gid_is_valid(gid
)) {
1054 /* Then set our gids */
1055 if (setresgid(gid
, gid
, gid
) < 0)
1062 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
1065 if (!uid_is_valid(uid
))
1068 /* Sets (but doesn't look up) the uid and makes sure we keep the
1069 * capabilities while doing so. */
1071 if (context
->capability_ambient_set
!= 0) {
1073 /* First step: If we need to keep capabilities but
1074 * drop privileges we need to make sure we keep our
1075 * caps, while we drop privileges. */
1077 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
1079 if (prctl(PR_GET_SECUREBITS
) != sb
)
1080 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
1085 /* Second step: actually set the uids */
1086 if (setresuid(uid
, uid
, uid
) < 0)
1089 /* At this point we should have all necessary capabilities but
1090 are otherwise a normal user. However, the caps might have gotten
1091 corrupted due to the setresuid() so we need to clean them up
1092 later. This is done outside of this call. */
1099 static int null_conv(
1101 const struct pam_message
**msg
,
1102 struct pam_response
**resp
,
1103 void *appdata_ptr
) {
1105 /* We don't support conversations */
1107 return PAM_CONV_ERR
;
1112 static int setup_pam(
1119 int fds
[], size_t n_fds
) {
1123 static const struct pam_conv conv
= {
1128 _cleanup_(barrier_destroy
) Barrier barrier
= BARRIER_NULL
;
1129 pam_handle_t
*handle
= NULL
;
1131 int pam_code
= PAM_SUCCESS
, r
;
1132 char **nv
, **e
= NULL
;
1133 bool close_session
= false;
1134 pid_t pam_pid
= 0, parent_pid
;
1141 /* We set up PAM in the parent process, then fork. The child
1142 * will then stay around until killed via PR_SET_PDEATHSIG or
1143 * systemd via the cgroup logic. It will then remove the PAM
1144 * session again. The parent process will exec() the actual
1145 * daemon. We do things this way to ensure that the main PID
1146 * of the daemon is the one we initially fork()ed. */
1148 r
= barrier_create(&barrier
);
1152 if (log_get_max_level() < LOG_DEBUG
)
1153 flags
|= PAM_SILENT
;
1155 pam_code
= pam_start(name
, user
, &conv
, &handle
);
1156 if (pam_code
!= PAM_SUCCESS
) {
1162 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
1163 if (pam_code
!= PAM_SUCCESS
)
1167 STRV_FOREACH(nv
, *env
) {
1168 pam_code
= pam_putenv(handle
, *nv
);
1169 if (pam_code
!= PAM_SUCCESS
)
1173 pam_code
= pam_acct_mgmt(handle
, flags
);
1174 if (pam_code
!= PAM_SUCCESS
)
1177 pam_code
= pam_open_session(handle
, flags
);
1178 if (pam_code
!= PAM_SUCCESS
)
1181 close_session
= true;
1183 e
= pam_getenvlist(handle
);
1185 pam_code
= PAM_BUF_ERR
;
1189 /* Block SIGTERM, so that we know that it won't get lost in
1192 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
1194 parent_pid
= getpid_cached();
1196 r
= safe_fork("(sd-pam)", 0, &pam_pid
);
1200 int sig
, ret
= EXIT_PAM
;
1202 /* The child's job is to reset the PAM session on
1204 barrier_set_role(&barrier
, BARRIER_CHILD
);
1206 /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
1207 * are open here that have been opened by PAM. */
1208 (void) close_many(fds
, n_fds
);
1210 /* Drop privileges - we don't need any to call pam_close_session()
1211 * and this will make PR_SET_PDEATHSIG work in most cases.
1212 * If this fails, ignore the error - but expect sd-pam threads
1213 * to fail to exit normally */
1215 r
= maybe_setgroups(0, NULL
);
1217 log_warning_errno(r
, "Failed to setgroups() in sd-pam: %m");
1218 if (setresgid(gid
, gid
, gid
) < 0)
1219 log_warning_errno(errno
, "Failed to setresgid() in sd-pam: %m");
1220 if (setresuid(uid
, uid
, uid
) < 0)
1221 log_warning_errno(errno
, "Failed to setresuid() in sd-pam: %m");
1223 (void) ignore_signals(SIGPIPE
, -1);
1225 /* Wait until our parent died. This will only work if
1226 * the above setresuid() succeeds, otherwise the kernel
1227 * will not allow unprivileged parents to kill their privileged
1228 * children this way. We rely on the control groups kill logic
1229 * to do the rest for us. */
1230 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
1233 /* Tell the parent that our setup is done. This is especially
1234 * important regarding dropping privileges. Otherwise, unit
1235 * setup might race against our setresuid(2) call.
1237 * If the parent aborted, we'll detect this below, hence ignore
1238 * return failure here. */
1239 (void) barrier_place(&barrier
);
1241 /* Check if our parent process might already have died? */
1242 if (getppid() == parent_pid
) {
1245 assert_se(sigemptyset(&ss
) >= 0);
1246 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
1249 if (sigwait(&ss
, &sig
) < 0) {
1256 assert(sig
== SIGTERM
);
1261 /* If our parent died we'll end the session */
1262 if (getppid() != parent_pid
) {
1263 pam_code
= pam_close_session(handle
, flags
);
1264 if (pam_code
!= PAM_SUCCESS
)
1271 pam_end(handle
, pam_code
| flags
);
1275 barrier_set_role(&barrier
, BARRIER_PARENT
);
1277 /* If the child was forked off successfully it will do all the
1278 * cleanups, so forget about the handle here. */
1281 /* Unblock SIGTERM again in the parent */
1282 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
1284 /* We close the log explicitly here, since the PAM modules
1285 * might have opened it, but we don't want this fd around. */
1288 /* Synchronously wait for the child to initialize. We don't care for
1289 * errors as we cannot recover. However, warn loudly if it happens. */
1290 if (!barrier_place_and_sync(&barrier
))
1291 log_error("PAM initialization failed");
1293 return strv_free_and_replace(*env
, e
);
1296 if (pam_code
!= PAM_SUCCESS
) {
1297 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
1298 r
= -EPERM
; /* PAM errors do not map to errno */
1300 log_error_errno(r
, "PAM failed: %m");
1304 pam_code
= pam_close_session(handle
, flags
);
1306 pam_end(handle
, pam_code
| flags
);
1318 static void rename_process_from_path(const char *path
) {
1319 char process_name
[11];
1323 /* This resulting string must fit in 10 chars (i.e. the length
1324 * of "/sbin/init") to look pretty in /bin/ps */
1328 rename_process("(...)");
1334 /* The end of the process name is usually more
1335 * interesting, since the first bit might just be
1341 process_name
[0] = '(';
1342 memcpy(process_name
+1, p
, l
);
1343 process_name
[1+l
] = ')';
1344 process_name
[1+l
+1] = 0;
1346 rename_process(process_name
);
1349 static bool context_has_address_families(const ExecContext
*c
) {
1352 return c
->address_families_whitelist
||
1353 !set_isempty(c
->address_families
);
1356 static bool context_has_syscall_filters(const ExecContext
*c
) {
1359 return c
->syscall_whitelist
||
1360 !hashmap_isempty(c
->syscall_filter
);
1363 static bool context_has_no_new_privileges(const ExecContext
*c
) {
1366 if (c
->no_new_privileges
)
1369 if (have_effective_cap(CAP_SYS_ADMIN
)) /* if we are privileged, we don't need NNP */
1372 /* We need NNP if we have any form of seccomp and are unprivileged */
1373 return context_has_address_families(c
) ||
1374 c
->memory_deny_write_execute
||
1375 c
->restrict_realtime
||
1376 exec_context_restrict_namespaces_set(c
) ||
1377 c
->protect_kernel_tunables
||
1378 c
->protect_kernel_modules
||
1379 c
->private_devices
||
1380 context_has_syscall_filters(c
) ||
1381 !set_isempty(c
->syscall_archs
) ||
1382 c
->lock_personality
;
1387 static bool skip_seccomp_unavailable(const Unit
* u
, const char* msg
) {
1389 if (is_seccomp_available())
1392 log_unit_debug(u
, "SECCOMP features not detected in the kernel, skipping %s", msg
);
1396 static int apply_syscall_filter(const Unit
* u
, const ExecContext
*c
, bool needs_ambient_hack
) {
1397 uint32_t negative_action
, default_action
, action
;
1403 if (!context_has_syscall_filters(c
))
1406 if (skip_seccomp_unavailable(u
, "SystemCallFilter="))
1409 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
1411 if (c
->syscall_whitelist
) {
1412 default_action
= negative_action
;
1413 action
= SCMP_ACT_ALLOW
;
1415 default_action
= SCMP_ACT_ALLOW
;
1416 action
= negative_action
;
1419 if (needs_ambient_hack
) {
1420 r
= seccomp_filter_set_add(c
->syscall_filter
, c
->syscall_whitelist
, syscall_filter_sets
+ SYSCALL_FILTER_SET_SETUID
);
1425 return seccomp_load_syscall_filter_set_raw(default_action
, c
->syscall_filter
, action
);
1428 static int apply_syscall_archs(const Unit
*u
, const ExecContext
*c
) {
1432 if (set_isempty(c
->syscall_archs
))
1435 if (skip_seccomp_unavailable(u
, "SystemCallArchitectures="))
1438 return seccomp_restrict_archs(c
->syscall_archs
);
1441 static int apply_address_families(const Unit
* u
, const ExecContext
*c
) {
1445 if (!context_has_address_families(c
))
1448 if (skip_seccomp_unavailable(u
, "RestrictAddressFamilies="))
1451 return seccomp_restrict_address_families(c
->address_families
, c
->address_families_whitelist
);
1454 static int apply_memory_deny_write_execute(const Unit
* u
, const ExecContext
*c
) {
1458 if (!c
->memory_deny_write_execute
)
1461 if (skip_seccomp_unavailable(u
, "MemoryDenyWriteExecute="))
1464 return seccomp_memory_deny_write_execute();
1467 static int apply_restrict_realtime(const Unit
* u
, const ExecContext
*c
) {
1471 if (!c
->restrict_realtime
)
1474 if (skip_seccomp_unavailable(u
, "RestrictRealtime="))
1477 return seccomp_restrict_realtime();
1480 static int apply_protect_sysctl(const Unit
*u
, const ExecContext
*c
) {
1484 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1485 * let's protect even those systems where this is left on in the kernel. */
1487 if (!c
->protect_kernel_tunables
)
1490 if (skip_seccomp_unavailable(u
, "ProtectKernelTunables="))
1493 return seccomp_protect_sysctl();
1496 static int apply_protect_kernel_modules(const Unit
*u
, const ExecContext
*c
) {
1500 /* Turn off module syscalls on ProtectKernelModules=yes */
1502 if (!c
->protect_kernel_modules
)
1505 if (skip_seccomp_unavailable(u
, "ProtectKernelModules="))
1508 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_MODULE
, SCMP_ACT_ERRNO(EPERM
));
1511 static int apply_private_devices(const Unit
*u
, const ExecContext
*c
) {
1515 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1517 if (!c
->private_devices
)
1520 if (skip_seccomp_unavailable(u
, "PrivateDevices="))
1523 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
, SCMP_ACT_ERRNO(EPERM
));
1526 static int apply_restrict_namespaces(const Unit
*u
, const ExecContext
*c
) {
1530 if (!exec_context_restrict_namespaces_set(c
))
1533 if (skip_seccomp_unavailable(u
, "RestrictNamespaces="))
1536 return seccomp_restrict_namespaces(c
->restrict_namespaces
);
1539 static int apply_lock_personality(const Unit
* u
, const ExecContext
*c
) {
1540 unsigned long personality
;
1546 if (!c
->lock_personality
)
1549 if (skip_seccomp_unavailable(u
, "LockPersonality="))
1552 personality
= c
->personality
;
1554 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1555 if (personality
== PERSONALITY_INVALID
) {
1557 r
= opinionated_personality(&personality
);
1562 return seccomp_lock_personality(personality
);
1567 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1570 idle_pipe
[1] = safe_close(idle_pipe
[1]);
1571 idle_pipe
[2] = safe_close(idle_pipe
[2]);
1573 if (idle_pipe
[0] >= 0) {
1576 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1578 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1581 /* Signal systemd that we are bored and want to continue. */
1582 n
= write(idle_pipe
[3], "x", 1);
1584 /* Wait for systemd to react to the signal above. */
1585 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1588 idle_pipe
[0] = safe_close(idle_pipe
[0]);
1592 idle_pipe
[3] = safe_close(idle_pipe
[3]);
1595 static int build_environment(
1597 const ExecContext
*c
,
1598 const ExecParameters
*p
,
1601 const char *username
,
1603 dev_t journal_stream_dev
,
1604 ino_t journal_stream_ino
,
1607 _cleanup_strv_free_
char **our_env
= NULL
;
1615 our_env
= new0(char*, 14);
1620 _cleanup_free_
char *joined
= NULL
;
1622 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid_cached()) < 0)
1624 our_env
[n_env
++] = x
;
1626 if (asprintf(&x
, "LISTEN_FDS=%zu", n_fds
) < 0)
1628 our_env
[n_env
++] = x
;
1630 joined
= strv_join(p
->fd_names
, ":");
1634 x
= strjoin("LISTEN_FDNAMES=", joined
);
1637 our_env
[n_env
++] = x
;
1640 if ((p
->flags
& EXEC_SET_WATCHDOG
) && p
->watchdog_usec
> 0) {
1641 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid_cached()) < 0)
1643 our_env
[n_env
++] = x
;
1645 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, p
->watchdog_usec
) < 0)
1647 our_env
[n_env
++] = x
;
1650 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus to look up dynamic
1651 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1652 * check the database directly. */
1653 if (p
->flags
& EXEC_NSS_BYPASS_BUS
) {
1654 x
= strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1657 our_env
[n_env
++] = x
;
1661 x
= strappend("HOME=", home
);
1664 our_env
[n_env
++] = x
;
1668 x
= strappend("LOGNAME=", username
);
1671 our_env
[n_env
++] = x
;
1673 x
= strappend("USER=", username
);
1676 our_env
[n_env
++] = x
;
1680 x
= strappend("SHELL=", shell
);
1683 our_env
[n_env
++] = x
;
1686 if (!sd_id128_is_null(u
->invocation_id
)) {
1687 if (asprintf(&x
, "INVOCATION_ID=" SD_ID128_FORMAT_STR
, SD_ID128_FORMAT_VAL(u
->invocation_id
)) < 0)
1690 our_env
[n_env
++] = x
;
1693 if (exec_context_needs_term(c
)) {
1694 const char *tty_path
, *term
= NULL
;
1696 tty_path
= exec_context_tty_path(c
);
1698 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1699 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1700 * passes to PID 1 ends up all the way in the console login shown. */
1702 if (path_equal(tty_path
, "/dev/console") && getppid() == 1)
1703 term
= getenv("TERM");
1705 term
= default_term_for_tty(tty_path
);
1707 x
= strappend("TERM=", term
);
1710 our_env
[n_env
++] = x
;
1713 if (journal_stream_dev
!= 0 && journal_stream_ino
!= 0) {
1714 if (asprintf(&x
, "JOURNAL_STREAM=" DEV_FMT
":" INO_FMT
, journal_stream_dev
, journal_stream_ino
) < 0)
1717 our_env
[n_env
++] = x
;
1720 our_env
[n_env
++] = NULL
;
1721 assert(n_env
<= 12);
1723 *ret
= TAKE_PTR(our_env
);
1728 static int build_pass_environment(const ExecContext
*c
, char ***ret
) {
1729 _cleanup_strv_free_
char **pass_env
= NULL
;
1730 size_t n_env
= 0, n_bufsize
= 0;
1733 STRV_FOREACH(i
, c
->pass_environment
) {
1734 _cleanup_free_
char *x
= NULL
;
1740 x
= strjoin(*i
, "=", v
);
1744 if (!GREEDY_REALLOC(pass_env
, n_bufsize
, n_env
+ 2))
1747 pass_env
[n_env
++] = TAKE_PTR(x
);
1748 pass_env
[n_env
] = NULL
;
1751 *ret
= TAKE_PTR(pass_env
);
1756 static bool exec_needs_mount_namespace(
1757 const ExecContext
*context
,
1758 const ExecParameters
*params
,
1759 const ExecRuntime
*runtime
) {
1764 if (context
->root_image
)
1767 if (!strv_isempty(context
->read_write_paths
) ||
1768 !strv_isempty(context
->read_only_paths
) ||
1769 !strv_isempty(context
->inaccessible_paths
))
1772 if (context
->n_bind_mounts
> 0)
1775 if (context
->n_temporary_filesystems
> 0)
1778 if (context
->mount_flags
!= 0)
1781 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1784 if (context
->private_devices
||
1785 context
->private_mounts
||
1786 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1787 context
->protect_home
!= PROTECT_HOME_NO
||
1788 context
->protect_kernel_tunables
||
1789 context
->protect_kernel_modules
||
1790 context
->protect_control_groups
)
1793 if (context
->root_directory
) {
1794 ExecDirectoryType t
;
1796 if (context
->mount_apivfs
)
1799 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
1800 if (!params
->prefix
[t
])
1803 if (!strv_isempty(context
->directories
[t
].paths
))
1808 if (context
->dynamic_user
&&
1809 (!strv_isempty(context
->directories
[EXEC_DIRECTORY_STATE
].paths
) ||
1810 !strv_isempty(context
->directories
[EXEC_DIRECTORY_CACHE
].paths
) ||
1811 !strv_isempty(context
->directories
[EXEC_DIRECTORY_LOGS
].paths
)))
1817 static int setup_private_users(uid_t uid
, gid_t gid
) {
1818 _cleanup_free_
char *uid_map
= NULL
, *gid_map
= NULL
;
1819 _cleanup_close_pair_
int errno_pipe
[2] = { -1, -1 };
1820 _cleanup_close_
int unshare_ready_fd
= -1;
1821 _cleanup_(sigkill_waitp
) pid_t pid
= 0;
1826 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1827 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1828 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1829 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1830 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1831 * continues execution normally. */
1833 if (uid
!= 0 && uid_is_valid(uid
)) {
1834 r
= asprintf(&uid_map
,
1835 "0 0 1\n" /* Map root → root */
1836 UID_FMT
" " UID_FMT
" 1\n", /* Map $UID → $UID */
1841 uid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1846 if (gid
!= 0 && gid_is_valid(gid
)) {
1847 r
= asprintf(&gid_map
,
1848 "0 0 1\n" /* Map root → root */
1849 GID_FMT
" " GID_FMT
" 1\n", /* Map $GID → $GID */
1854 gid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1859 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1861 unshare_ready_fd
= eventfd(0, EFD_CLOEXEC
);
1862 if (unshare_ready_fd
< 0)
1865 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1867 if (pipe2(errno_pipe
, O_CLOEXEC
) < 0)
1870 r
= safe_fork("(sd-userns)", FORK_RESET_SIGNALS
|FORK_DEATHSIG
, &pid
);
1874 _cleanup_close_
int fd
= -1;
1878 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1879 * here, after the parent opened its own user namespace. */
1882 errno_pipe
[0] = safe_close(errno_pipe
[0]);
1884 /* Wait until the parent unshared the user namespace */
1885 if (read(unshare_ready_fd
, &c
, sizeof(c
)) < 0) {
1890 /* Disable the setgroups() system call in the child user namespace, for good. */
1891 a
= procfs_file_alloca(ppid
, "setgroups");
1892 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1894 if (errno
!= ENOENT
) {
1899 /* If the file is missing the kernel is too old, let's continue anyway. */
1901 if (write(fd
, "deny\n", 5) < 0) {
1906 fd
= safe_close(fd
);
1909 /* First write the GID map */
1910 a
= procfs_file_alloca(ppid
, "gid_map");
1911 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1916 if (write(fd
, gid_map
, strlen(gid_map
)) < 0) {
1920 fd
= safe_close(fd
);
1922 /* Then write the UID map */
1923 a
= procfs_file_alloca(ppid
, "uid_map");
1924 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1929 if (write(fd
, uid_map
, strlen(uid_map
)) < 0) {
1934 _exit(EXIT_SUCCESS
);
1937 (void) write(errno_pipe
[1], &r
, sizeof(r
));
1938 _exit(EXIT_FAILURE
);
1941 errno_pipe
[1] = safe_close(errno_pipe
[1]);
1943 if (unshare(CLONE_NEWUSER
) < 0)
1946 /* Let the child know that the namespace is ready now */
1947 if (write(unshare_ready_fd
, &c
, sizeof(c
)) < 0)
1950 /* Try to read an error code from the child */
1951 n
= read(errno_pipe
[0], &r
, sizeof(r
));
1954 if (n
== sizeof(r
)) { /* an error code was sent to us */
1959 if (n
!= 0) /* on success we should have read 0 bytes */
1962 r
= wait_for_terminate_and_check("(sd-userns)", pid
, 0);
1966 if (r
!= EXIT_SUCCESS
) /* If something strange happened with the child, let's consider this fatal, too */
1972 static int setup_exec_directory(
1973 const ExecContext
*context
,
1974 const ExecParameters
*params
,
1977 ExecDirectoryType type
,
1980 static const int exit_status_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
1981 [EXEC_DIRECTORY_RUNTIME
] = EXIT_RUNTIME_DIRECTORY
,
1982 [EXEC_DIRECTORY_STATE
] = EXIT_STATE_DIRECTORY
,
1983 [EXEC_DIRECTORY_CACHE
] = EXIT_CACHE_DIRECTORY
,
1984 [EXEC_DIRECTORY_LOGS
] = EXIT_LOGS_DIRECTORY
,
1985 [EXEC_DIRECTORY_CONFIGURATION
] = EXIT_CONFIGURATION_DIRECTORY
,
1992 assert(type
>= 0 && type
< _EXEC_DIRECTORY_TYPE_MAX
);
1993 assert(exit_status
);
1995 if (!params
->prefix
[type
])
1998 if (params
->flags
& EXEC_CHOWN_DIRECTORIES
) {
1999 if (!uid_is_valid(uid
))
2001 if (!gid_is_valid(gid
))
2005 STRV_FOREACH(rt
, context
->directories
[type
].paths
) {
2006 _cleanup_free_
char *p
= NULL
, *pp
= NULL
;
2008 p
= strjoin(params
->prefix
[type
], "/", *rt
);
2014 r
= mkdir_parents_label(p
, 0755);
2018 if (context
->dynamic_user
&&
2019 !IN_SET(type
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
)) {
2020 _cleanup_free_
char *private_root
= NULL
, *relative
= NULL
, *parent
= NULL
;
2022 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
2023 * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
2024 * whose UID is later on reused. To lock this down we use the same trick used by container
2025 * managers to prohibit host users to get access to files of the same UID in containers: we
2026 * place everything inside a directory that has an access mode of 0700 and is owned root:root,
2027 * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
2028 * to make this directory permeable for the service itself.
2030 * Specifically: for a service which wants a special directory "foo/" we first create a
2031 * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
2032 * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
2033 * privileged host users can access "foo/" as usual, but unprivileged host users can't look
2034 * into it. Inside of the namespace of the container "private/" is replaced by a more liberally
2035 * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
2036 * disabling the access boundary for the service and making sure it only gets access to the
2037 * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
2039 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
2040 * owned by the service itself.
2041 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
2042 * files or sockets with other services. */
2044 private_root
= strjoin(params
->prefix
[type
], "/private");
2045 if (!private_root
) {
2050 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
2051 r
= mkdir_safe_label(private_root
, 0700, 0, 0, MKDIR_WARN_MODE
);
2055 pp
= strjoin(private_root
, "/", *rt
);
2061 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2062 r
= mkdir_parents_label(pp
, 0755);
2066 if (is_dir(p
, false) > 0 &&
2067 (laccess(pp
, F_OK
) < 0 && errno
== ENOENT
)) {
2069 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2070 * it over. Most likely the service has been upgraded from one that didn't use
2071 * DynamicUser=1, to one that does. */
2073 if (rename(p
, pp
) < 0) {
2078 /* Otherwise, create the actual directory for the service */
2080 r
= mkdir_label(pp
, context
->directories
[type
].mode
);
2081 if (r
< 0 && r
!= -EEXIST
)
2085 parent
= dirname_malloc(p
);
2091 r
= path_make_relative(parent
, pp
, &relative
);
2095 /* And link it up from the original place */
2096 r
= symlink_idempotent(relative
, p
);
2100 /* Lock down the access mode */
2101 if (chmod(pp
, context
->directories
[type
].mode
) < 0) {
2106 r
= mkdir_label(p
, context
->directories
[type
].mode
);
2107 if (r
< 0 && r
!= -EEXIST
)
2109 if (r
== -EEXIST
&& !context
->dynamic_user
)
2113 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
2114 * a service, and shall not be writable. */
2115 if (type
== EXEC_DIRECTORY_CONFIGURATION
)
2118 /* Then, change the ownership of the whole tree, if necessary */
2119 r
= path_chown_recursive(pp
?: p
, uid
, gid
);
2127 *exit_status
= exit_status_table
[type
];
2132 static int setup_smack(
2133 const ExecContext
*context
,
2134 const ExecCommand
*command
) {
2141 if (context
->smack_process_label
) {
2142 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
2146 #ifdef SMACK_DEFAULT_PROCESS_LABEL
2148 _cleanup_free_
char *exec_label
= NULL
;
2150 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
2151 if (r
< 0 && !IN_SET(r
, -ENODATA
, -EOPNOTSUPP
))
2154 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
2164 static int compile_bind_mounts(
2165 const ExecContext
*context
,
2166 const ExecParameters
*params
,
2167 BindMount
**ret_bind_mounts
,
2168 size_t *ret_n_bind_mounts
,
2169 char ***ret_empty_directories
) {
2171 _cleanup_strv_free_
char **empty_directories
= NULL
;
2172 BindMount
*bind_mounts
;
2174 ExecDirectoryType t
;
2179 assert(ret_bind_mounts
);
2180 assert(ret_n_bind_mounts
);
2181 assert(ret_empty_directories
);
2183 n
= context
->n_bind_mounts
;
2184 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2185 if (!params
->prefix
[t
])
2188 n
+= strv_length(context
->directories
[t
].paths
);
2192 *ret_bind_mounts
= NULL
;
2193 *ret_n_bind_mounts
= 0;
2194 *ret_empty_directories
= NULL
;
2198 bind_mounts
= new(BindMount
, n
);
2202 for (i
= 0; i
< context
->n_bind_mounts
; i
++) {
2203 BindMount
*item
= context
->bind_mounts
+ i
;
2206 s
= strdup(item
->source
);
2212 d
= strdup(item
->destination
);
2219 bind_mounts
[h
++] = (BindMount
) {
2222 .read_only
= item
->read_only
,
2223 .recursive
= item
->recursive
,
2224 .ignore_enoent
= item
->ignore_enoent
,
2228 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2231 if (!params
->prefix
[t
])
2234 if (strv_isempty(context
->directories
[t
].paths
))
2237 if (context
->dynamic_user
&&
2238 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
) &&
2239 !(context
->root_directory
|| context
->root_image
)) {
2242 /* So this is for a dynamic user, and we need to make sure the process can access its own
2243 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2244 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2246 private_root
= strjoin(params
->prefix
[t
], "/private");
2247 if (!private_root
) {
2252 r
= strv_consume(&empty_directories
, private_root
);
2257 STRV_FOREACH(suffix
, context
->directories
[t
].paths
) {
2260 if (context
->dynamic_user
&&
2261 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
))
2262 s
= strjoin(params
->prefix
[t
], "/private/", *suffix
);
2264 s
= strjoin(params
->prefix
[t
], "/", *suffix
);
2270 if (context
->dynamic_user
&&
2271 !IN_SET(t
, EXEC_DIRECTORY_RUNTIME
, EXEC_DIRECTORY_CONFIGURATION
) &&
2272 (context
->root_directory
|| context
->root_image
))
2273 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2274 * directory is not created on the root directory. So, let's bind-mount the directory
2275 * on the 'non-private' place. */
2276 d
= strjoin(params
->prefix
[t
], "/", *suffix
);
2285 bind_mounts
[h
++] = (BindMount
) {
2290 .ignore_enoent
= false,
2297 *ret_bind_mounts
= bind_mounts
;
2298 *ret_n_bind_mounts
= n
;
2299 *ret_empty_directories
= TAKE_PTR(empty_directories
);
2304 bind_mount_free_many(bind_mounts
, h
);
2308 static int apply_mount_namespace(
2310 const ExecCommand
*command
,
2311 const ExecContext
*context
,
2312 const ExecParameters
*params
,
2313 const ExecRuntime
*runtime
) {
2315 _cleanup_strv_free_
char **empty_directories
= NULL
;
2316 char *tmp
= NULL
, *var
= NULL
;
2317 const char *root_dir
= NULL
, *root_image
= NULL
;
2318 NamespaceInfo ns_info
;
2319 bool needs_sandboxing
;
2320 BindMount
*bind_mounts
= NULL
;
2321 size_t n_bind_mounts
= 0;
2326 /* The runtime struct only contains the parent of the private /tmp,
2327 * which is non-accessible to world users. Inside of it there's a /tmp
2328 * that is sticky, and that's the one we want to use here. */
2330 if (context
->private_tmp
&& runtime
) {
2331 if (runtime
->tmp_dir
)
2332 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
2333 if (runtime
->var_tmp_dir
)
2334 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
2337 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2338 root_image
= context
->root_image
;
2341 root_dir
= context
->root_directory
;
2344 r
= compile_bind_mounts(context
, params
, &bind_mounts
, &n_bind_mounts
, &empty_directories
);
2348 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
2349 if (needs_sandboxing
)
2350 ns_info
= (NamespaceInfo
) {
2351 .ignore_protect_paths
= false,
2352 .private_dev
= context
->private_devices
,
2353 .protect_control_groups
= context
->protect_control_groups
,
2354 .protect_kernel_tunables
= context
->protect_kernel_tunables
,
2355 .protect_kernel_modules
= context
->protect_kernel_modules
,
2356 .mount_apivfs
= context
->mount_apivfs
,
2357 .private_mounts
= context
->private_mounts
,
2359 else if (!context
->dynamic_user
&& root_dir
)
2361 * If DynamicUser=no and RootDirectory= is set then let's pass a relaxed
2362 * sandbox info, otherwise enforce it, don't ignore protected paths and
2363 * fail if we are unable to apply the sandbox inside the mount namespace.
2365 ns_info
= (NamespaceInfo
) {
2366 .ignore_protect_paths
= true,
2369 ns_info
= (NamespaceInfo
) {};
2371 r
= setup_namespace(root_dir
, root_image
,
2372 &ns_info
, context
->read_write_paths
,
2373 needs_sandboxing
? context
->read_only_paths
: NULL
,
2374 needs_sandboxing
? context
->inaccessible_paths
: NULL
,
2378 context
->temporary_filesystems
,
2379 context
->n_temporary_filesystems
,
2382 needs_sandboxing
? context
->protect_home
: PROTECT_HOME_NO
,
2383 needs_sandboxing
? context
->protect_system
: PROTECT_SYSTEM_NO
,
2384 context
->mount_flags
,
2385 DISSECT_IMAGE_DISCARD_ON_LOOP
);
2387 bind_mount_free_many(bind_mounts
, n_bind_mounts
);
2389 /* If we couldn't set up the namespace this is probably due to a
2390 * missing capability. In this case, silently proceed. */
2391 if (IN_SET(r
, -EPERM
, -EACCES
)) {
2392 log_unit_debug_errno(u
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2399 static int apply_working_directory(
2400 const ExecContext
*context
,
2401 const ExecParameters
*params
,
2403 const bool needs_mount_ns
,
2409 assert(exit_status
);
2411 if (context
->working_directory_home
) {
2414 *exit_status
= EXIT_CHDIR
;
2420 } else if (context
->working_directory
)
2421 wd
= context
->working_directory
;
2425 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2426 if (!needs_mount_ns
&& context
->root_directory
)
2427 if (chroot(context
->root_directory
) < 0) {
2428 *exit_status
= EXIT_CHROOT
;
2434 d
= prefix_roota(context
->root_directory
, wd
);
2436 if (chdir(d
) < 0 && !context
->working_directory_missing_ok
) {
2437 *exit_status
= EXIT_CHDIR
;
2444 static int setup_keyring(
2446 const ExecContext
*context
,
2447 const ExecParameters
*p
,
2448 uid_t uid
, gid_t gid
) {
2450 key_serial_t keyring
;
2459 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2460 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2461 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2462 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2463 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2464 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2466 if (!(p
->flags
& EXEC_NEW_KEYRING
))
2469 if (context
->keyring_mode
== EXEC_KEYRING_INHERIT
)
2472 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2473 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2474 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2475 * & group is just as nasty as acquiring a reference to the user keyring. */
2477 saved_uid
= getuid();
2478 saved_gid
= getgid();
2480 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2481 if (setregid(gid
, -1) < 0)
2482 return log_unit_error_errno(u
, errno
, "Failed to change GID for user keyring: %m");
2485 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2486 if (setreuid(uid
, -1) < 0) {
2487 r
= log_unit_error_errno(u
, errno
, "Failed to change UID for user keyring: %m");
2492 keyring
= keyctl(KEYCTL_JOIN_SESSION_KEYRING
, 0, 0, 0, 0);
2493 if (keyring
== -1) {
2494 if (errno
== ENOSYS
)
2495 log_unit_debug_errno(u
, errno
, "Kernel keyring not supported, ignoring.");
2496 else if (IN_SET(errno
, EACCES
, EPERM
))
2497 log_unit_debug_errno(u
, errno
, "Kernel keyring access prohibited, ignoring.");
2498 else if (errno
== EDQUOT
)
2499 log_unit_debug_errno(u
, errno
, "Out of kernel keyrings to allocate, ignoring.");
2501 r
= log_unit_error_errno(u
, errno
, "Setting up kernel keyring failed: %m");
2506 /* When requested link the user keyring into the session keyring. */
2507 if (context
->keyring_mode
== EXEC_KEYRING_SHARED
) {
2509 if (keyctl(KEYCTL_LINK
,
2510 KEY_SPEC_USER_KEYRING
,
2511 KEY_SPEC_SESSION_KEYRING
, 0, 0) < 0) {
2512 r
= log_unit_error_errno(u
, errno
, "Failed to link user keyring into session keyring: %m");
2517 /* Restore uid/gid back */
2518 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2519 if (setreuid(saved_uid
, -1) < 0) {
2520 r
= log_unit_error_errno(u
, errno
, "Failed to change UID back for user keyring: %m");
2525 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2526 if (setregid(saved_gid
, -1) < 0)
2527 return log_unit_error_errno(u
, errno
, "Failed to change GID back for user keyring: %m");
2530 /* Populate the keyring with the invocation ID by default, as original saved_uid. */
2531 if (!sd_id128_is_null(u
->invocation_id
)) {
2534 key
= add_key("user", "invocation_id", &u
->invocation_id
, sizeof(u
->invocation_id
), KEY_SPEC_SESSION_KEYRING
);
2536 log_unit_debug_errno(u
, errno
, "Failed to add invocation ID to keyring, ignoring: %m");
2538 if (keyctl(KEYCTL_SETPERM
, key
,
2539 KEY_POS_VIEW
|KEY_POS_READ
|KEY_POS_SEARCH
|
2540 KEY_USR_VIEW
|KEY_USR_READ
|KEY_USR_SEARCH
, 0, 0) < 0)
2541 r
= log_unit_error_errno(u
, errno
, "Failed to restrict invocation ID permission: %m");
2546 /* Revert back uid & gid for the last time, and exit */
2547 /* no extra logging, as only the first already reported error matters */
2548 if (getuid() != saved_uid
)
2549 (void) setreuid(saved_uid
, -1);
2551 if (getgid() != saved_gid
)
2552 (void) setregid(saved_gid
, -1);
2557 static void append_socket_pair(int *array
, size_t *n
, const int pair
[2]) {
2565 array
[(*n
)++] = pair
[0];
2567 array
[(*n
)++] = pair
[1];
2570 static int close_remaining_fds(
2571 const ExecParameters
*params
,
2572 const ExecRuntime
*runtime
,
2573 const DynamicCreds
*dcreds
,
2577 int *fds
, size_t n_fds
) {
2579 size_t n_dont_close
= 0;
2580 int dont_close
[n_fds
+ 12];
2584 if (params
->stdin_fd
>= 0)
2585 dont_close
[n_dont_close
++] = params
->stdin_fd
;
2586 if (params
->stdout_fd
>= 0)
2587 dont_close
[n_dont_close
++] = params
->stdout_fd
;
2588 if (params
->stderr_fd
>= 0)
2589 dont_close
[n_dont_close
++] = params
->stderr_fd
;
2592 dont_close
[n_dont_close
++] = socket_fd
;
2594 dont_close
[n_dont_close
++] = exec_fd
;
2596 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
2597 n_dont_close
+= n_fds
;
2601 append_socket_pair(dont_close
, &n_dont_close
, runtime
->netns_storage_socket
);
2605 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->user
->storage_socket
);
2607 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->group
->storage_socket
);
2610 if (user_lookup_fd
>= 0)
2611 dont_close
[n_dont_close
++] = user_lookup_fd
;
2613 return close_all_fds(dont_close
, n_dont_close
);
2616 static int send_user_lookup(
2624 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2625 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2628 if (user_lookup_fd
< 0)
2631 if (!uid_is_valid(uid
) && !gid_is_valid(gid
))
2634 if (writev(user_lookup_fd
,
2636 IOVEC_INIT(&uid
, sizeof(uid
)),
2637 IOVEC_INIT(&gid
, sizeof(gid
)),
2638 IOVEC_INIT_STRING(unit
->id
) }, 3) < 0)
2644 static int acquire_home(const ExecContext
*c
, uid_t uid
, const char** home
, char **buf
) {
2651 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2656 if (!c
->working_directory_home
)
2660 /* Hardcode /root as home directory for UID 0 */
2665 r
= get_home_dir(buf
);
2673 static int compile_suggested_paths(const ExecContext
*c
, const ExecParameters
*p
, char ***ret
) {
2674 _cleanup_strv_free_
char ** list
= NULL
;
2675 ExecDirectoryType t
;
2682 assert(c
->dynamic_user
);
2684 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2685 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2688 for (t
= 0; t
< _EXEC_DIRECTORY_TYPE_MAX
; t
++) {
2691 if (t
== EXEC_DIRECTORY_CONFIGURATION
)
2697 STRV_FOREACH(i
, c
->directories
[t
].paths
) {
2700 if (t
== EXEC_DIRECTORY_RUNTIME
)
2701 e
= strjoin(p
->prefix
[t
], "/", *i
);
2703 e
= strjoin(p
->prefix
[t
], "/private/", *i
);
2707 r
= strv_consume(&list
, e
);
2713 *ret
= TAKE_PTR(list
);
2718 static char *exec_command_line(char **argv
);
2720 static int exec_child(
2722 const ExecCommand
*command
,
2723 const ExecContext
*context
,
2724 const ExecParameters
*params
,
2725 ExecRuntime
*runtime
,
2726 DynamicCreds
*dcreds
,
2730 size_t n_socket_fds
,
2731 size_t n_storage_fds
,
2736 _cleanup_strv_free_
char **our_env
= NULL
, **pass_env
= NULL
, **accum_env
= NULL
, **final_argv
= NULL
;
2737 int *fds_with_exec_fd
, n_fds_with_exec_fd
, r
, ngids
= 0, exec_fd
= -1;
2738 _cleanup_free_ gid_t
*supplementary_gids
= NULL
;
2739 const char *username
= NULL
, *groupname
= NULL
;
2740 _cleanup_free_
char *home_buffer
= NULL
;
2741 const char *home
= NULL
, *shell
= NULL
;
2742 dev_t journal_stream_dev
= 0;
2743 ino_t journal_stream_ino
= 0;
2744 bool needs_sandboxing
, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2745 needs_setuid
, /* Do we need to do the actual setresuid()/setresgid() calls? */
2746 needs_mount_namespace
, /* Do we need to set up a mount namespace for this kernel? */
2747 needs_ambient_hack
; /* Do we need to apply the ambient capabilities hack? */
2749 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
2750 bool use_selinux
= false;
2753 bool use_smack
= false;
2756 bool use_apparmor
= false;
2758 uid_t uid
= UID_INVALID
;
2759 gid_t gid
= GID_INVALID
;
2761 ExecDirectoryType dt
;
2768 assert(exit_status
);
2770 rename_process_from_path(command
->path
);
2772 /* We reset exactly these signals, since they are the
2773 * only ones we set to SIG_IGN in the main daemon. All
2774 * others we leave untouched because we set them to
2775 * SIG_DFL or a valid handler initially, both of which
2776 * will be demoted to SIG_DFL. */
2777 (void) default_signals(SIGNALS_CRASH_HANDLER
,
2778 SIGNALS_IGNORE
, -1);
2780 if (context
->ignore_sigpipe
)
2781 (void) ignore_signals(SIGPIPE
, -1);
2783 r
= reset_signal_mask();
2785 *exit_status
= EXIT_SIGNAL_MASK
;
2786 return log_unit_error_errno(unit
, r
, "Failed to set process signal mask: %m");
2789 if (params
->idle_pipe
)
2790 do_idle_pipe_dance(params
->idle_pipe
);
2792 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2793 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2794 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2795 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2798 log_set_open_when_needed(true);
2800 /* In case anything used libc syslog(), close this here, too */
2803 n_fds
= n_socket_fds
+ n_storage_fds
;
2804 r
= close_remaining_fds(params
, runtime
, dcreds
, user_lookup_fd
, socket_fd
, params
->exec_fd
, fds
, n_fds
);
2806 *exit_status
= EXIT_FDS
;
2807 return log_unit_error_errno(unit
, r
, "Failed to close unwanted file descriptors: %m");
2810 if (!context
->same_pgrp
)
2812 *exit_status
= EXIT_SETSID
;
2813 return log_unit_error_errno(unit
, errno
, "Failed to create new process session: %m");
2816 exec_context_tty_reset(context
, params
);
2818 if (unit_shall_confirm_spawn(unit
)) {
2819 const char *vc
= params
->confirm_spawn
;
2820 _cleanup_free_
char *cmdline
= NULL
;
2822 cmdline
= exec_command_line(command
->argv
);
2824 *exit_status
= EXIT_MEMORY
;
2828 r
= ask_for_confirmation(vc
, unit
, cmdline
);
2829 if (r
!= CONFIRM_EXECUTE
) {
2830 if (r
== CONFIRM_PRETEND_SUCCESS
) {
2831 *exit_status
= EXIT_SUCCESS
;
2834 *exit_status
= EXIT_CONFIRM
;
2835 log_unit_error(unit
, "Execution cancelled by the user");
2840 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
2841 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
2842 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
2843 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
2844 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
2845 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit
->id
, true) != 0 ||
2846 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit
->manager
) ? "system" : "user", true) != 0) {
2847 *exit_status
= EXIT_MEMORY
;
2848 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
2851 if (context
->dynamic_user
&& dcreds
) {
2852 _cleanup_strv_free_
char **suggested_paths
= NULL
;
2854 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
2855 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
2856 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2857 *exit_status
= EXIT_USER
;
2858 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
2861 r
= compile_suggested_paths(context
, params
, &suggested_paths
);
2863 *exit_status
= EXIT_MEMORY
;
2867 r
= dynamic_creds_realize(dcreds
, suggested_paths
, &uid
, &gid
);
2869 *exit_status
= EXIT_USER
;
2871 log_unit_error(unit
, "Failed to update dynamic user credentials: User or group with specified name already exists.");
2874 return log_unit_error_errno(unit
, r
, "Failed to update dynamic user credentials: %m");
2877 if (!uid_is_valid(uid
)) {
2878 *exit_status
= EXIT_USER
;
2879 log_unit_error(unit
, "UID validation failed for \""UID_FMT
"\"", uid
);
2883 if (!gid_is_valid(gid
)) {
2884 *exit_status
= EXIT_USER
;
2885 log_unit_error(unit
, "GID validation failed for \""GID_FMT
"\"", gid
);
2890 username
= dcreds
->user
->name
;
2893 r
= get_fixed_user(context
, &username
, &uid
, &gid
, &home
, &shell
);
2895 *exit_status
= EXIT_USER
;
2896 return log_unit_error_errno(unit
, r
, "Failed to determine user credentials: %m");
2899 r
= get_fixed_group(context
, &groupname
, &gid
);
2901 *exit_status
= EXIT_GROUP
;
2902 return log_unit_error_errno(unit
, r
, "Failed to determine group credentials: %m");
2906 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2907 r
= get_supplementary_groups(context
, username
, groupname
, gid
,
2908 &supplementary_gids
, &ngids
);
2910 *exit_status
= EXIT_GROUP
;
2911 return log_unit_error_errno(unit
, r
, "Failed to determine supplementary groups: %m");
2914 r
= send_user_lookup(unit
, user_lookup_fd
, uid
, gid
);
2916 *exit_status
= EXIT_USER
;
2917 return log_unit_error_errno(unit
, r
, "Failed to send user credentials to PID1: %m");
2920 user_lookup_fd
= safe_close(user_lookup_fd
);
2922 r
= acquire_home(context
, uid
, &home
, &home_buffer
);
2924 *exit_status
= EXIT_CHDIR
;
2925 return log_unit_error_errno(unit
, r
, "Failed to determine $HOME for user: %m");
2928 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2929 * must be sure to drop O_NONBLOCK */
2931 (void) fd_nonblock(socket_fd
, false);
2933 r
= setup_input(context
, params
, socket_fd
, named_iofds
);
2935 *exit_status
= EXIT_STDIN
;
2936 return log_unit_error_errno(unit
, r
, "Failed to set up standard input: %m");
2939 r
= setup_output(unit
, context
, params
, STDOUT_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2941 *exit_status
= EXIT_STDOUT
;
2942 return log_unit_error_errno(unit
, r
, "Failed to set up standard output: %m");
2945 r
= setup_output(unit
, context
, params
, STDERR_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2947 *exit_status
= EXIT_STDERR
;
2948 return log_unit_error_errno(unit
, r
, "Failed to set up standard error output: %m");
2951 if (params
->cgroup_path
) {
2952 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
2954 *exit_status
= EXIT_CGROUP
;
2955 return log_unit_error_errno(unit
, r
, "Failed to attach to cgroup %s: %m", params
->cgroup_path
);
2959 if (context
->oom_score_adjust_set
) {
2960 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
2961 * prohibit write access to this file, and we shouldn't trip up over that. */
2962 r
= set_oom_score_adjust(context
->oom_score_adjust
);
2963 if (IN_SET(r
, -EPERM
, -EACCES
))
2964 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2966 *exit_status
= EXIT_OOM_ADJUST
;
2967 return log_unit_error_errno(unit
, r
, "Failed to adjust OOM setting: %m");
2971 if (context
->nice_set
)
2972 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
2973 *exit_status
= EXIT_NICE
;
2974 return log_unit_error_errno(unit
, errno
, "Failed to set up process scheduling priority (nice level): %m");
2977 if (context
->cpu_sched_set
) {
2978 struct sched_param param
= {
2979 .sched_priority
= context
->cpu_sched_priority
,
2982 r
= sched_setscheduler(0,
2983 context
->cpu_sched_policy
|
2984 (context
->cpu_sched_reset_on_fork
?
2985 SCHED_RESET_ON_FORK
: 0),
2988 *exit_status
= EXIT_SETSCHEDULER
;
2989 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU scheduling: %m");
2993 if (context
->cpuset
)
2994 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
2995 *exit_status
= EXIT_CPUAFFINITY
;
2996 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU affinity: %m");
2999 if (context
->ioprio_set
)
3000 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
3001 *exit_status
= EXIT_IOPRIO
;
3002 return log_unit_error_errno(unit
, errno
, "Failed to set up IO scheduling priority: %m");
3005 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
3006 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
3007 *exit_status
= EXIT_TIMERSLACK
;
3008 return log_unit_error_errno(unit
, errno
, "Failed to set up timer slack: %m");
3011 if (context
->personality
!= PERSONALITY_INVALID
) {
3012 r
= safe_personality(context
->personality
);
3014 *exit_status
= EXIT_PERSONALITY
;
3015 return log_unit_error_errno(unit
, r
, "Failed to set up execution domain (personality): %m");
3019 if (context
->utmp_id
)
3020 utmp_put_init_process(context
->utmp_id
, getpid_cached(), getsid(0),
3022 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
3023 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
3027 if (context
->user
) {
3028 r
= chown_terminal(STDIN_FILENO
, uid
);
3030 *exit_status
= EXIT_STDIN
;
3031 return log_unit_error_errno(unit
, r
, "Failed to change ownership of terminal: %m");
3035 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
3036 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
3037 * safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
3038 * touch a single hierarchy too. */
3039 if (params
->cgroup_path
&& context
->user
&& (params
->flags
& EXEC_CGROUP_DELEGATE
)) {
3040 r
= cg_set_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, uid
, gid
);
3042 *exit_status
= EXIT_CGROUP
;
3043 return log_unit_error_errno(unit
, r
, "Failed to adjust control group access: %m");
3047 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
3048 r
= setup_exec_directory(context
, params
, uid
, gid
, dt
, exit_status
);
3050 return log_unit_error_errno(unit
, r
, "Failed to set up special execution directory in %s: %m", params
->prefix
[dt
]);
3053 r
= build_environment(
3065 *exit_status
= EXIT_MEMORY
;
3069 r
= build_pass_environment(context
, &pass_env
);
3071 *exit_status
= EXIT_MEMORY
;
3075 accum_env
= strv_env_merge(5,
3076 params
->environment
,
3079 context
->environment
,
3083 *exit_status
= EXIT_MEMORY
;
3086 accum_env
= strv_env_clean(accum_env
);
3088 (void) umask(context
->umask
);
3090 r
= setup_keyring(unit
, context
, params
, uid
, gid
);
3092 *exit_status
= EXIT_KEYRING
;
3093 return log_unit_error_errno(unit
, r
, "Failed to set up kernel keyring: %m");
3096 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3097 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
3099 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3100 needs_ambient_hack
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && (command
->flags
& EXEC_COMMAND_AMBIENT_MAGIC
) && !ambient_capabilities_supported();
3102 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3103 if (needs_ambient_hack
)
3104 needs_setuid
= false;
3106 needs_setuid
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& (EXEC_COMMAND_FULLY_PRIVILEGED
|EXEC_COMMAND_NO_SETUID
));
3108 if (needs_sandboxing
) {
3109 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3110 * present. The actual MAC context application will happen later, as late as possible, to avoid
3111 * impacting our own code paths. */
3114 use_selinux
= mac_selinux_use();
3117 use_smack
= mac_smack_use();
3120 use_apparmor
= mac_apparmor_use();
3125 if (context
->pam_name
&& username
) {
3126 r
= setup_pam(context
->pam_name
, username
, uid
, gid
, context
->tty_path
, &accum_env
, fds
, n_fds
);
3128 *exit_status
= EXIT_PAM
;
3129 return log_unit_error_errno(unit
, r
, "Failed to set up PAM session: %m");
3134 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
3135 if (ns_type_supported(NAMESPACE_NET
)) {
3136 r
= setup_netns(runtime
->netns_storage_socket
);
3138 *exit_status
= EXIT_NETWORK
;
3139 return log_unit_error_errno(unit
, r
, "Failed to set up network namespacing: %m");
3142 log_unit_warning(unit
, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
3145 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
3146 if (needs_mount_namespace
) {
3147 r
= apply_mount_namespace(unit
, command
, context
, params
, runtime
);
3149 *exit_status
= EXIT_NAMESPACE
;
3150 return log_unit_error_errno(unit
, r
, "Failed to set up mount namespacing: %m");
3154 /* Apply just after mount namespace setup */
3155 r
= apply_working_directory(context
, params
, home
, needs_mount_namespace
, exit_status
);
3157 return log_unit_error_errno(unit
, r
, "Changing to the requested working directory failed: %m");
3159 /* Drop groups as early as possible */
3161 r
= enforce_groups(gid
, supplementary_gids
, ngids
);
3163 *exit_status
= EXIT_GROUP
;
3164 return log_unit_error_errno(unit
, r
, "Changing group credentials failed: %m");
3168 if (needs_sandboxing
) {
3170 if (use_selinux
&& params
->selinux_context_net
&& socket_fd
>= 0) {
3171 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
3173 *exit_status
= EXIT_SELINUX_CONTEXT
;
3174 return log_unit_error_errno(unit
, r
, "Failed to determine SELinux context: %m");
3179 if (context
->private_users
) {
3180 r
= setup_private_users(uid
, gid
);
3182 *exit_status
= EXIT_USER
;
3183 return log_unit_error_errno(unit
, r
, "Failed to set up user namespacing: %m");
3188 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
3189 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3190 * however if we have it as we want to keep it open until the final execve(). */
3192 if (params
->exec_fd
>= 0) {
3193 exec_fd
= params
->exec_fd
;
3195 if (exec_fd
< 3 + (int) n_fds
) {
3198 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3199 * process we are about to execute. */
3201 moved_fd
= fcntl(exec_fd
, F_DUPFD_CLOEXEC
, 3 + (int) n_fds
);
3203 *exit_status
= EXIT_FDS
;
3204 return log_unit_error_errno(unit
, errno
, "Couldn't move exec fd up: %m");
3207 safe_close(exec_fd
);
3210 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3211 r
= fd_cloexec(exec_fd
, true);
3213 *exit_status
= EXIT_FDS
;
3214 return log_unit_error_errno(unit
, r
, "Failed to make exec fd FD_CLOEXEC: %m");
3218 fds_with_exec_fd
= newa(int, n_fds
+ 1);
3219 memcpy(fds_with_exec_fd
, fds
, n_fds
* sizeof(int));
3220 fds_with_exec_fd
[n_fds
] = exec_fd
;
3221 n_fds_with_exec_fd
= n_fds
+ 1;
3223 fds_with_exec_fd
= fds
;
3224 n_fds_with_exec_fd
= n_fds
;
3227 r
= close_all_fds(fds_with_exec_fd
, n_fds_with_exec_fd
);
3229 r
= shift_fds(fds
, n_fds
);
3231 r
= flags_fds(fds
, n_socket_fds
, n_storage_fds
, context
->non_blocking
);
3233 *exit_status
= EXIT_FDS
;
3234 return log_unit_error_errno(unit
, r
, "Failed to adjust passed file descriptors: %m");
3237 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3238 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3239 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3242 secure_bits
= context
->secure_bits
;
3244 if (needs_sandboxing
) {
3248 r
= setrlimit_closest_all((const struct rlimit
* const *) context
->rlimit
, &which_failed
);
3250 *exit_status
= EXIT_LIMITS
;
3251 return log_unit_error_errno(unit
, r
, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed
));
3254 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
3255 if (context
->restrict_realtime
&& !context
->rlimit
[RLIMIT_RTPRIO
]) {
3256 if (setrlimit(RLIMIT_RTPRIO
, &RLIMIT_MAKE_CONST(0)) < 0) {
3257 *exit_status
= EXIT_LIMITS
;
3258 return log_unit_error_errno(unit
, errno
, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
3263 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3264 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3266 r
= setup_smack(context
, command
);
3268 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
3269 return log_unit_error_errno(unit
, r
, "Failed to set SMACK process label: %m");
3274 bset
= context
->capability_bounding_set
;
3275 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3276 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3277 * instead of us doing that */
3278 if (needs_ambient_hack
)
3279 bset
|= (UINT64_C(1) << CAP_SETPCAP
) |
3280 (UINT64_C(1) << CAP_SETUID
) |
3281 (UINT64_C(1) << CAP_SETGID
);
3283 if (!cap_test_all(bset
)) {
3284 r
= capability_bounding_set_drop(bset
, false);
3286 *exit_status
= EXIT_CAPABILITIES
;
3287 return log_unit_error_errno(unit
, r
, "Failed to drop capabilities: %m");
3291 /* This is done before enforce_user, but ambient set
3292 * does not survive over setresuid() if keep_caps is not set. */
3293 if (!needs_ambient_hack
&&
3294 context
->capability_ambient_set
!= 0) {
3295 r
= capability_ambient_set_apply(context
->capability_ambient_set
, true);
3297 *exit_status
= EXIT_CAPABILITIES
;
3298 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (before UID change): %m");
3304 if (context
->user
) {
3305 r
= enforce_user(context
, uid
);
3307 *exit_status
= EXIT_USER
;
3308 return log_unit_error_errno(unit
, r
, "Failed to change UID to " UID_FMT
": %m", uid
);
3311 if (!needs_ambient_hack
&&
3312 context
->capability_ambient_set
!= 0) {
3314 /* Fix the ambient capabilities after user change. */
3315 r
= capability_ambient_set_apply(context
->capability_ambient_set
, false);
3317 *exit_status
= EXIT_CAPABILITIES
;
3318 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (after UID change): %m");
3321 /* If we were asked to change user and ambient capabilities
3322 * were requested, we had to add keep-caps to the securebits
3323 * so that we would maintain the inherited capability set
3324 * through the setresuid(). Make sure that the bit is added
3325 * also to the context secure_bits so that we don't try to
3326 * drop the bit away next. */
3328 secure_bits
|= 1<<SECURE_KEEP_CAPS
;
3333 if (needs_sandboxing
) {
3334 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
3335 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3336 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3337 * are restricted. */
3341 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
3344 r
= setexeccon(exec_context
);
3346 *exit_status
= EXIT_SELINUX_CONTEXT
;
3347 return log_unit_error_errno(unit
, r
, "Failed to change SELinux context to %s: %m", exec_context
);
3354 if (use_apparmor
&& context
->apparmor_profile
) {
3355 r
= aa_change_onexec(context
->apparmor_profile
);
3356 if (r
< 0 && !context
->apparmor_profile_ignore
) {
3357 *exit_status
= EXIT_APPARMOR_PROFILE
;
3358 return log_unit_error_errno(unit
, errno
, "Failed to prepare AppArmor profile change to %s: %m", context
->apparmor_profile
);
3363 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3364 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3365 if (prctl(PR_GET_SECUREBITS
) != secure_bits
)
3366 if (prctl(PR_SET_SECUREBITS
, secure_bits
) < 0) {
3367 *exit_status
= EXIT_SECUREBITS
;
3368 return log_unit_error_errno(unit
, errno
, "Failed to set process secure bits: %m");
3371 if (context_has_no_new_privileges(context
))
3372 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
3373 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
3374 return log_unit_error_errno(unit
, errno
, "Failed to disable new privileges: %m");
3378 r
= apply_address_families(unit
, context
);
3380 *exit_status
= EXIT_ADDRESS_FAMILIES
;
3381 return log_unit_error_errno(unit
, r
, "Failed to restrict address families: %m");
3384 r
= apply_memory_deny_write_execute(unit
, context
);
3386 *exit_status
= EXIT_SECCOMP
;
3387 return log_unit_error_errno(unit
, r
, "Failed to disable writing to executable memory: %m");
3390 r
= apply_restrict_realtime(unit
, context
);
3392 *exit_status
= EXIT_SECCOMP
;
3393 return log_unit_error_errno(unit
, r
, "Failed to apply realtime restrictions: %m");
3396 r
= apply_restrict_namespaces(unit
, context
);
3398 *exit_status
= EXIT_SECCOMP
;
3399 return log_unit_error_errno(unit
, r
, "Failed to apply namespace restrictions: %m");
3402 r
= apply_protect_sysctl(unit
, context
);
3404 *exit_status
= EXIT_SECCOMP
;
3405 return log_unit_error_errno(unit
, r
, "Failed to apply sysctl restrictions: %m");
3408 r
= apply_protect_kernel_modules(unit
, context
);
3410 *exit_status
= EXIT_SECCOMP
;
3411 return log_unit_error_errno(unit
, r
, "Failed to apply module loading restrictions: %m");
3414 r
= apply_private_devices(unit
, context
);
3416 *exit_status
= EXIT_SECCOMP
;
3417 return log_unit_error_errno(unit
, r
, "Failed to set up private devices: %m");
3420 r
= apply_syscall_archs(unit
, context
);
3422 *exit_status
= EXIT_SECCOMP
;
3423 return log_unit_error_errno(unit
, r
, "Failed to apply syscall architecture restrictions: %m");
3426 r
= apply_lock_personality(unit
, context
);
3428 *exit_status
= EXIT_SECCOMP
;
3429 return log_unit_error_errno(unit
, r
, "Failed to lock personalities: %m");
3432 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3433 * by the filter as little as possible. */
3434 r
= apply_syscall_filter(unit
, context
, needs_ambient_hack
);
3436 *exit_status
= EXIT_SECCOMP
;
3437 return log_unit_error_errno(unit
, r
, "Failed to apply system call filters: %m");
3442 if (!strv_isempty(context
->unset_environment
)) {
3445 ee
= strv_env_delete(accum_env
, 1, context
->unset_environment
);
3447 *exit_status
= EXIT_MEMORY
;
3451 strv_free_and_replace(accum_env
, ee
);
3454 final_argv
= replace_env_argv(command
->argv
, accum_env
);
3456 *exit_status
= EXIT_MEMORY
;
3460 if (DEBUG_LOGGING
) {
3461 _cleanup_free_
char *line
;
3463 line
= exec_command_line(final_argv
);
3465 log_struct(LOG_DEBUG
,
3466 "EXECUTABLE=%s", command
->path
,
3467 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
3469 LOG_UNIT_INVOCATION_ID(unit
));
3475 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3476 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3478 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3479 *exit_status
= EXIT_EXEC
;
3480 return log_unit_error_errno(unit
, errno
, "Failed to enable exec_fd: %m");
3484 execve(command
->path
, final_argv
, accum_env
);
3490 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
3491 * that POLLHUP on it no longer means execve() succeeded. */
3493 if (write(exec_fd
, &hot
, sizeof(hot
)) < 0) {
3494 *exit_status
= EXIT_EXEC
;
3495 return log_unit_error_errno(unit
, errno
, "Failed to disable exec_fd: %m");
3499 if (r
== -ENOENT
&& (command
->flags
& EXEC_COMMAND_IGNORE_FAILURE
)) {
3500 log_struct_errno(LOG_INFO
, r
,
3501 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3503 LOG_UNIT_INVOCATION_ID(unit
),
3504 LOG_UNIT_MESSAGE(unit
, "Executable %s missing, skipping: %m",
3506 "EXECUTABLE=%s", command
->path
);
3510 *exit_status
= EXIT_EXEC
;
3511 return log_unit_error_errno(unit
, r
, "Failed to execute command: %m");
/* Forward declarations: exec_spawn() below calls both helpers, whose definitions appear further down in this file. */
3514 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
);
3515 static int exec_context_named_iofds(const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[3]);
/* Spawns `command` for `unit`: checks the socket fd configuration, resolves named stdio fds
 * and environment files, logs the command line, and then forks (see the "Failed to fork" and
 * "Forked %s as" messages). The child side calls exec_child(); the parent side attaches the new
 * PID to params->cgroup_path, if set, and records the start in command->exec_status.
 * NOTE(review): this extract omits several original lines (remaining parameters, the fork call
 * itself, error-check conditions and returns); read it against the full file. */
3517 int exec_spawn(Unit
*unit
,
3518 ExecCommand
*command
,
3519 const ExecContext
*context
,
3520 const ExecParameters
*params
,
3521 ExecRuntime
*runtime
,
3522 DynamicCreds
*dcreds
,
3525 int socket_fd
, r
, named_iofds
[3] = { -1, -1, -1 }, *fds
= NULL
;
3526 _cleanup_strv_free_
char **files_env
= NULL
;
3527 size_t n_storage_fds
= 0, n_socket_fds
= 0;
3528 _cleanup_free_
char *line
= NULL
;
/* A fd array is required unless no socket and no storage fds are passed at all. */
3536 assert(params
->fds
|| (params
->n_socket_fds
+ params
->n_storage_fds
<= 0));
/* Socket-based stdin/stdout/stderr needs exactly one socket fd: more than one, or none, is
 * logged as an error; otherwise the first passed fd is used as socket_fd. */
3538 if (context
->std_input
== EXEC_INPUT_SOCKET
||
3539 context
->std_output
== EXEC_OUTPUT_SOCKET
||
3540 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
3542 if (params
->n_socket_fds
> 1) {
3543 log_unit_error(unit
, "Got more than one socket.");
3547 if (params
->n_socket_fds
== 0) {
3548 log_unit_error(unit
, "Got no socket.");
3552 socket_fd
= params
->fds
[0];
3556 n_socket_fds
= params
->n_socket_fds
;
3557 n_storage_fds
= params
->n_storage_fds
;
/* Map the configured fd names to the passed fds for stdin/stdout/stderr. */
3560 r
= exec_context_named_iofds(context
, params
, named_iofds
);
3562 return log_unit_error_errno(unit
, r
, "Failed to load a named file descriptor: %m");
3564 r
= exec_context_load_environment(unit
, context
, &files_env
);
3566 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
3568 line
= exec_command_line(command
->argv
);
3572 log_struct(LOG_DEBUG
,
3573 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
3574 "EXECUTABLE=%s", command
->path
,
3576 LOG_UNIT_INVOCATION_ID(unit
));
3580 return log_unit_error_errno(unit
, errno
, "Failed to fork: %m");
/* exit_status names the failing step in the spawn-failure log message below. */
3583 int exit_status
= EXIT_SUCCESS
;
3585 r
= exec_child(unit
,
3597 unit
->manager
->user_lookup_fds
[1],
3601 log_struct_errno(LOG_ERR
, r
,
3602 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3604 LOG_UNIT_INVOCATION_ID(unit
),
3605 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
3606 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
3608 "EXECUTABLE=%s", command
->path
);
3613 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
3615 /* We add the new process to the cgroup both in the child (so
3616 * that we can be sure that no user code is ever executed
3617 * outside of the cgroup) and in the parent (so that we can be
3618 * sure that when we kill the cgroup the process will be
3620 if (params
->cgroup_path
)
3621 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
3623 exec_status_start(&command
->exec_status
, pid
);
/* Fills the ExecContext `c` with the default values assigned below. */
3629 void exec_context_init(ExecContext
*c
) {
3630 ExecDirectoryType i
;
3635 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
3636 c
->cpu_sched_policy
= SCHED_OTHER
;
3637 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
3638 c
->syslog_level_prefix
= true;
3639 c
->ignore_sigpipe
= true;
3640 c
->timer_slack_nsec
= NSEC_INFINITY
;
3641 c
->personality
= PERSONALITY_INVALID
;
/* Every exec directory type starts out with mode 0755. */
3642 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3643 c
->directories
[i
].mode
= 0755;
3644 c
->capability_bounding_set
= CAP_ALL
;
/* Compile-time check that the initial namespace flags value differs from NAMESPACE_FLAGS_ALL. */
3645 assert_cc(NAMESPACE_FLAGS_INITIAL
!= NAMESPACE_FLAGS_ALL
);
3646 c
->restrict_namespaces
= NAMESPACE_FLAGS_INITIAL
;
/* NOTE(review): -1 presumably means "no maximum log level configured"; confirm against the users of log_level_max. */
3647 c
->log_level_max
= -1;
/* Releases the members of the ExecContext `c` listed below. Each release helper's return value
 * is stored back into the field it released (presumably NULL; confirm against the helpers), and
 * the array/size pairs are explicitly reset to NULL and 0.
 * NOTE(review): some original lines (e.g. the declaration of `l`) are not part of this extract. */
3650 void exec_context_done(ExecContext
*c
) {
3651 ExecDirectoryType i
;
/* Environment-related string vectors. */
3656 c
->environment
= strv_free(c
->environment
);
3657 c
->environment_files
= strv_free(c
->environment_files
);
3658 c
->pass_environment
= strv_free(c
->pass_environment
);
3659 c
->unset_environment
= strv_free(c
->unset_environment
);
3661 rlimit_free_all(c
->rlimit
);
/* Per-stdio-slot names and file paths (three slots). */
3663 for (l
= 0; l
< 3; l
++) {
3664 c
->stdio_fdname
[l
] = mfree(c
->stdio_fdname
[l
]);
3665 c
->stdio_file
[l
] = mfree(c
->stdio_file
[l
]);
3668 c
->working_directory
= mfree(c
->working_directory
);
3669 c
->root_directory
= mfree(c
->root_directory
);
3670 c
->root_image
= mfree(c
->root_image
);
3671 c
->tty_path
= mfree(c
->tty_path
);
3672 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
3673 c
->user
= mfree(c
->user
);
3674 c
->group
= mfree(c
->group
);
3676 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
3678 c
->pam_name
= mfree(c
->pam_name
);
3680 c
->read_only_paths
= strv_free(c
->read_only_paths
);
3681 c
->read_write_paths
= strv_free(c
->read_write_paths
);
3682 c
->inaccessible_paths
= strv_free(c
->inaccessible_paths
);
/* Bind mounts and temporary filesystems: free the arrays, then clear pointer and count. */
3684 bind_mount_free_many(c
->bind_mounts
, c
->n_bind_mounts
);
3685 c
->bind_mounts
= NULL
;
3686 c
->n_bind_mounts
= 0;
3687 temporary_filesystem_free_many(c
->temporary_filesystems
, c
->n_temporary_filesystems
);
3688 c
->temporary_filesystems
= NULL
;
3689 c
->n_temporary_filesystems
= 0;
3691 c
->cpuset
= cpu_set_mfree(c
->cpuset
);
3693 c
->utmp_id
= mfree(c
->utmp_id
);
3694 c
->selinux_context
= mfree(c
->selinux_context
);
3695 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
3696 c
->smack_process_label
= mfree(c
->smack_process_label
);
3698 c
->syscall_filter
= hashmap_free(c
->syscall_filter
);
3699 c
->syscall_archs
= set_free(c
->syscall_archs
);
3700 c
->address_families
= set_free(c
->address_families
);
/* Path lists of every exec directory type. */
3702 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3703 c
->directories
[i
].paths
= strv_free(c
->directories
[i
].paths
);
/* Same value exec_context_init() assigns to this field. */
3705 c
->log_level_max
= -1;
3707 exec_context_free_log_extra_fields(c
);
3709 c
->stdin_data
= mfree(c
->stdin_data
);
3710 c
->stdin_data_size
= 0;
/* For each path in c->directories[EXEC_DIRECTORY_RUNTIME].paths, builds
 * "<runtime_prefix>/<path>" with strjoin() and removes it with rm_rf(..., REMOVE_ROOT); the
 * rm_rf() result is deliberately discarded via the (void) cast.
 * NOTE(review): the handling of a NULL runtime_prefix, of a failed strjoin() and the return
 * value are not part of this extract. */
3713 int exec_context_destroy_runtime_directory(const ExecContext
*c
, const char *runtime_prefix
) {
3718 if (!runtime_prefix
)
3721 STRV_FOREACH(i
, c
->directories
[EXEC_DIRECTORY_RUNTIME
].paths
) {
3722 _cleanup_free_
char *p
;
3724 p
= strjoin(runtime_prefix
, "/", *i
);
3728 /* We execute this synchronously, since we need to be sure this is gone when we start the service
3730 (void) rm_rf(p
, REMOVE_ROOT
);
/* Releases c->path (mfree) and c->argv (strv_free), storing each helper's return value back into the field. */
3736 static void exec_command_done(ExecCommand
*c
) {
3739 c
->path
= mfree(c
->path
);
3740 c
->argv
= strv_free(c
->argv
);
/* Calls exec_command_done() on each of the n ExecCommand entries of the array starting at c. */
3743 void exec_command_done_array(ExecCommand
*c
, size_t n
) {
3746 for (i
= 0; i
< n
; i
++)
3747 exec_command_done(c
+i
);
/* Unlinks entry `i` from the list headed by `c` (LIST_REMOVE) and runs exec_command_done() on it.
 * NOTE(review): the loop header, the release of the entry itself and the return statement are
 * not part of this extract. */
3750 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
3754 LIST_REMOVE(command
, c
, i
);
3755 exec_command_done(i
);
/* For each of the n list heads in c, calls exec_command_free_list() and stores the returned pointer back into the slot. */
3762 void exec_command_free_array(ExecCommand
**c
, size_t n
) {
3765 for (i
= 0; i
< n
; i
++)
3766 c
[i
] = exec_command_free_list(c
[i
]);
/* Calls exec_status_reset() on the exec_status member of each of the n commands in the array c. */
3769 void exec_command_reset_status_array(ExecCommand
*c
, size_t n
) {
3772 for (i
= 0; i
< n
; i
++)
3773 exec_status_reset(&c
[i
].exec_status
);
/* For each of the n list heads in c, walks the list with LIST_FOREACH and calls exec_status_reset() on every entry's exec_status. */
3776 void exec_command_reset_status_list_array(ExecCommand
**c
, size_t n
) {
3779 for (i
= 0; i
< n
; i
++) {
3782 LIST_FOREACH(command
, z
, c
[i
])
3783 exec_status_reset(&z
->exec_status
);
/* Userdata for invalid_env(), which reads its ->unit and ->path members (the member declarations are not part of this extract). */
3787 typedef struct InvalidEnvInfo
{
/* Callback that logs the rejected environment assignment `p`, together with info->path, against
 * info->unit. `userdata` is used as a pointer to an InvalidEnvInfo. */
3792 static void invalid_env(const char *p
, void *userdata
) {
3793 InvalidEnvInfo
*info
= userdata
;
3795 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
/* Returns the fd name for a stdio slot of `c`: the configured c->stdio_fdname[] entry, or the
 * literal "stdin"/"stdout"/"stderr" when that entry is NULL.
 * NOTE(review): the dispatch on fd_index and the result for a slot that is not configured as a
 * named fd are not part of this extract. */
3798 const char* exec_context_fdname(const ExecContext
*c
, int fd_index
) {
3804 if (c
->std_input
!= EXEC_INPUT_NAMED_FD
)
3807 return c
->stdio_fdname
[STDIN_FILENO
] ?: "stdin";
3810 if (c
->std_output
!= EXEC_OUTPUT_NAMED_FD
)
3813 return c
->stdio_fdname
[STDOUT_FILENO
] ?: "stdout";
3816 if (c
->std_error
!= EXEC_OUTPUT_NAMED_FD
)
3819 return c
->stdio_fdname
[STDERR_FILENO
] ?: "stderr";
/* Searches the fds passed in `p` (p->fds, named by p->fd_names) for those whose name matches a
 * stdio slot of `c` that is configured as a named fd, and stores each match in named_iofds[].
 * Returns 0 if `targets` is 0 at the end, -ENOENT otherwise.
 * NOTE(review): the statements that update `targets` after a match are not part of this extract. */
3826 static int exec_context_named_iofds(const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[3]) {
3828 const char* stdio_fdname
[3];
/* Number of stdio slots (0 to 3) that are configured as named fds. */
3834 targets
= (c
->std_input
== EXEC_INPUT_NAMED_FD
) +
3835 (c
->std_output
== EXEC_OUTPUT_NAMED_FD
) +
3836 (c
->std_error
== EXEC_OUTPUT_NAMED_FD
);
3838 for (i
= 0; i
< 3; i
++)
3839 stdio_fdname
[i
] = exec_context_fdname(c
, i
);
3841 n_fds
= p
->n_storage_fds
+ p
->n_socket_fds
;
/* Scan the passed fds while targets remain. A slot is only filled while its named_iofds[]
 * entry is still negative, so the first fd with a matching name wins for that slot. */
3843 for (i
= 0; i
< n_fds
&& targets
> 0; i
++)
3844 if (named_iofds
[STDIN_FILENO
] < 0 &&
3845 c
->std_input
== EXEC_INPUT_NAMED_FD
&&
3846 stdio_fdname
[STDIN_FILENO
] &&
3847 streq(p
->fd_names
[i
], stdio_fdname
[STDIN_FILENO
])) {
3849 named_iofds
[STDIN_FILENO
] = p
->fds
[i
];
3852 } else if (named_iofds
[STDOUT_FILENO
] < 0 &&
3853 c
->std_output
== EXEC_OUTPUT_NAMED_FD
&&
3854 stdio_fdname
[STDOUT_FILENO
] &&
3855 streq(p
->fd_names
[i
], stdio_fdname
[STDOUT_FILENO
])) {
3857 named_iofds
[STDOUT_FILENO
] = p
->fds
[i
];
3860 } else if (named_iofds
[STDERR_FILENO
] < 0 &&
3861 c
->std_error
== EXEC_OUTPUT_NAMED_FD
&&
3862 stdio_fdname
[STDERR_FILENO
] &&
3863 streq(p
->fd_names
[i
], stdio_fdname
[STDERR_FILENO
])) {
3865 named_iofds
[STDERR_FILENO
] = p
->fds
[i
];
3869 return targets
== 0 ? 0 : -ENOENT
;
/* Loads the environment files listed in c->environment_files. Each entry is expanded with
 * safe_glob(), every match is parsed with load_env_file(), invalid assignments are reported
 * through invalid_env() by strv_env_clean_with_callback(), and the result is merged into the
 * accumulated list `r` with strv_env_merge().
 * NOTE(review): how `ignore` and `fn` are set, the error paths and the final store into *l are
 * not part of this extract. */
3872 static int exec_context_load_environment(const Unit
*unit
, const ExecContext
*c
, char ***l
) {
3873 char **i
, **r
= NULL
;
3878 STRV_FOREACH(i
, c
->environment_files
) {
3882 bool ignore
= false;
3884 _cleanup_globfree_ glob_t pglob
= {};
3893 if (!path_is_absolute(fn
)) {
3901 /* Filename supports globbing, take all matching files */
3902 k
= safe_glob(fn
, 0, &pglob
);
3911 /* When we don't match anything, -ENOENT should be returned */
3912 assert(pglob
.gl_pathc
> 0);
/* Parse every file the glob matched. */
3914 for (n
= 0; n
< pglob
.gl_pathc
; n
++) {
3915 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
3923 /* Log invalid environment variables with filename */
3925 InvalidEnvInfo info
= {
3927 .path
= pglob
.gl_pathv
[n
]
3930 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
/* Merge this file's assignments into what has been accumulated so far. */
3938 m
= strv_env_merge(2, r
, p
);
/* Tells whether `tty` may refer to the same device as the console. After skip_dev_prefix(), the
 * name is compared against "console" and against the device reported by resolve_dev_console().
 * If that lookup fails the function returns true.
 * NOTE(review): the result for the literal "console" case is not part of this extract. */
3954 static bool tty_may_match_dev_console(const char *tty
) {
3955 _cleanup_free_
char *resolved
= NULL
;
3960 tty
= skip_dev_prefix(tty
);
3962 /* trivial identity? */
3963 if (streq(tty
, "console"))
3966 if (resolve_dev_console(&resolved
) < 0)
3967 return true; /* if we could not resolve, assume it may */
3969 /* "tty0" means the active VC, so it may be the same sometimes */
3970 return streq(resolved
, tty
) || (streq(resolved
, "tty0") && tty_is_vc(tty
));
3973 bool exec_context_may_touch_console(const ExecContext
*ec
) {
3975 return (ec
->tty_reset
||
3977 ec
->tty_vt_disallocate
||
3978 is_terminal_input(ec
->std_input
) ||
3979 is_terminal_output(ec
->std_output
) ||
3980 is_terminal_output(ec
->std_error
)) &&
3981 tty_may_match_dev_console(exec_context_tty_path(ec
));
3984 static void strv_fprintf(FILE *f
, char **l
) {
3990 fprintf(f
, " %s", *g
);
3993 void exec_context_dump(const ExecContext
*c
, FILE* f
, const char *prefix
) {
3994 ExecDirectoryType dt
;
4002 prefix
= strempty(prefix
);
4006 "%sWorkingDirectory: %s\n"
4007 "%sRootDirectory: %s\n"
4008 "%sNonBlocking: %s\n"
4009 "%sPrivateTmp: %s\n"
4010 "%sPrivateDevices: %s\n"
4011 "%sProtectKernelTunables: %s\n"
4012 "%sProtectKernelModules: %s\n"
4013 "%sProtectControlGroups: %s\n"
4014 "%sPrivateNetwork: %s\n"
4015 "%sPrivateUsers: %s\n"
4016 "%sProtectHome: %s\n"
4017 "%sProtectSystem: %s\n"
4018 "%sMountAPIVFS: %s\n"
4019 "%sIgnoreSIGPIPE: %s\n"
4020 "%sMemoryDenyWriteExecute: %s\n"
4021 "%sRestrictRealtime: %s\n"
4022 "%sKeyringMode: %s\n",
4024 prefix
, c
->working_directory
? c
->working_directory
: "/",
4025 prefix
, c
->root_directory
? c
->root_directory
: "/",
4026 prefix
, yes_no(c
->non_blocking
),
4027 prefix
, yes_no(c
->private_tmp
),
4028 prefix
, yes_no(c
->private_devices
),
4029 prefix
, yes_no(c
->protect_kernel_tunables
),
4030 prefix
, yes_no(c
->protect_kernel_modules
),
4031 prefix
, yes_no(c
->protect_control_groups
),
4032 prefix
, yes_no(c
->private_network
),
4033 prefix
, yes_no(c
->private_users
),
4034 prefix
, protect_home_to_string(c
->protect_home
),
4035 prefix
, protect_system_to_string(c
->protect_system
),
4036 prefix
, yes_no(c
->mount_apivfs
),
4037 prefix
, yes_no(c
->ignore_sigpipe
),
4038 prefix
, yes_no(c
->memory_deny_write_execute
),
4039 prefix
, yes_no(c
->restrict_realtime
),
4040 prefix
, exec_keyring_mode_to_string(c
->keyring_mode
));
4043 fprintf(f
, "%sRootImage: %s\n", prefix
, c
->root_image
);
4045 STRV_FOREACH(e
, c
->environment
)
4046 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
4048 STRV_FOREACH(e
, c
->environment_files
)
4049 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
4051 STRV_FOREACH(e
, c
->pass_environment
)
4052 fprintf(f
, "%sPassEnvironment: %s\n", prefix
, *e
);
4054 STRV_FOREACH(e
, c
->unset_environment
)
4055 fprintf(f
, "%sUnsetEnvironment: %s\n", prefix
, *e
);
4057 fprintf(f
, "%sRuntimeDirectoryPreserve: %s\n", prefix
, exec_preserve_mode_to_string(c
->runtime_directory_preserve_mode
));
4059 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
4060 fprintf(f
, "%s%sMode: %04o\n", prefix
, exec_directory_type_to_string(dt
), c
->directories
[dt
].mode
);
4062 STRV_FOREACH(d
, c
->directories
[dt
].paths
)
4063 fprintf(f
, "%s%s: %s\n", prefix
, exec_directory_type_to_string(dt
), *d
);
4071 if (c
->oom_score_adjust_set
)
4073 "%sOOMScoreAdjust: %i\n",
4074 prefix
, c
->oom_score_adjust
);
4076 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
4078 fprintf(f
, "Limit%s%s: " RLIM_FMT
"\n",
4079 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
4080 fprintf(f
, "Limit%s%sSoft: " RLIM_FMT
"\n",
4081 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_cur
);
4084 if (c
->ioprio_set
) {
4085 _cleanup_free_
char *class_str
= NULL
;
4087 r
= ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
4089 fprintf(f
, "%sIOSchedulingClass: %s\n", prefix
, class_str
);
4091 fprintf(f
, "%sIOPriority: %lu\n", prefix
, IOPRIO_PRIO_DATA(c
->ioprio
));
4094 if (c
->cpu_sched_set
) {
4095 _cleanup_free_
char *policy_str
= NULL
;
4097 r
= sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
4099 fprintf(f
, "%sCPUSchedulingPolicy: %s\n", prefix
, policy_str
);
4102 "%sCPUSchedulingPriority: %i\n"
4103 "%sCPUSchedulingResetOnFork: %s\n",
4104 prefix
, c
->cpu_sched_priority
,
4105 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
4109 fprintf(f
, "%sCPUAffinity:", prefix
);
4110 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
4111 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
4112 fprintf(f
, " %u", i
);
4116 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
4117 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
4120 "%sStandardInput: %s\n"
4121 "%sStandardOutput: %s\n"
4122 "%sStandardError: %s\n",
4123 prefix
, exec_input_to_string(c
->std_input
),
4124 prefix
, exec_output_to_string(c
->std_output
),
4125 prefix
, exec_output_to_string(c
->std_error
));
4127 if (c
->std_input
== EXEC_INPUT_NAMED_FD
)
4128 fprintf(f
, "%sStandardInputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDIN_FILENO
]);
4129 if (c
->std_output
== EXEC_OUTPUT_NAMED_FD
)
4130 fprintf(f
, "%sStandardOutputFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDOUT_FILENO
]);
4131 if (c
->std_error
== EXEC_OUTPUT_NAMED_FD
)
4132 fprintf(f
, "%sStandardErrorFileDescriptorName: %s\n", prefix
, c
->stdio_fdname
[STDERR_FILENO
]);
4134 if (c
->std_input
== EXEC_INPUT_FILE
)
4135 fprintf(f
, "%sStandardInputFile: %s\n", prefix
, c
->stdio_file
[STDIN_FILENO
]);
4136 if (c
->std_output
== EXEC_OUTPUT_FILE
)
4137 fprintf(f
, "%sStandardOutputFile: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4138 if (c
->std_output
== EXEC_OUTPUT_FILE_APPEND
)
4139 fprintf(f
, "%sStandardOutputFileToAppend: %s\n", prefix
, c
->stdio_file
[STDOUT_FILENO
]);
4140 if (c
->std_error
== EXEC_OUTPUT_FILE
)
4141 fprintf(f
, "%sStandardErrorFile: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4142 if (c
->std_error
== EXEC_OUTPUT_FILE_APPEND
)
4143 fprintf(f
, "%sStandardErrorFileToAppend: %s\n", prefix
, c
->stdio_file
[STDERR_FILENO
]);
4149 "%sTTYVHangup: %s\n"
4150 "%sTTYVTDisallocate: %s\n",
4151 prefix
, c
->tty_path
,
4152 prefix
, yes_no(c
->tty_reset
),
4153 prefix
, yes_no(c
->tty_vhangup
),
4154 prefix
, yes_no(c
->tty_vt_disallocate
));
4156 if (IN_SET(c
->std_output
,
4159 EXEC_OUTPUT_JOURNAL
,
4160 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4161 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4162 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) ||
4163 IN_SET(c
->std_error
,
4166 EXEC_OUTPUT_JOURNAL
,
4167 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
4168 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
4169 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
)) {
4171 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
4173 r
= log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
4175 fprintf(f
, "%sSyslogFacility: %s\n", prefix
, fac_str
);
4177 r
= log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
4179 fprintf(f
, "%sSyslogLevel: %s\n", prefix
, lvl_str
);
4182 if (c
->log_level_max
>= 0) {
4183 _cleanup_free_
char *t
= NULL
;
4185 (void) log_level_to_string_alloc(c
->log_level_max
, &t
);
4187 fprintf(f
, "%sLogLevelMax: %s\n", prefix
, strna(t
));
4190 if (c
->n_log_extra_fields
> 0) {
4193 for (j
= 0; j
< c
->n_log_extra_fields
; j
++) {
4194 fprintf(f
, "%sLogExtraFields: ", prefix
);
4195 fwrite(c
->log_extra_fields
[j
].iov_base
,
4196 1, c
->log_extra_fields
[j
].iov_len
,
4202 if (c
->secure_bits
) {
4203 _cleanup_free_
char *str
= NULL
;
4205 r
= secure_bits_to_string_alloc(c
->secure_bits
, &str
);
4207 fprintf(f
, "%sSecure Bits: %s\n", prefix
, str
);
4210 if (c
->capability_bounding_set
!= CAP_ALL
) {
4211 _cleanup_free_
char *str
= NULL
;
4213 r
= capability_set_to_string_alloc(c
->capability_bounding_set
, &str
);
4215 fprintf(f
, "%sCapabilityBoundingSet: %s\n", prefix
, str
);
4218 if (c
->capability_ambient_set
!= 0) {
4219 _cleanup_free_
char *str
= NULL
;
4221 r
= capability_set_to_string_alloc(c
->capability_ambient_set
, &str
);
4223 fprintf(f
, "%sAmbientCapabilities: %s\n", prefix
, str
);
4227 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
4229 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
4231 fprintf(f
, "%sDynamicUser: %s\n", prefix
, yes_no(c
->dynamic_user
));
4233 if (!strv_isempty(c
->supplementary_groups
)) {
4234 fprintf(f
, "%sSupplementaryGroups:", prefix
);
4235 strv_fprintf(f
, c
->supplementary_groups
);
4240 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
4242 if (!strv_isempty(c
->read_write_paths
)) {
4243 fprintf(f
, "%sReadWritePaths:", prefix
);
4244 strv_fprintf(f
, c
->read_write_paths
);
4248 if (!strv_isempty(c
->read_only_paths
)) {
4249 fprintf(f
, "%sReadOnlyPaths:", prefix
);
4250 strv_fprintf(f
, c
->read_only_paths
);
4254 if (!strv_isempty(c
->inaccessible_paths
)) {
4255 fprintf(f
, "%sInaccessiblePaths:", prefix
);
4256 strv_fprintf(f
, c
->inaccessible_paths
);
4260 if (c
->n_bind_mounts
> 0)
4261 for (i
= 0; i
< c
->n_bind_mounts
; i
++)
4262 fprintf(f
, "%s%s: %s%s:%s:%s\n", prefix
,
4263 c
->bind_mounts
[i
].read_only
? "BindReadOnlyPaths" : "BindPaths",
4264 c
->bind_mounts
[i
].ignore_enoent
? "-": "",
4265 c
->bind_mounts
[i
].source
,
4266 c
->bind_mounts
[i
].destination
,
4267 c
->bind_mounts
[i
].recursive
? "rbind" : "norbind");
4269 if (c
->n_temporary_filesystems
> 0)
4270 for (i
= 0; i
< c
->n_temporary_filesystems
; i
++) {
4271 TemporaryFileSystem
*t
= c
->temporary_filesystems
+ i
;
4273 fprintf(f
, "%sTemporaryFileSystem: %s%s%s\n", prefix
,
4275 isempty(t
->options
) ? "" : ":",
4276 strempty(t
->options
));
4281 "%sUtmpIdentifier: %s\n",
4282 prefix
, c
->utmp_id
);
4284 if (c
->selinux_context
)
4286 "%sSELinuxContext: %s%s\n",
4287 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
4289 if (c
->apparmor_profile
)
4291 "%sAppArmorProfile: %s%s\n",
4292 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
4294 if (c
->smack_process_label
)
4296 "%sSmackProcessLabel: %s%s\n",
4297 prefix
, c
->smack_process_label_ignore
? "-" : "", c
->smack_process_label
);
4299 if (c
->personality
!= PERSONALITY_INVALID
)
4301 "%sPersonality: %s\n",
4302 prefix
, strna(personality_to_string(c
->personality
)));
4305 "%sLockPersonality: %s\n",
4306 prefix
, yes_no(c
->lock_personality
));
4308 if (c
->syscall_filter
) {
4316 "%sSystemCallFilter: ",
4319 if (!c
->syscall_whitelist
)
4323 HASHMAP_FOREACH_KEY(val
, id
, c
->syscall_filter
, j
) {
4324 _cleanup_free_
char *name
= NULL
;
4325 const char *errno_name
= NULL
;
4326 int num
= PTR_TO_INT(val
);
4333 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
4334 fputs(strna(name
), f
);
4337 errno_name
= errno_to_name(num
);
4339 fprintf(f
, ":%s", errno_name
);
4341 fprintf(f
, ":%d", num
);
4349 if (c
->syscall_archs
) {
4356 "%sSystemCallArchitectures:",
4360 SET_FOREACH(id
, c
->syscall_archs
, j
)
4361 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
4366 if (exec_context_restrict_namespaces_set(c
)) {
4367 _cleanup_free_
char *s
= NULL
;
4369 r
= namespace_flags_to_string(c
->restrict_namespaces
, &s
);
4371 fprintf(f
, "%sRestrictNamespaces: %s\n",
4375 if (c
->syscall_errno
> 0) {
4376 const char *errno_name
;
4378 fprintf(f
, "%sSystemCallErrorNumber: ", prefix
);
4380 errno_name
= errno_to_name(c
->syscall_errno
);
4382 fprintf(f
, "%s\n", errno_name
);
4384 fprintf(f
, "%d\n", c
->syscall_errno
);
4387 if (c
->apparmor_profile
)
4389 "%sAppArmorProfile: %s%s\n",
4390 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
4393 bool exec_context_maintains_privileges(const ExecContext
*c
) {
4396 /* Returns true if the process forked off would run under
4397 * an unchanged UID or as root. */
4402 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
4408 int exec_context_get_effective_ioprio(const ExecContext
*c
) {
4416 p
= ioprio_get(IOPRIO_WHO_PROCESS
, 0);
4418 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 4);
4423 void exec_context_free_log_extra_fields(ExecContext
*c
) {
4428 for (l
= 0; l
< c
->n_log_extra_fields
; l
++)
4429 free(c
->log_extra_fields
[l
].iov_base
);
4430 c
->log_extra_fields
= mfree(c
->log_extra_fields
);
4431 c
->n_log_extra_fields
= 0;
4434 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
4441 dual_timestamp_get(&s
->start_timestamp
);
4444 void exec_status_exit(ExecStatus
*s
, const ExecContext
*context
, pid_t pid
, int code
, int status
) {
4447 if (s
->pid
!= pid
) {
4453 dual_timestamp_get(&s
->exit_timestamp
);
4459 if (context
->utmp_id
)
4460 (void) utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
4462 exec_context_tty_reset(context
, NULL
);
4466 void exec_status_reset(ExecStatus
*s
) {
4469 *s
= (ExecStatus
) {};
4472 void exec_status_dump(const ExecStatus
*s
, FILE *f
, const char *prefix
) {
4473 char buf
[FORMAT_TIMESTAMP_MAX
];
4481 prefix
= strempty(prefix
);
4484 "%sPID: "PID_FMT
"\n",
4487 if (dual_timestamp_is_set(&s
->start_timestamp
))
4489 "%sStart Timestamp: %s\n",
4490 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
4492 if (dual_timestamp_is_set(&s
->exit_timestamp
))
4494 "%sExit Timestamp: %s\n"
4496 "%sExit Status: %i\n",
4497 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
4498 prefix
, sigchld_code_to_string(s
->code
),
4502 static char *exec_command_line(char **argv
) {
4510 STRV_FOREACH(a
, argv
)
4518 STRV_FOREACH(a
, argv
) {
4525 if (strpbrk(*a
, WHITESPACE
)) {
4536 /* FIXME: this doesn't really handle arguments that have
4537 * spaces and ticks in them */
4542 static void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4543 _cleanup_free_
char *cmd
= NULL
;
4544 const char *prefix2
;
4549 prefix
= strempty(prefix
);
4550 prefix2
= strjoina(prefix
, "\t");
4552 cmd
= exec_command_line(c
->argv
);
4554 "%sCommand Line: %s\n",
4555 prefix
, cmd
? cmd
: strerror(ENOMEM
));
4557 exec_status_dump(&c
->exec_status
, f
, prefix2
);
4560 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4563 prefix
= strempty(prefix
);
4565 LIST_FOREACH(command
, c
, c
)
4566 exec_command_dump(c
, f
, prefix
);
4569 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
4576 /* It's kind of important, that we keep the order here */
4577 LIST_FIND_TAIL(command
, *l
, end
);
4578 LIST_INSERT_AFTER(command
, *l
, end
, e
);
4583 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
4591 l
= strv_new_ap(path
, ap
);
4606 return strv_free_and_replace(c
->argv
, l
);
4609 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
4610 _cleanup_strv_free_
char **l
= NULL
;
4618 l
= strv_new_ap(path
, ap
);
4624 r
= strv_extend_strv(&c
->argv
, l
, false);
4631 static void *remove_tmpdir_thread(void *p
) {
4632 _cleanup_free_
char *path
= p
;
4634 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
4638 static ExecRuntime
* exec_runtime_free(ExecRuntime
*rt
, bool destroy
) {
4645 (void) hashmap_remove(rt
->manager
->exec_runtime_by_id
, rt
->id
);
4647 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
4648 if (destroy
&& rt
->tmp_dir
) {
4649 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
4651 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
4653 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
4660 if (destroy
&& rt
->var_tmp_dir
) {
4661 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
4663 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
4665 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
4666 free(rt
->var_tmp_dir
);
4669 rt
->var_tmp_dir
= NULL
;
4672 rt
->id
= mfree(rt
->id
);
4673 rt
->tmp_dir
= mfree(rt
->tmp_dir
);
4674 rt
->var_tmp_dir
= mfree(rt
->var_tmp_dir
);
4675 safe_close_pair(rt
->netns_storage_socket
);
4679 static void exec_runtime_freep(ExecRuntime
**rt
) {
4681 (void) exec_runtime_free(*rt
, false);
4684 static int exec_runtime_allocate(ExecRuntime
**rt
) {
4687 *rt
= new0(ExecRuntime
, 1);
4691 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
4695 static int exec_runtime_add(
4698 const char *tmp_dir
,
4699 const char *var_tmp_dir
,
4700 const int netns_storage_socket
[2],
4701 ExecRuntime
**ret
) {
4703 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt
= NULL
;
4709 r
= hashmap_ensure_allocated(&m
->exec_runtime_by_id
, &string_hash_ops
);
4713 r
= exec_runtime_allocate(&rt
);
4717 rt
->id
= strdup(id
);
4722 rt
->tmp_dir
= strdup(tmp_dir
);
4726 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
4727 assert(var_tmp_dir
);
4728 rt
->var_tmp_dir
= strdup(var_tmp_dir
);
4729 if (!rt
->var_tmp_dir
)
4733 if (netns_storage_socket
) {
4734 rt
->netns_storage_socket
[0] = netns_storage_socket
[0];
4735 rt
->netns_storage_socket
[1] = netns_storage_socket
[1];
4738 r
= hashmap_put(m
->exec_runtime_by_id
, rt
->id
, rt
);
4747 /* do not remove created ExecRuntime object when the operation succeeds. */
4752 static int exec_runtime_make(Manager
*m
, const ExecContext
*c
, const char *id
, ExecRuntime
**ret
) {
4753 _cleanup_free_
char *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
4754 _cleanup_close_pair_
int netns_storage_socket
[2] = {-1, -1};
4761 /* It is not necessary to create ExecRuntime object. */
4762 if (!c
->private_network
&& !c
->private_tmp
)
4765 if (c
->private_tmp
) {
4766 r
= setup_tmp_dirs(id
, &tmp_dir
, &var_tmp_dir
);
4771 if (c
->private_network
) {
4772 if (socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, netns_storage_socket
) < 0)
4776 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, netns_storage_socket
, ret
);
4781 netns_storage_socket
[0] = -1;
4782 netns_storage_socket
[1] = -1;
4786 int exec_runtime_acquire(Manager
*m
, const ExecContext
*c
, const char *id
, bool create
, ExecRuntime
**ret
) {
4794 rt
= hashmap_get(m
->exec_runtime_by_id
, id
);
4796 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
4802 /* If not found, then create a new object. */
4803 r
= exec_runtime_make(m
, c
, id
, &rt
);
4805 /* When r == 0, it is not necessary to create ExecRuntime object. */
4809 /* increment reference counter. */
4815 ExecRuntime
*exec_runtime_unref(ExecRuntime
*rt
, bool destroy
) {
4819 assert(rt
->n_ref
> 0);
4825 return exec_runtime_free(rt
, destroy
);
4828 int exec_runtime_serialize(const Manager
*m
, FILE *f
, FDSet
*fds
) {
4836 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
4837 fprintf(f
, "exec-runtime=%s", rt
->id
);
4840 fprintf(f
, " tmp-dir=%s", rt
->tmp_dir
);
4842 if (rt
->var_tmp_dir
)
4843 fprintf(f
, " var-tmp-dir=%s", rt
->var_tmp_dir
);
4845 if (rt
->netns_storage_socket
[0] >= 0) {
4848 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
4852 fprintf(f
, " netns-socket-0=%i", copy
);
4855 if (rt
->netns_storage_socket
[1] >= 0) {
4858 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
4862 fprintf(f
, " netns-socket-1=%i", copy
);
4871 int exec_runtime_deserialize_compat(Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
4872 _cleanup_(exec_runtime_freep
) ExecRuntime
*rt_create
= NULL
;
4876 /* This is for the migration from old (v237 or earlier) deserialization text.
4877 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
4878 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
4879 * so or not from the serialized text, then we always creates a new object owned by this. */
4885 /* Manager manages ExecRuntime objects by the unit id.
4886 * So, we omit the serialized text when the unit does not have id (yet?)... */
4887 if (isempty(u
->id
)) {
4888 log_unit_debug(u
, "Invocation ID not found. Dropping runtime parameter.");
4892 r
= hashmap_ensure_allocated(&u
->manager
->exec_runtime_by_id
, &string_hash_ops
);
4894 log_unit_debug_errno(u
, r
, "Failed to allocate storage for runtime parameter: %m");
4898 rt
= hashmap_get(u
->manager
->exec_runtime_by_id
, u
->id
);
4900 r
= exec_runtime_allocate(&rt_create
);
4904 rt_create
->id
= strdup(u
->id
);
4911 if (streq(key
, "tmp-dir")) {
4914 copy
= strdup(value
);
4918 free_and_replace(rt
->tmp_dir
, copy
);
4920 } else if (streq(key
, "var-tmp-dir")) {
4923 copy
= strdup(value
);
4927 free_and_replace(rt
->var_tmp_dir
, copy
);
4929 } else if (streq(key
, "netns-socket-0")) {
4932 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
4933 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4937 safe_close(rt
->netns_storage_socket
[0]);
4938 rt
->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
4940 } else if (streq(key
, "netns-socket-1")) {
4943 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
)) {
4944 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4948 safe_close(rt
->netns_storage_socket
[1]);
4949 rt
->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
4953 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
4955 r
= hashmap_put(u
->manager
->exec_runtime_by_id
, rt_create
->id
, rt_create
);
4957 log_unit_debug_errno(u
, r
, "Failed to put runtime parameter to manager's storage: %m");
4961 rt_create
->manager
= u
->manager
;
4970 void exec_runtime_deserialize_one(Manager
*m
, const char *value
, FDSet
*fds
) {
4971 char *id
= NULL
, *tmp_dir
= NULL
, *var_tmp_dir
= NULL
;
4972 int r
, fd0
= -1, fd1
= -1;
4973 const char *p
, *v
= value
;
4980 n
= strcspn(v
, " ");
4981 id
= strndupa(v
, n
);
4986 v
= startswith(p
, "tmp-dir=");
4988 n
= strcspn(v
, " ");
4989 tmp_dir
= strndupa(v
, n
);
4995 v
= startswith(p
, "var-tmp-dir=");
4997 n
= strcspn(v
, " ");
4998 var_tmp_dir
= strndupa(v
, n
);
5004 v
= startswith(p
, "netns-socket-0=");
5008 n
= strcspn(v
, " ");
5009 buf
= strndupa(v
, n
);
5010 if (safe_atoi(buf
, &fd0
) < 0 || !fdset_contains(fds
, fd0
)) {
5011 log_debug("Unable to process exec-runtime netns fd specification.");
5014 fd0
= fdset_remove(fds
, fd0
);
5020 v
= startswith(p
, "netns-socket-1=");
5024 n
= strcspn(v
, " ");
5025 buf
= strndupa(v
, n
);
5026 if (safe_atoi(buf
, &fd1
) < 0 || !fdset_contains(fds
, fd1
)) {
5027 log_debug("Unable to process exec-runtime netns fd specification.");
5030 fd1
= fdset_remove(fds
, fd1
);
5035 r
= exec_runtime_add(m
, id
, tmp_dir
, var_tmp_dir
, (int[]) { fd0
, fd1
}, NULL
);
5037 log_debug_errno(r
, "Failed to add exec-runtime: %m");
5042 void exec_runtime_vacuum(Manager
*m
) {
5048 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5050 HASHMAP_FOREACH(rt
, m
->exec_runtime_by_id
, i
) {
5054 (void) exec_runtime_free(rt
, false);
5058 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
5059 [EXEC_INPUT_NULL
] = "null",
5060 [EXEC_INPUT_TTY
] = "tty",
5061 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
5062 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
5063 [EXEC_INPUT_SOCKET
] = "socket",
5064 [EXEC_INPUT_NAMED_FD
] = "fd",
5065 [EXEC_INPUT_DATA
] = "data",
5066 [EXEC_INPUT_FILE
] = "file",
5069 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
5071 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
5072 [EXEC_OUTPUT_INHERIT
] = "inherit",
5073 [EXEC_OUTPUT_NULL
] = "null",
5074 [EXEC_OUTPUT_TTY
] = "tty",
5075 [EXEC_OUTPUT_SYSLOG
] = "syslog",
5076 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
5077 [EXEC_OUTPUT_KMSG
] = "kmsg",
5078 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
5079 [EXEC_OUTPUT_JOURNAL
] = "journal",
5080 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
5081 [EXEC_OUTPUT_SOCKET
] = "socket",
5082 [EXEC_OUTPUT_NAMED_FD
] = "fd",
5083 [EXEC_OUTPUT_FILE
] = "file",
5084 [EXEC_OUTPUT_FILE_APPEND
] = "append",
5087 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
5089 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
5090 [EXEC_UTMP_INIT
] = "init",
5091 [EXEC_UTMP_LOGIN
] = "login",
5092 [EXEC_UTMP_USER
] = "user",
5095 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);
5097 static const char* const exec_preserve_mode_table
[_EXEC_PRESERVE_MODE_MAX
] = {
5098 [EXEC_PRESERVE_NO
] = "no",
5099 [EXEC_PRESERVE_YES
] = "yes",
5100 [EXEC_PRESERVE_RESTART
] = "restart",
5103 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode
, ExecPreserveMode
, EXEC_PRESERVE_YES
);
5105 static const char* const exec_directory_type_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
5106 [EXEC_DIRECTORY_RUNTIME
] = "RuntimeDirectory",
5107 [EXEC_DIRECTORY_STATE
] = "StateDirectory",
5108 [EXEC_DIRECTORY_CACHE
] = "CacheDirectory",
5109 [EXEC_DIRECTORY_LOGS
] = "LogsDirectory",
5110 [EXEC_DIRECTORY_CONFIGURATION
] = "ConfigurationDirectory",
5113 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type
, ExecDirectoryType
);
5115 static const char* const exec_keyring_mode_table
[_EXEC_KEYRING_MODE_MAX
] = {
5116 [EXEC_KEYRING_INHERIT
] = "inherit",
5117 [EXEC_KEYRING_PRIVATE
] = "private",
5118 [EXEC_KEYRING_SHARED
] = "shared",
5121 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode
, ExecKeyringMode
);