2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
33 #include <sys/socket.h>
35 #include <sys/types.h>
41 #include <security/pam_appl.h>
45 #include <selinux/selinux.h>
53 #include <sys/apparmor.h>
56 #include "sd-messages.h"
59 #include "alloc-util.h"
61 #include "apparmor-util.h"
66 #include "capability-util.h"
67 #include "chown-recursive.h"
70 #include "errno-list.h"
72 #include "exit-status.h"
75 #include "format-util.h"
77 #include "glob-util.h"
85 #include "namespace.h"
86 #include "parse-util.h"
87 #include "path-util.h"
88 #include "process-util.h"
89 #include "rlimit-util.h"
92 #include "seccomp-util.h"
94 #include "securebits.h"
95 #include "securebits-util.h"
96 #include "selinux-util.h"
97 #include "signal-util.h"
98 #include "smack-util.h"
100 #include "string-table.h"
101 #include "string-util.h"
103 #include "syslog-util.h"
104 #include "terminal-util.h"
106 #include "user-util.h"
108 #include "utmp-wtmp.h"
110 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
111 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
113 /* This assumes there is a 'tty' group */
114 #define TTY_MODE 0620
116 #define SNDBUF_SIZE (8*1024*1024)
118 static int shift_fds(int fds
[], unsigned n_fds
) {
119 int start
, restart_from
;
124 /* Modifies the fds array! (sorts it) */
134 for (i
= start
; i
< (int) n_fds
; i
++) {
137 /* Already at right index? */
141 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
148 /* Hmm, the fd we wanted isn't free? Then
149 * let's remember that and try again from here */
150 if (nfd
!= i
+3 && restart_from
< 0)
154 if (restart_from
< 0)
157 start
= restart_from
;
163 static int flags_fds(const int fds
[], unsigned n_storage_fds
, unsigned n_socket_fds
, bool nonblock
) {
167 n_fds
= n_storage_fds
+ n_socket_fds
;
173 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
174 * O_NONBLOCK only applies to socket activation though. */
176 for (i
= 0; i
< n_fds
; i
++) {
178 if (i
< n_socket_fds
) {
179 r
= fd_nonblock(fds
[i
], nonblock
);
184 /* We unconditionally drop FD_CLOEXEC from the fds,
185 * since after all we want to pass these fds to our
188 r
= fd_cloexec(fds
[i
], false);
196 static const char *exec_context_tty_path(const ExecContext
*context
) {
199 if (context
->stdio_as_fds
)
202 if (context
->tty_path
)
203 return context
->tty_path
;
205 return "/dev/console";
208 static void exec_context_tty_reset(const ExecContext
*context
, const ExecParameters
*p
) {
213 path
= exec_context_tty_path(context
);
215 if (context
->tty_vhangup
) {
216 if (p
&& p
->stdin_fd
>= 0)
217 (void) terminal_vhangup_fd(p
->stdin_fd
);
219 (void) terminal_vhangup(path
);
222 if (context
->tty_reset
) {
223 if (p
&& p
->stdin_fd
>= 0)
224 (void) reset_terminal_fd(p
->stdin_fd
, true);
226 (void) reset_terminal(path
);
229 if (context
->tty_vt_disallocate
&& path
)
230 (void) vt_disallocate(path
);
233 static bool is_terminal_input(ExecInput i
) {
236 EXEC_INPUT_TTY_FORCE
,
237 EXEC_INPUT_TTY_FAIL
);
240 static bool is_terminal_output(ExecOutput o
) {
243 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
244 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
245 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
);
248 static bool is_syslog_output(ExecOutput o
) {
251 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
);
254 static bool is_kmsg_output(ExecOutput o
) {
257 EXEC_OUTPUT_KMSG_AND_CONSOLE
);
260 static bool exec_context_needs_term(const ExecContext
*c
) {
263 /* Return true if the execution context suggests we should set $TERM to something useful. */
265 if (is_terminal_input(c
->std_input
))
268 if (is_terminal_output(c
->std_output
))
271 if (is_terminal_output(c
->std_error
))
274 return !!c
->tty_path
;
277 static int open_null_as(int flags
, int nfd
) {
282 fd
= open("/dev/null", flags
|O_NOCTTY
);
287 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
295 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
296 static const union sockaddr_union sa
= {
297 .un
.sun_family
= AF_UNIX
,
298 .un
.sun_path
= "/run/systemd/journal/stdout",
300 uid_t olduid
= UID_INVALID
;
301 gid_t oldgid
= GID_INVALID
;
304 if (gid_is_valid(gid
)) {
307 if (setegid(gid
) < 0)
311 if (uid_is_valid(uid
)) {
314 if (seteuid(uid
) < 0) {
320 r
= connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0 ? -errno
: 0;
322 /* If we fail to restore the uid or gid, things will likely
323 fail later on. This should only happen if an LSM interferes. */
325 if (uid_is_valid(uid
))
326 (void) seteuid(olduid
);
329 if (gid_is_valid(gid
))
330 (void) setegid(oldgid
);
335 static int connect_logger_as(
337 const ExecContext
*context
,
338 const ExecParameters
*params
,
349 assert(output
< _EXEC_OUTPUT_MAX
);
353 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
357 r
= connect_journal_socket(fd
, uid
, gid
);
361 if (shutdown(fd
, SHUT_RD
) < 0) {
366 (void) fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
376 context
->syslog_identifier
?: ident
,
377 params
->flags
& EXEC_PASS_LOG_UNIT
? unit
->id
: "",
378 context
->syslog_priority
,
379 !!context
->syslog_level_prefix
,
380 is_syslog_output(output
),
381 is_kmsg_output(output
),
382 is_terminal_output(output
));
387 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
392 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
398 fd
= open_terminal(path
, mode
| O_NOCTTY
);
403 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
411 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
413 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
414 return EXEC_INPUT_NULL
;
416 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
417 return EXEC_INPUT_NULL
;
422 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
424 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
425 return EXEC_OUTPUT_INHERIT
;
430 static int setup_input(
431 const ExecContext
*context
,
432 const ExecParameters
*params
,
434 int named_iofds
[3]) {
441 if (params
->stdin_fd
>= 0) {
442 if (dup2(params
->stdin_fd
, STDIN_FILENO
) < 0)
445 /* Try to make this the controlling tty, if it is a tty, and reset it */
446 (void) ioctl(STDIN_FILENO
, TIOCSCTTY
, context
->std_input
== EXEC_INPUT_TTY_FORCE
);
447 (void) reset_terminal_fd(STDIN_FILENO
, true);
452 i
= fixup_input(context
->std_input
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
456 case EXEC_INPUT_NULL
:
457 return open_null_as(O_RDONLY
, STDIN_FILENO
);
460 case EXEC_INPUT_TTY_FORCE
:
461 case EXEC_INPUT_TTY_FAIL
: {
464 fd
= acquire_terminal(exec_context_tty_path(context
),
465 i
== EXEC_INPUT_TTY_FAIL
,
466 i
== EXEC_INPUT_TTY_FORCE
,
472 if (fd
!= STDIN_FILENO
) {
473 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
481 case EXEC_INPUT_SOCKET
:
482 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
484 case EXEC_INPUT_NAMED_FD
:
485 (void) fd_nonblock(named_iofds
[STDIN_FILENO
], false);
486 return dup2(named_iofds
[STDIN_FILENO
], STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
489 assert_not_reached("Unknown input type");
493 static int setup_output(
495 const ExecContext
*context
,
496 const ExecParameters
*params
,
503 dev_t
*journal_stream_dev
,
504 ino_t
*journal_stream_ino
) {
514 assert(journal_stream_dev
);
515 assert(journal_stream_ino
);
517 if (fileno
== STDOUT_FILENO
&& params
->stdout_fd
>= 0) {
519 if (dup2(params
->stdout_fd
, STDOUT_FILENO
) < 0)
522 return STDOUT_FILENO
;
525 if (fileno
== STDERR_FILENO
&& params
->stderr_fd
>= 0) {
526 if (dup2(params
->stderr_fd
, STDERR_FILENO
) < 0)
529 return STDERR_FILENO
;
532 i
= fixup_input(context
->std_input
, socket_fd
, params
->flags
& EXEC_APPLY_TTY_STDIN
);
533 o
= fixup_output(context
->std_output
, socket_fd
);
535 if (fileno
== STDERR_FILENO
) {
537 e
= fixup_output(context
->std_error
, socket_fd
);
539 /* This expects the input and output are already set up */
541 /* Don't change the stderr file descriptor if we inherit all
542 * the way and are not on a tty */
543 if (e
== EXEC_OUTPUT_INHERIT
&&
544 o
== EXEC_OUTPUT_INHERIT
&&
545 i
== EXEC_INPUT_NULL
&&
546 !is_terminal_input(context
->std_input
) &&
550 /* Duplicate from stdout if possible */
551 if ((e
== o
&& e
!= EXEC_OUTPUT_NAMED_FD
) || e
== EXEC_OUTPUT_INHERIT
)
552 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
556 } else if (o
== EXEC_OUTPUT_INHERIT
) {
557 /* If input got downgraded, inherit the original value */
558 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
559 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
561 /* If the input is connected to anything that's not a /dev/null, inherit that... */
562 if (i
!= EXEC_INPUT_NULL
)
563 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
565 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
569 /* We need to open /dev/null here anew, to get the right access mode. */
570 return open_null_as(O_WRONLY
, fileno
);
575 case EXEC_OUTPUT_NULL
:
576 return open_null_as(O_WRONLY
, fileno
);
578 case EXEC_OUTPUT_TTY
:
579 if (is_terminal_input(i
))
580 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
582 /* We don't reset the terminal if this is just about output */
583 return open_terminal_as(exec_context_tty_path(context
), O_WRONLY
, fileno
);
585 case EXEC_OUTPUT_SYSLOG
:
586 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
587 case EXEC_OUTPUT_KMSG
:
588 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
589 case EXEC_OUTPUT_JOURNAL
:
590 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
591 r
= connect_logger_as(unit
, context
, params
, o
, ident
, fileno
, uid
, gid
);
593 log_unit_warning_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
594 r
= open_null_as(O_WRONLY
, fileno
);
598 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
599 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
600 * services to detect whether they are connected to the journal or not.
602 * If both stdout and stderr are connected to a stream then let's make sure to store the data
603 * about STDERR as that's usually the best way to do logging. */
605 if (fstat(fileno
, &st
) >= 0 &&
606 (*journal_stream_ino
== 0 || fileno
== STDERR_FILENO
)) {
607 *journal_stream_dev
= st
.st_dev
;
608 *journal_stream_ino
= st
.st_ino
;
613 case EXEC_OUTPUT_SOCKET
:
614 assert(socket_fd
>= 0);
615 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
617 case EXEC_OUTPUT_NAMED_FD
:
618 (void) fd_nonblock(named_iofds
[fileno
], false);
619 return dup2(named_iofds
[fileno
], fileno
) < 0 ? -errno
: fileno
;
622 assert_not_reached("Unknown error type");
626 static int chown_terminal(int fd
, uid_t uid
) {
631 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
635 /* This might fail. What matters are the results. */
636 (void) fchown(fd
, uid
, -1);
637 (void) fchmod(fd
, TTY_MODE
);
639 if (fstat(fd
, &st
) < 0)
642 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
648 static int setup_confirm_stdio(const char *vc
, int *_saved_stdin
, int *_saved_stdout
) {
649 _cleanup_close_
int fd
= -1, saved_stdin
= -1, saved_stdout
= -1;
652 assert(_saved_stdin
);
653 assert(_saved_stdout
);
655 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
659 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
660 if (saved_stdout
< 0)
663 fd
= acquire_terminal(vc
, false, false, false, DEFAULT_CONFIRM_USEC
);
667 r
= chown_terminal(fd
, getuid());
671 r
= reset_terminal_fd(fd
, true);
675 if (dup2(fd
, STDIN_FILENO
) < 0)
678 if (dup2(fd
, STDOUT_FILENO
) < 0)
685 *_saved_stdin
= saved_stdin
;
686 *_saved_stdout
= saved_stdout
;
688 saved_stdin
= saved_stdout
= -1;
693 static void write_confirm_error_fd(int err
, int fd
, const Unit
*u
) {
696 if (err
== -ETIMEDOUT
)
697 dprintf(fd
, "Confirmation question timed out for %s, assuming positive response.\n", u
->id
);
700 dprintf(fd
, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u
->id
);
704 static void write_confirm_error(int err
, const char *vc
, const Unit
*u
) {
705 _cleanup_close_
int fd
= -1;
709 fd
= open_terminal(vc
, O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
713 write_confirm_error_fd(err
, fd
, u
);
716 static int restore_confirm_stdio(int *saved_stdin
, int *saved_stdout
) {
720 assert(saved_stdout
);
724 if (*saved_stdin
>= 0)
725 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
728 if (*saved_stdout
>= 0)
729 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
732 *saved_stdin
= safe_close(*saved_stdin
);
733 *saved_stdout
= safe_close(*saved_stdout
);
739 CONFIRM_PRETEND_FAILURE
= -1,
740 CONFIRM_PRETEND_SUCCESS
= 0,
744 static int ask_for_confirmation(const char *vc
, Unit
*u
, const char *cmdline
) {
745 int saved_stdout
= -1, saved_stdin
= -1, r
;
746 _cleanup_free_
char *e
= NULL
;
749 /* For any internal errors, assume a positive response. */
750 r
= setup_confirm_stdio(vc
, &saved_stdin
, &saved_stdout
);
752 write_confirm_error(r
, vc
, u
);
753 return CONFIRM_EXECUTE
;
756 /* confirm_spawn might have been disabled while we were sleeping. */
757 if (manager_is_confirm_spawn_disabled(u
->manager
)) {
762 e
= ellipsize(cmdline
, 60, 100);
770 r
= ask_char(&c
, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e
);
772 write_confirm_error_fd(r
, STDOUT_FILENO
, u
);
779 printf("Resuming normal execution.\n");
780 manager_disable_confirm_spawn();
784 unit_dump(u
, stdout
, " ");
785 continue; /* ask again */
787 printf("Failing execution.\n");
788 r
= CONFIRM_PRETEND_FAILURE
;
791 printf(" c - continue, proceed without asking anymore\n"
792 " D - dump, show the state of the unit\n"
793 " f - fail, don't execute the command and pretend it failed\n"
795 " i - info, show a short summary of the unit\n"
796 " j - jobs, show jobs that are in progress\n"
797 " s - skip, don't execute the command and pretend it succeeded\n"
798 " y - yes, execute the command\n");
799 continue; /* ask again */
801 printf(" Description: %s\n"
804 u
->id
, u
->description
, cmdline
);
805 continue; /* ask again */
807 manager_dump_jobs(u
->manager
, stdout
, " ");
808 continue; /* ask again */
810 /* 'n' was removed in favor of 'f'. */
811 printf("Didn't understand 'n', did you mean 'f'?\n");
812 continue; /* ask again */
814 printf("Skipping execution.\n");
815 r
= CONFIRM_PRETEND_SUCCESS
;
821 assert_not_reached("Unhandled choice");
827 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
831 static int get_fixed_user(const ExecContext
*c
, const char **user
,
832 uid_t
*uid
, gid_t
*gid
,
833 const char **home
, const char **shell
) {
842 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
843 * (i.e. are "/" or "/bin/nologin"). */
846 r
= get_user_creds_clean(&name
, uid
, gid
, home
, shell
);
854 static int get_fixed_group(const ExecContext
*c
, const char **group
, gid_t
*gid
) {
864 r
= get_group_creds(&name
, gid
);
872 static int get_supplementary_groups(const ExecContext
*c
, const char *user
,
873 const char *group
, gid_t gid
,
874 gid_t
**supplementary_gids
, int *ngids
) {
878 bool keep_groups
= false;
879 gid_t
*groups
= NULL
;
880 _cleanup_free_ gid_t
*l_gids
= NULL
;
885 * If user is given, then lookup GID and supplementary groups list.
886 * We avoid NSS lookups for gid=0. Also we have to initialize groups
887 * here and as early as possible so we keep the list of supplementary
888 * groups of the caller.
890 if (user
&& gid_is_valid(gid
) && gid
!= 0) {
891 /* First step, initialize groups from /etc/group */
892 if (initgroups(user
, gid
) < 0)
898 if (!c
->supplementary_groups
)
902 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
903 * be positive, otherwise fail.
906 ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
);
907 if (ngroups_max
<= 0) {
911 return -EOPNOTSUPP
; /* For all other values */
914 l_gids
= new(gid_t
, ngroups_max
);
920 * Lookup the list of groups that the user belongs to, we
921 * avoid NSS lookups here too for gid=0.
924 if (getgrouplist(user
, gid
, l_gids
, &k
) < 0)
929 STRV_FOREACH(i
, c
->supplementary_groups
) {
932 if (k
>= ngroups_max
)
936 r
= get_group_creds(&g
, l_gids
+k
);
944 * Sets ngids to zero to drop all supplementary groups, happens
945 * when we are under root and SupplementaryGroups= is empty.
952 /* Otherwise get the final list of supplementary groups */
953 groups
= memdup(l_gids
, sizeof(gid_t
) * k
);
957 *supplementary_gids
= groups
;
965 static int enforce_groups(const ExecContext
*context
, gid_t gid
,
966 gid_t
*supplementary_gids
, int ngids
) {
971 /* Handle SupplementaryGroups= even if it is empty */
972 if (context
->supplementary_groups
) {
973 r
= maybe_setgroups(ngids
, supplementary_gids
);
978 if (gid_is_valid(gid
)) {
979 /* Then set our gids */
980 if (setresgid(gid
, gid
, gid
) < 0)
987 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
990 if (!uid_is_valid(uid
))
993 /* Sets (but doesn't look up) the uid and make sure we keep the
994 * capabilities while doing so. */
996 if (context
->capability_ambient_set
!= 0) {
998 /* First step: If we need to keep capabilities but
999 * drop privileges we need to make sure we keep our
1000 * caps, while we drop privileges. */
1002 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
1004 if (prctl(PR_GET_SECUREBITS
) != sb
)
1005 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
1010 /* Second step: actually set the uids */
1011 if (setresuid(uid
, uid
, uid
) < 0)
1014 /* At this point we should have all necessary capabilities but
1015 are otherwise a normal user. However, the caps might have been
1016 corrupted due to the setresuid() so we need to clean them up
1017 later. This is done outside of this call. */
1024 static int null_conv(
1026 const struct pam_message
**msg
,
1027 struct pam_response
**resp
,
1028 void *appdata_ptr
) {
1030 /* We don't support conversations */
1032 return PAM_CONV_ERR
;
1037 static int setup_pam(
1044 int fds
[], unsigned n_fds
) {
1048 static const struct pam_conv conv
= {
1053 _cleanup_(barrier_destroy
) Barrier barrier
= BARRIER_NULL
;
1054 pam_handle_t
*handle
= NULL
;
1056 int pam_code
= PAM_SUCCESS
, r
;
1057 char **nv
, **e
= NULL
;
1058 bool close_session
= false;
1059 pid_t pam_pid
= 0, parent_pid
;
1066 /* We set up PAM in the parent process, then fork. The child
1067 * will then stay around until killed via PR_SET_PDEATHSIG or
1068 * systemd via the cgroup logic. It will then remove the PAM
1069 * session again. The parent process will exec() the actual
1070 * daemon. We do things this way to ensure that the main PID
1071 * of the daemon is the one we initially fork()ed. */
1073 r
= barrier_create(&barrier
);
1077 if (log_get_max_level() < LOG_DEBUG
)
1078 flags
|= PAM_SILENT
;
1080 pam_code
= pam_start(name
, user
, &conv
, &handle
);
1081 if (pam_code
!= PAM_SUCCESS
) {
1087 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
1088 if (pam_code
!= PAM_SUCCESS
)
1092 STRV_FOREACH(nv
, *env
) {
1093 pam_code
= pam_putenv(handle
, *nv
);
1094 if (pam_code
!= PAM_SUCCESS
)
1098 pam_code
= pam_acct_mgmt(handle
, flags
);
1099 if (pam_code
!= PAM_SUCCESS
)
1102 pam_code
= pam_open_session(handle
, flags
);
1103 if (pam_code
!= PAM_SUCCESS
)
1106 close_session
= true;
1108 e
= pam_getenvlist(handle
);
1110 pam_code
= PAM_BUF_ERR
;
1114 /* Block SIGTERM, so that we know that it won't get lost in
1117 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
1119 parent_pid
= getpid_cached();
1128 int sig
, ret
= EXIT_PAM
;
1130 /* The child's job is to reset the PAM session on
1132 barrier_set_role(&barrier
, BARRIER_CHILD
);
1134 /* This string must fit in 10 chars (i.e. the length
1135 * of "/sbin/init"), to look pretty in /bin/ps */
1136 rename_process("(sd-pam)");
1138 /* Make sure we don't keep open the passed fds in this
1139 child. We assume that otherwise only those fds are
1140 open here that have been opened by PAM. */
1141 close_many(fds
, n_fds
);
1143 /* Drop privileges - we don't need any to pam_close_session
1144 * and this will make PR_SET_PDEATHSIG work in most cases.
1145 * If this fails, ignore the error - but expect sd-pam threads
1146 * to fail to exit normally */
1148 r
= maybe_setgroups(0, NULL
);
1150 log_warning_errno(r
, "Failed to setgroups() in sd-pam: %m");
1151 if (setresgid(gid
, gid
, gid
) < 0)
1152 log_warning_errno(errno
, "Failed to setresgid() in sd-pam: %m");
1153 if (setresuid(uid
, uid
, uid
) < 0)
1154 log_warning_errno(errno
, "Failed to setresuid() in sd-pam: %m");
1156 (void) ignore_signals(SIGPIPE
, -1);
1158 /* Wait until our parent died. This will only work if
1159 * the above setresuid() succeeds, otherwise the kernel
1160 * will not allow unprivileged parents to kill their privileged
1161 * children this way. We rely on the control groups kill logic
1162 * to do the rest for us. */
1163 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
1166 /* Tell the parent that our setup is done. This is especially
1167 * important regarding dropping privileges. Otherwise, unit
1168 * setup might race against our setresuid(2) call.
1170 * If the parent aborted, we'll detect this below, hence ignore
1171 * return failure here. */
1172 (void) barrier_place(&barrier
);
1174 /* Check if our parent process might already have died? */
1175 if (getppid() == parent_pid
) {
1178 assert_se(sigemptyset(&ss
) >= 0);
1179 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
1182 if (sigwait(&ss
, &sig
) < 0) {
1189 assert(sig
== SIGTERM
);
1194 /* If our parent died we'll end the session */
1195 if (getppid() != parent_pid
) {
1196 pam_code
= pam_close_session(handle
, flags
);
1197 if (pam_code
!= PAM_SUCCESS
)
1204 pam_end(handle
, pam_code
| flags
);
1208 barrier_set_role(&barrier
, BARRIER_PARENT
);
1210 /* If the child was forked off successfully it will do all the
1211 * cleanups, so forget about the handle here. */
1214 /* Unblock SIGTERM again in the parent */
1215 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
1217 /* We close the log explicitly here, since the PAM modules
1218 * might have opened it, but we don't want this fd around. */
1221 /* Synchronously wait for the child to initialize. We don't care for
1222 * errors as we cannot recover. However, warn loudly if it happens. */
1223 if (!barrier_place_and_sync(&barrier
))
1224 log_error("PAM initialization failed");
1232 if (pam_code
!= PAM_SUCCESS
) {
1233 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
1234 r
= -EPERM
; /* PAM errors do not map to errno */
1236 log_error_errno(r
, "PAM failed: %m");
1240 pam_code
= pam_close_session(handle
, flags
);
1242 pam_end(handle
, pam_code
| flags
);
1254 static void rename_process_from_path(const char *path
) {
1255 char process_name
[11];
1259 /* This resulting string must fit in 10 chars (i.e. the length
1260 * of "/sbin/init") to look pretty in /bin/ps */
1264 rename_process("(...)");
1270 /* The end of the process name is usually more
1271 * interesting, since the first bit might just be
1277 process_name
[0] = '(';
1278 memcpy(process_name
+1, p
, l
);
1279 process_name
[1+l
] = ')';
1280 process_name
[1+l
+1] = 0;
1282 rename_process(process_name
);
1285 static bool context_has_address_families(const ExecContext
*c
) {
1288 return c
->address_families_whitelist
||
1289 !set_isempty(c
->address_families
);
1292 static bool context_has_syscall_filters(const ExecContext
*c
) {
1295 return c
->syscall_whitelist
||
1296 !set_isempty(c
->syscall_filter
);
1299 static bool context_has_no_new_privileges(const ExecContext
*c
) {
1302 if (c
->no_new_privileges
)
1305 if (have_effective_cap(CAP_SYS_ADMIN
)) /* if we are privileged, we don't need NNP */
1308 /* We need NNP if we have any form of seccomp and are unprivileged */
1309 return context_has_address_families(c
) ||
1310 c
->memory_deny_write_execute
||
1311 c
->restrict_realtime
||
1312 exec_context_restrict_namespaces_set(c
) ||
1313 c
->protect_kernel_tunables
||
1314 c
->protect_kernel_modules
||
1315 c
->private_devices
||
1316 context_has_syscall_filters(c
) ||
1317 !set_isempty(c
->syscall_archs
) ||
1318 c
->lock_personality
;
1323 static bool skip_seccomp_unavailable(const Unit
* u
, const char* msg
) {
1325 if (is_seccomp_available())
1328 log_unit_debug(u
, "SECCOMP features not detected in the kernel, skipping %s", msg
);
1332 static int apply_syscall_filter(const Unit
* u
, const ExecContext
*c
, bool needs_ambient_hack
) {
1333 uint32_t negative_action
, default_action
, action
;
1339 if (!context_has_syscall_filters(c
))
1342 if (skip_seccomp_unavailable(u
, "SystemCallFilter="))
1345 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
1347 if (c
->syscall_whitelist
) {
1348 default_action
= negative_action
;
1349 action
= SCMP_ACT_ALLOW
;
1351 default_action
= SCMP_ACT_ALLOW
;
1352 action
= negative_action
;
1355 if (needs_ambient_hack
) {
1356 r
= seccomp_filter_set_add(c
->syscall_filter
, c
->syscall_whitelist
, syscall_filter_sets
+ SYSCALL_FILTER_SET_SETUID
);
1361 return seccomp_load_syscall_filter_set_raw(default_action
, c
->syscall_filter
, action
);
1364 static int apply_syscall_archs(const Unit
*u
, const ExecContext
*c
) {
1368 if (set_isempty(c
->syscall_archs
))
1371 if (skip_seccomp_unavailable(u
, "SystemCallArchitectures="))
1374 return seccomp_restrict_archs(c
->syscall_archs
);
1377 static int apply_address_families(const Unit
* u
, const ExecContext
*c
) {
1381 if (!context_has_address_families(c
))
1384 if (skip_seccomp_unavailable(u
, "RestrictAddressFamilies="))
1387 return seccomp_restrict_address_families(c
->address_families
, c
->address_families_whitelist
);
1390 static int apply_memory_deny_write_execute(const Unit
* u
, const ExecContext
*c
) {
1394 if (!c
->memory_deny_write_execute
)
1397 if (skip_seccomp_unavailable(u
, "MemoryDenyWriteExecute="))
1400 return seccomp_memory_deny_write_execute();
1403 static int apply_restrict_realtime(const Unit
* u
, const ExecContext
*c
) {
1407 if (!c
->restrict_realtime
)
1410 if (skip_seccomp_unavailable(u
, "RestrictRealtime="))
1413 return seccomp_restrict_realtime();
1416 static int apply_protect_sysctl(const Unit
*u
, const ExecContext
*c
) {
1420 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1421 * let's protect even those systems where this is left on in the kernel. */
1423 if (!c
->protect_kernel_tunables
)
1426 if (skip_seccomp_unavailable(u
, "ProtectKernelTunables="))
1429 return seccomp_protect_sysctl();
1432 static int apply_protect_kernel_modules(const Unit
*u
, const ExecContext
*c
) {
1436 /* Turn off module syscalls on ProtectKernelModules=yes */
1438 if (!c
->protect_kernel_modules
)
1441 if (skip_seccomp_unavailable(u
, "ProtectKernelModules="))
1444 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_MODULE
, SCMP_ACT_ERRNO(EPERM
));
1447 static int apply_private_devices(const Unit
*u
, const ExecContext
*c
) {
1451 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1453 if (!c
->private_devices
)
1456 if (skip_seccomp_unavailable(u
, "PrivateDevices="))
1459 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW
, syscall_filter_sets
+ SYSCALL_FILTER_SET_RAW_IO
, SCMP_ACT_ERRNO(EPERM
));
1462 static int apply_restrict_namespaces(Unit
*u
, const ExecContext
*c
) {
1466 if (!exec_context_restrict_namespaces_set(c
))
1469 if (skip_seccomp_unavailable(u
, "RestrictNamespaces="))
1472 return seccomp_restrict_namespaces(c
->restrict_namespaces
);
1475 static int apply_lock_personality(const Unit
* u
, const ExecContext
*c
) {
1476 unsigned long personality
;
1482 if (!c
->lock_personality
)
1485 if (skip_seccomp_unavailable(u
, "LockPersonality="))
1488 personality
= c
->personality
;
1490 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1491 if (personality
== PERSONALITY_INVALID
) {
1493 r
= opinionated_personality(&personality
);
1498 return seccomp_lock_personality(personality
);
1503 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1506 idle_pipe
[1] = safe_close(idle_pipe
[1]);
1507 idle_pipe
[2] = safe_close(idle_pipe
[2]);
1509 if (idle_pipe
[0] >= 0) {
1512 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1514 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1517 /* Signal systemd that we are bored and want to continue. */
1518 n
= write(idle_pipe
[3], "x", 1);
1520 /* Wait for systemd to react to the signal above. */
1521 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1524 idle_pipe
[0] = safe_close(idle_pipe
[0]);
1528 idle_pipe
[3] = safe_close(idle_pipe
[3]);
1531 static int build_environment(
1533 const ExecContext
*c
,
1534 const ExecParameters
*p
,
1537 const char *username
,
1539 dev_t journal_stream_dev
,
1540 ino_t journal_stream_ino
,
1543 _cleanup_strv_free_
char **our_env
= NULL
;
1551 our_env
= new0(char*, 14);
1556 _cleanup_free_
char *joined
= NULL
;
1558 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid_cached()) < 0)
1560 our_env
[n_env
++] = x
;
1562 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1564 our_env
[n_env
++] = x
;
1566 joined
= strv_join(p
->fd_names
, ":");
1570 x
= strjoin("LISTEN_FDNAMES=", joined
);
1573 our_env
[n_env
++] = x
;
1576 if ((p
->flags
& EXEC_SET_WATCHDOG
) && p
->watchdog_usec
> 0) {
1577 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid_cached()) < 0)
1579 our_env
[n_env
++] = x
;
1581 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, p
->watchdog_usec
) < 0)
1583 our_env
[n_env
++] = x
;
1586 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus to look up dynamic
1587 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1588 * check the database directly. */
1589 if (p
->flags
& EXEC_NSS_BYPASS_BUS
) {
1590 x
= strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1593 our_env
[n_env
++] = x
;
1597 x
= strappend("HOME=", home
);
1600 our_env
[n_env
++] = x
;
1604 x
= strappend("LOGNAME=", username
);
1607 our_env
[n_env
++] = x
;
1609 x
= strappend("USER=", username
);
1612 our_env
[n_env
++] = x
;
1616 x
= strappend("SHELL=", shell
);
1619 our_env
[n_env
++] = x
;
1622 if (!sd_id128_is_null(u
->invocation_id
)) {
1623 if (asprintf(&x
, "INVOCATION_ID=" SD_ID128_FORMAT_STR
, SD_ID128_FORMAT_VAL(u
->invocation_id
)) < 0)
1626 our_env
[n_env
++] = x
;
1629 if (exec_context_needs_term(c
)) {
1630 const char *tty_path
, *term
= NULL
;
1632 tty_path
= exec_context_tty_path(c
);
1634 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1635 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1636 * passes to PID 1 ends up all the way in the console login shown. */
1638 if (path_equal(tty_path
, "/dev/console") && getppid() == 1)
1639 term
= getenv("TERM");
1641 term
= default_term_for_tty(tty_path
);
1643 x
= strappend("TERM=", term
);
1646 our_env
[n_env
++] = x
;
1649 if (journal_stream_dev
!= 0 && journal_stream_ino
!= 0) {
1650 if (asprintf(&x
, "JOURNAL_STREAM=" DEV_FMT
":" INO_FMT
, journal_stream_dev
, journal_stream_ino
) < 0)
1653 our_env
[n_env
++] = x
;
1656 our_env
[n_env
++] = NULL
;
1657 assert(n_env
<= 12);
1665 static int build_pass_environment(const ExecContext
*c
, char ***ret
) {
1666 _cleanup_strv_free_
char **pass_env
= NULL
;
1667 size_t n_env
= 0, n_bufsize
= 0;
1670 STRV_FOREACH(i
, c
->pass_environment
) {
1671 _cleanup_free_
char *x
= NULL
;
1677 x
= strjoin(*i
, "=", v
);
1681 if (!GREEDY_REALLOC(pass_env
, n_bufsize
, n_env
+ 2))
1684 pass_env
[n_env
++] = x
;
1685 pass_env
[n_env
] = NULL
;
1695 static bool exec_needs_mount_namespace(
1696 const ExecContext
*context
,
1697 const ExecParameters
*params
,
1698 ExecRuntime
*runtime
) {
1703 if (context
->root_image
)
1706 if (!strv_isempty(context
->read_write_paths
) ||
1707 !strv_isempty(context
->read_only_paths
) ||
1708 !strv_isempty(context
->inaccessible_paths
))
1711 if (context
->n_bind_mounts
> 0)
1714 if (context
->mount_flags
!= 0)
1717 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1720 if (context
->private_devices
||
1721 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1722 context
->protect_home
!= PROTECT_HOME_NO
||
1723 context
->protect_kernel_tunables
||
1724 context
->protect_kernel_modules
||
1725 context
->protect_control_groups
)
1728 if (context
->mount_apivfs
&& (context
->root_image
|| context
->root_directory
))
1734 static int setup_private_users(uid_t uid
, gid_t gid
) {
1735 _cleanup_free_
char *uid_map
= NULL
, *gid_map
= NULL
;
1736 _cleanup_close_pair_
int errno_pipe
[2] = { -1, -1 };
1737 _cleanup_close_
int unshare_ready_fd
= -1;
1738 _cleanup_(sigkill_waitp
) pid_t pid
= 0;
1744 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1745 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1746 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1747 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1748 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1749 * continues execution normally. */
1751 if (uid
!= 0 && uid_is_valid(uid
)) {
1752 r
= asprintf(&uid_map
,
1753 "0 0 1\n" /* Map root → root */
1754 UID_FMT
" " UID_FMT
" 1\n", /* Map $UID → $UID */
1759 uid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1764 if (gid
!= 0 && gid_is_valid(gid
)) {
1765 r
= asprintf(&gid_map
,
1766 "0 0 1\n" /* Map root → root */
1767 GID_FMT
" " GID_FMT
" 1\n", /* Map $GID → $GID */
1772 gid_map
= strdup("0 0 1\n"); /* The case where the above is the same */
1777 /* Create a communication channel so that the parent can tell the child when it finished creating the user namespace. */
1779 unshare_ready_fd
= eventfd(0, EFD_CLOEXEC
);
1780 if (unshare_ready_fd
< 0)
1783 /* Create a communication channel so that the child can tell the parent a proper error code in case it failed. */
1785 if (pipe2(errno_pipe
, O_CLOEXEC
) < 0)
1793 _cleanup_close_
int fd
= -1;
1797 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1798 * here, after the parent opened its own user namespace. */
1801 errno_pipe
[0] = safe_close(errno_pipe
[0]);
1803 /* Wait until the parent unshared the user namespace */
1804 if (read(unshare_ready_fd
, &c
, sizeof(c
)) < 0) {
1809 /* Disable the setgroups() system call in the child user namespace, for good. */
1810 a
= procfs_file_alloca(ppid
, "setgroups");
1811 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1813 if (errno
!= ENOENT
) {
1818 /* If the file is missing the kernel is too old, let's continue anyway. */
1820 if (write(fd
, "deny\n", 5) < 0) {
1825 fd
= safe_close(fd
);
1828 /* First write the GID map */
1829 a
= procfs_file_alloca(ppid
, "gid_map");
1830 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1835 if (write(fd
, gid_map
, strlen(gid_map
)) < 0) {
1839 fd
= safe_close(fd
);
1841 /* Then write the UID map */
1842 a
= procfs_file_alloca(ppid
, "uid_map");
1843 fd
= open(a
, O_WRONLY
|O_CLOEXEC
);
1848 if (write(fd
, uid_map
, strlen(uid_map
)) < 0) {
1853 _exit(EXIT_SUCCESS
);
1856 (void) write(errno_pipe
[1], &r
, sizeof(r
));
1857 _exit(EXIT_FAILURE
);
1860 errno_pipe
[1] = safe_close(errno_pipe
[1]);
1862 if (unshare(CLONE_NEWUSER
) < 0)
1865 /* Let the child know that the namespace is ready now */
1866 if (write(unshare_ready_fd
, &c
, sizeof(c
)) < 0)
1869 /* Try to read an error code from the child */
1870 n
= read(errno_pipe
[0], &r
, sizeof(r
));
1873 if (n
== sizeof(r
)) { /* an error code was sent to us */
1878 if (n
!= 0) /* on success we should have read 0 bytes */
1881 r
= wait_for_terminate(pid
, &si
);
1886 /* If something strange happened with the child, let's consider this fatal, too */
1887 if (si
.si_code
!= CLD_EXITED
|| si
.si_status
!= 0)
1893 static int setup_exec_directory(
1894 const ExecContext
*context
,
1895 const ExecParameters
*params
,
1898 ExecDirectoryType type
,
1901 static const int exit_status_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
1902 [EXEC_DIRECTORY_RUNTIME
] = EXIT_RUNTIME_DIRECTORY
,
1903 [EXEC_DIRECTORY_STATE
] = EXIT_STATE_DIRECTORY
,
1904 [EXEC_DIRECTORY_CACHE
] = EXIT_CACHE_DIRECTORY
,
1905 [EXEC_DIRECTORY_LOGS
] = EXIT_LOGS_DIRECTORY
,
1906 [EXEC_DIRECTORY_CONFIGURATION
] = EXIT_CONFIGURATION_DIRECTORY
,
1913 assert(type
>= 0 && type
< _EXEC_DIRECTORY_TYPE_MAX
);
1914 assert(exit_status
);
1916 if (!params
->prefix
[type
])
1919 if (params
->flags
& EXEC_CHOWN_DIRECTORIES
) {
1920 if (!uid_is_valid(uid
))
1922 if (!gid_is_valid(gid
))
1926 STRV_FOREACH(rt
, context
->directories
[type
].paths
) {
1927 _cleanup_free_
char *p
;
1929 p
= strjoin(params
->prefix
[type
], "/", *rt
);
1935 r
= mkdir_parents_label(p
, 0755);
1939 r
= mkdir_label(p
, context
->directories
[type
].mode
);
1940 if (r
< 0 && r
!= -EEXIST
)
1943 /* First lock down the access mode */
1944 if (chmod(p
, context
->directories
[type
].mode
) < 0) {
1949 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1950 * a service, and shall not be writable. */
1951 if (type
== EXEC_DIRECTORY_CONFIGURATION
)
1954 /* Then, change the ownership of the whole tree, if necessary */
1955 r
= path_chown_recursive(p
, uid
, gid
);
1963 *exit_status
= exit_status_table
[type
];
1967 static int setup_smack(
1968 const ExecContext
*context
,
1969 const ExecCommand
*command
) {
1976 if (context
->smack_process_label
) {
1977 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1981 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1983 _cleanup_free_
char *exec_label
= NULL
;
1985 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1986 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
)
1989 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1998 static int compile_read_write_paths(
1999 const ExecContext
*context
,
2000 const ExecParameters
*params
,
2003 _cleanup_strv_free_
char **l
= NULL
;
2005 ExecDirectoryType i
;
2007 /* Compile the list of writable paths. This is the combination of
2008 * the explicitly configured paths, plus all runtime directories. */
2010 if (strv_isempty(context
->read_write_paths
)) {
2011 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
2012 if (!strv_isempty(context
->directories
[i
].paths
))
2015 if (i
== _EXEC_DIRECTORY_TYPE_MAX
) {
2016 *ret
= NULL
; /* NOP if neither is set */
2021 l
= strv_copy(context
->read_write_paths
);
2025 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++) {
2026 if (!params
->prefix
[i
])
2029 STRV_FOREACH(rt
, context
->directories
[i
].paths
) {
2032 s
= strjoin(params
->prefix
[i
], "/", *rt
);
2036 if (strv_consume(&l
, s
) < 0)
2047 static int apply_mount_namespace(
2049 ExecCommand
*command
,
2050 const ExecContext
*context
,
2051 const ExecParameters
*params
,
2052 ExecRuntime
*runtime
) {
2054 _cleanup_strv_free_
char **rw
= NULL
;
2055 char *tmp
= NULL
, *var
= NULL
;
2056 const char *root_dir
= NULL
, *root_image
= NULL
;
2057 NameSpaceInfo ns_info
= {
2058 .ignore_protect_paths
= false,
2059 .private_dev
= context
->private_devices
,
2060 .protect_control_groups
= context
->protect_control_groups
,
2061 .protect_kernel_tunables
= context
->protect_kernel_tunables
,
2062 .protect_kernel_modules
= context
->protect_kernel_modules
,
2063 .mount_apivfs
= context
->mount_apivfs
,
2065 bool needs_sandboxing
;
2070 /* The runtime struct only contains the parent of the private /tmp,
2071 * which is non-accessible to world users. Inside of it there's a /tmp
2072 * that is sticky, and that's the one we want to use here. */
2074 if (context
->private_tmp
&& runtime
) {
2075 if (runtime
->tmp_dir
)
2076 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
2077 if (runtime
->var_tmp_dir
)
2078 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
2081 r
= compile_read_write_paths(context
, params
, &rw
);
2085 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2086 root_image
= context
->root_image
;
2089 root_dir
= context
->root_directory
;
2093 * If DynamicUser=no and RootDirectory= is set then let's pass a relaxed
2094 * sandbox info, otherwise enforce it, don't ignore protected paths and
2095 * fail if we are unable to apply the sandbox inside the mount namespace.
2097 if (!context
->dynamic_user
&& root_dir
)
2098 ns_info
.ignore_protect_paths
= true;
2100 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
2102 r
= setup_namespace(root_dir
, root_image
,
2104 needs_sandboxing
? context
->read_only_paths
: NULL
,
2105 needs_sandboxing
? context
->inaccessible_paths
: NULL
,
2106 context
->bind_mounts
,
2107 context
->n_bind_mounts
,
2110 needs_sandboxing
? context
->protect_home
: PROTECT_HOME_NO
,
2111 needs_sandboxing
? context
->protect_system
: PROTECT_SYSTEM_NO
,
2112 context
->mount_flags
,
2113 DISSECT_IMAGE_DISCARD_ON_LOOP
);
2115 /* If we couldn't set up the namespace this is probably due to a
2116 * missing capability. In this case, silently proceed. */
2117 if (IN_SET(r
, -EPERM
, -EACCES
)) {
2118 log_unit_debug_errno(u
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2125 static int apply_working_directory(
2126 const ExecContext
*context
,
2127 const ExecParameters
*params
,
2129 const bool needs_mount_ns
,
2135 assert(exit_status
);
2137 if (context
->working_directory_home
) {
2140 *exit_status
= EXIT_CHDIR
;
2146 } else if (context
->working_directory
)
2147 wd
= context
->working_directory
;
2151 if (params
->flags
& EXEC_APPLY_CHROOT
) {
2152 if (!needs_mount_ns
&& context
->root_directory
)
2153 if (chroot(context
->root_directory
) < 0) {
2154 *exit_status
= EXIT_CHROOT
;
2160 d
= prefix_roota(context
->root_directory
, wd
);
2162 if (chdir(d
) < 0 && !context
->working_directory_missing_ok
) {
2163 *exit_status
= EXIT_CHDIR
;
2170 static int setup_keyring(
2172 const ExecContext
*context
,
2173 const ExecParameters
*p
,
2174 uid_t uid
, gid_t gid
) {
2176 key_serial_t keyring
;
2183 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2184 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2185 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2186 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2187 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2188 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2190 if (!(p
->flags
& EXEC_NEW_KEYRING
))
2193 if (context
->keyring_mode
== EXEC_KEYRING_INHERIT
)
2196 keyring
= keyctl(KEYCTL_JOIN_SESSION_KEYRING
, 0, 0, 0, 0);
2197 if (keyring
== -1) {
2198 if (errno
== ENOSYS
)
2199 log_unit_debug_errno(u
, errno
, "Kernel keyring not supported, ignoring.");
2200 else if (IN_SET(errno
, EACCES
, EPERM
))
2201 log_unit_debug_errno(u
, errno
, "Kernel keyring access prohibited, ignoring.");
2202 else if (errno
== EDQUOT
)
2203 log_unit_debug_errno(u
, errno
, "Out of kernel keyrings to allocate, ignoring.");
2205 return log_unit_error_errno(u
, errno
, "Setting up kernel keyring failed: %m");
2210 /* Populate the keyring with the invocation ID by default. */
2211 if (!sd_id128_is_null(u
->invocation_id
)) {
2214 key
= add_key("user", "invocation_id", &u
->invocation_id
, sizeof(u
->invocation_id
), KEY_SPEC_SESSION_KEYRING
);
2216 log_unit_debug_errno(u
, errno
, "Failed to add invocation ID to keyring, ignoring: %m");
2218 if (keyctl(KEYCTL_SETPERM
, key
,
2219 KEY_POS_VIEW
|KEY_POS_READ
|KEY_POS_SEARCH
|
2220 KEY_USR_VIEW
|KEY_USR_READ
|KEY_USR_SEARCH
, 0, 0) < 0)
2221 return log_unit_error_errno(u
, errno
, "Failed to restrict invocation ID permission: %m");
2225 /* And now, make the keyring owned by the service's user */
2226 if (uid_is_valid(uid
) || gid_is_valid(gid
))
2227 if (keyctl(KEYCTL_CHOWN
, keyring
, uid
, gid
, 0) < 0)
2228 return log_unit_error_errno(u
, errno
, "Failed to change ownership of session keyring: %m");
2230 /* When requested link the user keyring into the session keyring. */
2231 if (context
->keyring_mode
== EXEC_KEYRING_SHARED
) {
2235 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2236 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2237 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2239 saved_uid
= getuid();
2240 saved_gid
= getgid();
2242 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2243 if (setregid(gid
, -1) < 0)
2244 return log_unit_error_errno(u
, errno
, "Failed to change GID for user keyring: %m");
2247 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2248 if (setreuid(uid
, -1) < 0) {
2249 (void) setregid(saved_gid
, -1);
2250 return log_unit_error_errno(u
, errno
, "Failed to change UID for user keyring: %m");
2254 if (keyctl(KEYCTL_LINK
,
2255 KEY_SPEC_USER_KEYRING
,
2256 KEY_SPEC_SESSION_KEYRING
, 0, 0) < 0) {
2260 (void) setreuid(saved_uid
, -1);
2261 (void) setregid(saved_gid
, -1);
2263 return log_unit_error_errno(u
, r
, "Failed to link user keyring into session keyring: %m");
2266 if (uid_is_valid(uid
) && uid
!= saved_uid
) {
2267 if (setreuid(saved_uid
, -1) < 0) {
2268 (void) setregid(saved_gid
, -1);
2269 return log_unit_error_errno(u
, errno
, "Failed to change UID back for user keyring: %m");
2273 if (gid_is_valid(gid
) && gid
!= saved_gid
) {
2274 if (setregid(saved_gid
, -1) < 0)
2275 return log_unit_error_errno(u
, errno
, "Failed to change GID back for user keyring: %m");
2282 static void append_socket_pair(int *array
, unsigned *n
, int pair
[2]) {
2290 array
[(*n
)++] = pair
[0];
2292 array
[(*n
)++] = pair
[1];
2295 static int close_remaining_fds(
2296 const ExecParameters
*params
,
2297 ExecRuntime
*runtime
,
2298 DynamicCreds
*dcreds
,
2301 int *fds
, unsigned n_fds
) {
2303 unsigned n_dont_close
= 0;
2304 int dont_close
[n_fds
+ 12];
2308 if (params
->stdin_fd
>= 0)
2309 dont_close
[n_dont_close
++] = params
->stdin_fd
;
2310 if (params
->stdout_fd
>= 0)
2311 dont_close
[n_dont_close
++] = params
->stdout_fd
;
2312 if (params
->stderr_fd
>= 0)
2313 dont_close
[n_dont_close
++] = params
->stderr_fd
;
2316 dont_close
[n_dont_close
++] = socket_fd
;
2318 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
2319 n_dont_close
+= n_fds
;
2323 append_socket_pair(dont_close
, &n_dont_close
, runtime
->netns_storage_socket
);
2327 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->user
->storage_socket
);
2329 append_socket_pair(dont_close
, &n_dont_close
, dcreds
->group
->storage_socket
);
2332 if (user_lookup_fd
>= 0)
2333 dont_close
[n_dont_close
++] = user_lookup_fd
;
2335 return close_all_fds(dont_close
, n_dont_close
);
2338 static int send_user_lookup(
2346 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2347 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was configured. */
2350 if (user_lookup_fd
< 0)
2353 if (!uid_is_valid(uid
) && !gid_is_valid(gid
))
2356 if (writev(user_lookup_fd
,
2358 IOVEC_INIT(&uid
, sizeof(uid
)),
2359 IOVEC_INIT(&gid
, sizeof(gid
)),
2360 IOVEC_INIT_STRING(unit
->id
) }, 3) < 0)
2366 static int acquire_home(const ExecContext
*c
, uid_t uid
, const char** home
, char **buf
) {
2373 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2378 if (!c
->working_directory_home
)
2382 /* Hardcode /root as home directory for UID 0 */
2387 r
= get_home_dir(buf
);
2395 static int exec_child(
2397 ExecCommand
*command
,
2398 const ExecContext
*context
,
2399 const ExecParameters
*params
,
2400 ExecRuntime
*runtime
,
2401 DynamicCreds
*dcreds
,
2406 unsigned n_storage_fds
,
2407 unsigned n_socket_fds
,
2412 _cleanup_strv_free_
char **our_env
= NULL
, **pass_env
= NULL
, **accum_env
= NULL
, **final_argv
= NULL
;
2413 _cleanup_free_
char *mac_selinux_context_net
= NULL
, *home_buffer
= NULL
;
2414 _cleanup_free_ gid_t
*supplementary_gids
= NULL
;
2415 const char *username
= NULL
, *groupname
= NULL
;
2416 const char *home
= NULL
, *shell
= NULL
;
2417 dev_t journal_stream_dev
= 0;
2418 ino_t journal_stream_ino
= 0;
2419 bool needs_sandboxing
, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda) */
2420 needs_setuid
, /* Do we need to do the actual setresuid()/setresgid() calls? */
2421 needs_mount_namespace
, /* Do we need to set up a mount namespace for this kernel? */
2422 needs_ambient_hack
; /* Do we need to apply the ambient capabilities hack? */
2424 bool use_selinux
= false;
2427 bool use_smack
= false;
2429 #ifdef HAVE_APPARMOR
2430 bool use_apparmor
= false;
2432 uid_t uid
= UID_INVALID
;
2433 gid_t gid
= GID_INVALID
;
2434 int i
, r
, ngids
= 0;
2436 ExecDirectoryType dt
;
2443 assert(exit_status
);
2445 rename_process_from_path(command
->path
);
2447 /* We reset exactly these signals, since they are the
2448 * only ones we set to SIG_IGN in the main daemon. All
2449 * others we leave untouched because we set them to
2450 * SIG_DFL or a valid handler initially, both of which
2451 * will be demoted to SIG_DFL. */
2452 (void) default_signals(SIGNALS_CRASH_HANDLER
,
2453 SIGNALS_IGNORE
, -1);
2455 if (context
->ignore_sigpipe
)
2456 (void) ignore_signals(SIGPIPE
, -1);
2458 r
= reset_signal_mask();
2460 *exit_status
= EXIT_SIGNAL_MASK
;
2461 return log_unit_error_errno(unit
, r
, "Failed to set process signal mask: %m");
2464 if (params
->idle_pipe
)
2465 do_idle_pipe_dance(params
->idle_pipe
);
2467 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2468 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2469 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2470 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2473 log_set_open_when_needed(true);
2475 /* In case anything used libc syslog(), close this here, too */
2478 n_fds
= n_storage_fds
+ n_socket_fds
;
2479 r
= close_remaining_fds(params
, runtime
, dcreds
, user_lookup_fd
, socket_fd
, fds
, n_fds
);
2481 *exit_status
= EXIT_FDS
;
2482 return log_unit_error_errno(unit
, r
, "Failed to close unwanted file descriptors: %m");
2485 if (!context
->same_pgrp
)
2487 *exit_status
= EXIT_SETSID
;
2488 return log_unit_error_errno(unit
, errno
, "Failed to create new process session: %m");
2491 exec_context_tty_reset(context
, params
);
2493 if (unit_shall_confirm_spawn(unit
)) {
2494 const char *vc
= params
->confirm_spawn
;
2495 _cleanup_free_
char *cmdline
= NULL
;
2497 cmdline
= exec_command_line(argv
);
2499 *exit_status
= EXIT_MEMORY
;
2503 r
= ask_for_confirmation(vc
, unit
, cmdline
);
2504 if (r
!= CONFIRM_EXECUTE
) {
2505 if (r
== CONFIRM_PRETEND_SUCCESS
) {
2506 *exit_status
= EXIT_SUCCESS
;
2509 *exit_status
= EXIT_CONFIRM
;
2510 log_unit_error(unit
, "Execution cancelled by the user");
2515 if (context
->dynamic_user
&& dcreds
) {
2517 /* Make sure we bypass our own NSS module for any NSS checks */
2518 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2519 *exit_status
= EXIT_USER
;
2520 return log_unit_error_errno(unit
, errno
, "Failed to update environment: %m");
2523 r
= dynamic_creds_realize(dcreds
, &uid
, &gid
);
2525 *exit_status
= EXIT_USER
;
2526 return log_unit_error_errno(unit
, r
, "Failed to update dynamic user credentials: %m");
2529 if (!uid_is_valid(uid
)) {
2530 *exit_status
= EXIT_USER
;
2531 log_unit_error(unit
, "UID validation failed for \""UID_FMT
"\"", uid
);
2535 if (!gid_is_valid(gid
)) {
2536 *exit_status
= EXIT_USER
;
2537 log_unit_error(unit
, "GID validation failed for \""GID_FMT
"\"", gid
);
2542 username
= dcreds
->user
->name
;
2545 r
= get_fixed_user(context
, &username
, &uid
, &gid
, &home
, &shell
);
2547 *exit_status
= EXIT_USER
;
2548 return log_unit_error_errno(unit
, r
, "Failed to determine user credentials: %m");
2551 r
= get_fixed_group(context
, &groupname
, &gid
);
2553 *exit_status
= EXIT_GROUP
;
2554 return log_unit_error_errno(unit
, r
, "Failed to determine group credentials: %m");
2558 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2559 r
= get_supplementary_groups(context
, username
, groupname
, gid
,
2560 &supplementary_gids
, &ngids
);
2562 *exit_status
= EXIT_GROUP
;
2563 return log_unit_error_errno(unit
, r
, "Failed to determine supplementary groups: %m");
2566 r
= send_user_lookup(unit
, user_lookup_fd
, uid
, gid
);
2568 *exit_status
= EXIT_USER
;
2569 return log_unit_error_errno(unit
, r
, "Failed to send user credentials to PID1: %m");
2572 user_lookup_fd
= safe_close(user_lookup_fd
);
2574 r
= acquire_home(context
, uid
, &home
, &home_buffer
);
2576 *exit_status
= EXIT_CHDIR
;
2577 return log_unit_error_errno(unit
, r
, "Failed to determine $HOME for user: %m");
2580 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2581 * must be sure to drop O_NONBLOCK */
2583 (void) fd_nonblock(socket_fd
, false);
2585 r
= setup_input(context
, params
, socket_fd
, named_iofds
);
2587 *exit_status
= EXIT_STDIN
;
2588 return log_unit_error_errno(unit
, r
, "Failed to set up standard input: %m");
2591 r
= setup_output(unit
, context
, params
, STDOUT_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2593 *exit_status
= EXIT_STDOUT
;
2594 return log_unit_error_errno(unit
, r
, "Failed to set up standard output: %m");
2597 r
= setup_output(unit
, context
, params
, STDERR_FILENO
, socket_fd
, named_iofds
, basename(command
->path
), uid
, gid
, &journal_stream_dev
, &journal_stream_ino
);
2599 *exit_status
= EXIT_STDERR
;
2600 return log_unit_error_errno(unit
, r
, "Failed to set up standard error output: %m");
2603 if (params
->cgroup_path
) {
2604 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
2606 *exit_status
= EXIT_CGROUP
;
2607 return log_unit_error_errno(unit
, r
, "Failed to attach to cgroup %s: %m", params
->cgroup_path
);
2611 if (context
->oom_score_adjust_set
) {
2612 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
2614 /* When we can't make this change due to EPERM, then
2615 * let's silently skip over it. User namespaces
2616 * prohibit write access to this file, and we
2617 * shouldn't trip up over that. */
2619 sprintf(t
, "%i", context
->oom_score_adjust
);
2620 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
2621 if (IN_SET(r
, -EPERM
, -EACCES
))
2622 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2624 *exit_status
= EXIT_OOM_ADJUST
;
2625 return log_unit_error_errno(unit
, r
, "Failed to adjust OOM setting: %m");
2629 if (context
->nice_set
)
2630 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
2631 *exit_status
= EXIT_NICE
;
2632 return log_unit_error_errno(unit
, errno
, "Failed to set up process scheduling priority (nice level): %m");
2635 if (context
->cpu_sched_set
) {
2636 struct sched_param param
= {
2637 .sched_priority
= context
->cpu_sched_priority
,
2640 r
= sched_setscheduler(0,
2641 context
->cpu_sched_policy
|
2642 (context
->cpu_sched_reset_on_fork
?
2643 SCHED_RESET_ON_FORK
: 0),
2646 *exit_status
= EXIT_SETSCHEDULER
;
2647 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU scheduling: %m");
2651 if (context
->cpuset
)
2652 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
2653 *exit_status
= EXIT_CPUAFFINITY
;
2654 return log_unit_error_errno(unit
, errno
, "Failed to set up CPU affinity: %m");
2657 if (context
->ioprio_set
)
2658 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
2659 *exit_status
= EXIT_IOPRIO
;
2660 return log_unit_error_errno(unit
, errno
, "Failed to set up IO scheduling priority: %m");
2663 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
2664 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
2665 *exit_status
= EXIT_TIMERSLACK
;
2666 return log_unit_error_errno(unit
, errno
, "Failed to set up timer slack: %m");
2669 if (context
->personality
!= PERSONALITY_INVALID
) {
2670 r
= safe_personality(context
->personality
);
2672 *exit_status
= EXIT_PERSONALITY
;
2673 return log_unit_error_errno(unit
, r
, "Failed to set up execution domain (personality): %m");
2677 if (context
->utmp_id
)
2678 utmp_put_init_process(context
->utmp_id
, getpid_cached(), getsid(0),
2680 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
2681 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
2685 if (context
->user
) {
2686 r
= chown_terminal(STDIN_FILENO
, uid
);
2688 *exit_status
= EXIT_STDIN
;
2689 return log_unit_error_errno(unit
, r
, "Failed to change ownership of terminal: %m");
2693 /* If delegation is enabled we'll pass ownership of the cgroup
2694 * (but only in systemd's own controller hierarchy!) to the
2695 * user of the new process. */
2696 if (params
->cgroup_path
&& context
->user
&& (params
->flags
& EXEC_CGROUP_DELEGATE
)) {
2697 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
2699 *exit_status
= EXIT_CGROUP
;
2700 return log_unit_error_errno(unit
, r
, "Failed to adjust control group access: %m");
2703 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
2705 *exit_status
= EXIT_CGROUP
;
2706 return log_unit_error_errno(unit
, r
, "Failed to adjust control group access: %m");
2710 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
2711 r
= setup_exec_directory(context
, params
, uid
, gid
, dt
, exit_status
);
2713 return log_unit_error_errno(unit
, r
, "Failed to set up special execution directory in %s: %m", params
->prefix
[dt
]);
2716 r
= build_environment(
2728 *exit_status
= EXIT_MEMORY
;
2732 r
= build_pass_environment(context
, &pass_env
);
2734 *exit_status
= EXIT_MEMORY
;
2738 accum_env
= strv_env_merge(5,
2739 params
->environment
,
2742 context
->environment
,
2746 *exit_status
= EXIT_MEMORY
;
2749 accum_env
= strv_env_clean(accum_env
);
2751 (void) umask(context
->umask
);
2753 r
= setup_keyring(unit
, context
, params
, uid
, gid
);
2755 *exit_status
= EXIT_KEYRING
;
2756 return log_unit_error_errno(unit
, r
, "Failed to set up kernel keyring: %m");
2759 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2760 needs_sandboxing
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& EXEC_COMMAND_FULLY_PRIVILEGED
);
2762 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2763 needs_ambient_hack
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && (command
->flags
& EXEC_COMMAND_AMBIENT_MAGIC
) && !ambient_capabilities_supported();
2765 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2766 if (needs_ambient_hack
)
2767 needs_setuid
= false;
2769 needs_setuid
= (params
->flags
& EXEC_APPLY_SANDBOXING
) && !(command
->flags
& (EXEC_COMMAND_FULLY_PRIVILEGED
|EXEC_COMMAND_NO_SETUID
));
2771 if (needs_sandboxing
) {
2772 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2773 * present. The actual MAC context application will happen later, as late as possible, to avoid
2774 * impacting our own code paths. */
2777 use_selinux
= mac_selinux_use();
2780 use_smack
= mac_smack_use();
2782 #ifdef HAVE_APPARMOR
2783 use_apparmor
= mac_apparmor_use();
2788 if (context
->pam_name
&& username
) {
2789 r
= setup_pam(context
->pam_name
, username
, uid
, gid
, context
->tty_path
, &accum_env
, fds
, n_fds
);
2791 *exit_status
= EXIT_PAM
;
2792 return log_unit_error_errno(unit
, r
, "Failed to set up PAM session: %m");
2797 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
2798 r
= setup_netns(runtime
->netns_storage_socket
);
2800 *exit_status
= EXIT_NETWORK
;
2801 return log_unit_error_errno(unit
, r
, "Failed to set up network namespacing: %m");
2805 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
2806 if (needs_mount_namespace
) {
2807 r
= apply_mount_namespace(unit
, command
, context
, params
, runtime
);
2809 *exit_status
= EXIT_NAMESPACE
;
2810 return log_unit_error_errno(unit
, r
, "Failed to set up mount namespacing: %m");
2814 /* Apply just after mount namespace setup */
2815 r
= apply_working_directory(context
, params
, home
, needs_mount_namespace
, exit_status
);
2817 return log_unit_error_errno(unit
, r
, "Changing to the requested working directory failed: %m");
2819 /* Drop groups as early as possible */
2821 r
= enforce_groups(context
, gid
, supplementary_gids
, ngids
);
2823 *exit_status
= EXIT_GROUP
;
2824 return log_unit_error_errno(unit
, r
, "Changing group credentials failed: %m");
2828 if (needs_sandboxing
) {
2830 if (use_selinux
&& params
->selinux_context_net
&& socket_fd
>= 0) {
2831 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
2833 *exit_status
= EXIT_SELINUX_CONTEXT
;
2834 return log_unit_error_errno(unit
, r
, "Failed to determine SELinux context: %m");
2839 if (context
->private_users
) {
2840 r
= setup_private_users(uid
, gid
);
2842 *exit_status
= EXIT_USER
;
2843 return log_unit_error_errno(unit
, r
, "Failed to set up user namespacing: %m");
2848 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2849 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2850 * was needed to upload the policy and can now be closed as well. */
2851 r
= close_all_fds(fds
, n_fds
);
2853 r
= shift_fds(fds
, n_fds
);
2855 r
= flags_fds(fds
, n_storage_fds
, n_socket_fds
, context
->non_blocking
);
2857 *exit_status
= EXIT_FDS
;
2858 return log_unit_error_errno(unit
, r
, "Failed to adjust passed file descriptors: %m");
2861 secure_bits
= context
->secure_bits
;
2863 if (needs_sandboxing
) {
2866 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
2868 if (!context
->rlimit
[i
])
2871 r
= setrlimit_closest(i
, context
->rlimit
[i
]);
2873 *exit_status
= EXIT_LIMITS
;
2874 return log_unit_error_errno(unit
, r
, "Failed to adjust resource limit %s: %m", rlimit_to_string(i
));
2878 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2879 if (context
->restrict_realtime
&& !context
->rlimit
[RLIMIT_RTPRIO
]) {
2880 if (setrlimit(RLIMIT_RTPRIO
, &RLIMIT_MAKE_CONST(0)) < 0) {
2881 *exit_status
= EXIT_LIMITS
;
2882 return log_unit_error_errno(unit
, errno
, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
2886 bset
= context
->capability_bounding_set
;
2887 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2888 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2889 * instead of us doing that */
2890 if (needs_ambient_hack
)
2891 bset
|= (UINT64_C(1) << CAP_SETPCAP
) |
2892 (UINT64_C(1) << CAP_SETUID
) |
2893 (UINT64_C(1) << CAP_SETGID
);
2895 if (!cap_test_all(bset
)) {
2896 r
= capability_bounding_set_drop(bset
, false);
2898 *exit_status
= EXIT_CAPABILITIES
;
2899 return log_unit_error_errno(unit
, r
, "Failed to drop capabilities: %m");
2903 /* This is done before enforce_user, but ambient set
2904 * does not survive over setresuid() if keep_caps is not set. */
2905 if (!needs_ambient_hack
&&
2906 context
->capability_ambient_set
!= 0) {
2907 r
= capability_ambient_set_apply(context
->capability_ambient_set
, true);
2909 *exit_status
= EXIT_CAPABILITIES
;
2910 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (before UID change): %m");
2916 if (context
->user
) {
2917 r
= enforce_user(context
, uid
);
2919 *exit_status
= EXIT_USER
;
2920 return log_unit_error_errno(unit
, r
, "Failed to change UID to " UID_FMT
": %m", uid
);
2923 if (!needs_ambient_hack
&&
2924 context
->capability_ambient_set
!= 0) {
2926 /* Fix the ambient capabilities after user change. */
2927 r
= capability_ambient_set_apply(context
->capability_ambient_set
, false);
2929 *exit_status
= EXIT_CAPABILITIES
;
2930 return log_unit_error_errno(unit
, r
, "Failed to apply ambient capabilities (after UID change): %m");
2933 /* If we were asked to change user and ambient capabilities
2934 * were requested, we had to add keep-caps to the securebits
2935 * so that we would maintain the inherited capability set
2936 * through the setresuid(). Make sure that the bit is added
2937 * also to the context secure_bits so that we don't try to
2938 * drop the bit away next. */
2940 secure_bits
|= 1<<SECURE_KEEP_CAPS
;
2945 if (needs_sandboxing
) {
2946 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2947 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2948 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2949 * are restricted. */
2953 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
2956 r
= setexeccon(exec_context
);
2958 *exit_status
= EXIT_SELINUX_CONTEXT
;
2959 return log_unit_error_errno(unit
, r
, "Failed to change SELinux context to %s: %m", exec_context
);
2967 r
= setup_smack(context
, command
);
2969 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
2970 return log_unit_error_errno(unit
, r
, "Failed to set SMACK process label: %m");
2975 #ifdef HAVE_APPARMOR
2976 if (use_apparmor
&& context
->apparmor_profile
) {
2977 r
= aa_change_onexec(context
->apparmor_profile
);
2978 if (r
< 0 && !context
->apparmor_profile_ignore
) {
2979 *exit_status
= EXIT_APPARMOR_PROFILE
;
2980 return log_unit_error_errno(unit
, errno
, "Failed to prepare AppArmor profile change to %s: %m", context
->apparmor_profile
);
2985 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2986 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
2987 if (prctl(PR_GET_SECUREBITS
) != secure_bits
)
2988 if (prctl(PR_SET_SECUREBITS
, secure_bits
) < 0) {
2989 *exit_status
= EXIT_SECUREBITS
;
2990 return log_unit_error_errno(unit
, errno
, "Failed to set process secure bits: %m");
2993 if (context_has_no_new_privileges(context
))
2994 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
2995 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
2996 return log_unit_error_errno(unit
, errno
, "Failed to disable new privileges: %m");
3000 r
= apply_address_families(unit
, context
);
3002 *exit_status
= EXIT_ADDRESS_FAMILIES
;
3003 return log_unit_error_errno(unit
, r
, "Failed to restrict address families: %m");
3006 r
= apply_memory_deny_write_execute(unit
, context
);
3008 *exit_status
= EXIT_SECCOMP
;
3009 return log_unit_error_errno(unit
, r
, "Failed to disable writing to executable memory: %m");
3012 r
= apply_restrict_realtime(unit
, context
);
3014 *exit_status
= EXIT_SECCOMP
;
3015 return log_unit_error_errno(unit
, r
, "Failed to apply realtime restrictions: %m");
3018 r
= apply_restrict_namespaces(unit
, context
);
3020 *exit_status
= EXIT_SECCOMP
;
3021 return log_unit_error_errno(unit
, r
, "Failed to apply namespace restrictions: %m");
3024 r
= apply_protect_sysctl(unit
, context
);
3026 *exit_status
= EXIT_SECCOMP
;
3027 return log_unit_error_errno(unit
, r
, "Failed to apply sysctl restrictions: %m");
3030 r
= apply_protect_kernel_modules(unit
, context
);
3032 *exit_status
= EXIT_SECCOMP
;
3033 return log_unit_error_errno(unit
, r
, "Failed to apply module loading restrictions: %m");
3036 r
= apply_private_devices(unit
, context
);
3038 *exit_status
= EXIT_SECCOMP
;
3039 return log_unit_error_errno(unit
, r
, "Failed to set up private devices: %m");
3042 r
= apply_syscall_archs(unit
, context
);
3044 *exit_status
= EXIT_SECCOMP
;
3045 return log_unit_error_errno(unit
, r
, "Failed to apply syscall architecture restrictions: %m");
3048 r
= apply_lock_personality(unit
, context
);
3050 *exit_status
= EXIT_SECCOMP
;
3051 return log_unit_error_errno(unit
, r
, "Failed to lock personalities: %m");
3054 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3055 * by the filter as little as possible. */
3056 r
= apply_syscall_filter(unit
, context
, needs_ambient_hack
);
3058 *exit_status
= EXIT_SECCOMP
;
3059 return log_unit_error_errno(unit
, r
, "Failed to apply system call filters: %m");
3064 if (!strv_isempty(context
->unset_environment
)) {
3067 ee
= strv_env_delete(accum_env
, 1, context
->unset_environment
);
3069 *exit_status
= EXIT_MEMORY
;
3073 strv_free(accum_env
);
3077 final_argv
= replace_env_argv(argv
, accum_env
);
3079 *exit_status
= EXIT_MEMORY
;
3083 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
3084 _cleanup_free_
char *line
;
3086 line
= exec_command_line(final_argv
);
3088 log_struct(LOG_DEBUG
,
3089 "EXECUTABLE=%s", command
->path
,
3090 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
3092 LOG_UNIT_INVOCATION_ID(unit
),
3097 execve(command
->path
, final_argv
, accum_env
);
3099 if (errno
== ENOENT
&& (command
->flags
& EXEC_COMMAND_IGNORE_FAILURE
)) {
3101 log_struct_errno(LOG_INFO
, errno
,
3102 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3104 LOG_UNIT_INVOCATION_ID(unit
),
3105 LOG_UNIT_MESSAGE(unit
, "Executable %s missing, skipping: %m",
3107 "EXECUTABLE=%s", command
->path
,
3113 *exit_status
= EXIT_EXEC
;
3114 return log_unit_error_errno(unit
, errno
, "Failed to execute command: %m");
/* Spawns 'command' on behalf of 'unit'.
 *
 * Visible steps: validate the passed fds, pick the socket fd when any of stdin/stdout/stderr is
 * configured as a socket, resolve named stdio fds, load the environment files, log the command line,
 * then run exec_child(). On the path that reaches the end of the function, the new PID is attached to
 * params->cgroup_path (if set) and the start is recorded in command->exec_status.
 *
 * NOTE(review): several lines of this function (the remaining parameters, local declarations, the
 * fork() call itself and the branches around exec_child()) are not present in this text — confirm
 * against the complete file before relying on the details. */
3117 int exec_spawn(Unit
*unit
,
3118 ExecCommand
*command
,
3119 const ExecContext
*context
,
3120 const ExecParameters
*params
,
3121 ExecRuntime
*runtime
,
3122 DynamicCreds
*dcreds
,
3125 _cleanup_strv_free_
char **files_env
= NULL
;
3127 unsigned n_storage_fds
= 0, n_socket_fds
= 0;
3128 _cleanup_free_
char *line
= NULL
;
3130 int named_iofds
[3] = { -1, -1, -1 };
3139 assert(params
->fds
|| (params
->n_storage_fds
+ params
->n_socket_fds
<= 0));
/* Socket-based stdio needs exactly one socket fd; it is taken from params->fds[0]. */
3141 if (context
->std_input
== EXEC_INPUT_SOCKET
||
3142 context
->std_output
== EXEC_OUTPUT_SOCKET
||
3143 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
3145 if (params
->n_socket_fds
> 1) {
3146 log_unit_error(unit
, "Got more than one socket.");
3150 if (params
->n_socket_fds
== 0) {
3151 log_unit_error(unit
, "Got no socket.");
3155 socket_fd
= params
->fds
[0];
3159 n_storage_fds
= params
->n_storage_fds
;
3160 n_socket_fds
= params
->n_socket_fds
;
/* Map stdin/stdout/stderr configured as named fds onto the fds that were passed in. */
3163 r
= exec_context_named_iofds(unit
, context
, params
, named_iofds
);
3165 return log_unit_error_errno(unit
, r
, "Failed to load a named file descriptor: %m");
3167 r
= exec_context_load_environment(unit
, context
, &files_env
);
3169 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
/* An argv supplied via params takes precedence over the one stored in the command. */
3171 argv
= params
->argv
?: command
->argv
;
3172 line
= exec_command_line(argv
);
3176 log_struct(LOG_DEBUG
,
3177 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
3178 "EXECUTABLE=%s", command
->path
,
3180 LOG_UNIT_INVOCATION_ID(unit
),
3185 return log_unit_error_errno(unit
, errno
, "Failed to fork: %m");
3188 int exit_status
= EXIT_SUCCESS
;
3190 r
= exec_child(unit
,
3203 unit
->manager
->user_lookup_fds
[1],
3207 log_struct_errno(LOG_ERR
, r
,
3208 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR
,
3210 LOG_UNIT_INVOCATION_ID(unit
),
3211 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
3212 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
3214 "EXECUTABLE=%s", command
->path
,
3221 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
3223 /* We add the new process to the cgroup both in the child (so
3224 * that we can be sure that no user code is ever executed
3225 * outside of the cgroup) and in the parent (so that we can be
3226 * sure that when we kill the cgroup the process will be
3228 if (params
->cgroup_path
)
3229 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
3231 exec_status_start(&command
->exec_status
, pid
);
3237 void exec_context_init(ExecContext
*c
) {
3238 ExecDirectoryType i
;
3243 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
3244 c
->cpu_sched_policy
= SCHED_OTHER
;
3245 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
3246 c
->syslog_level_prefix
= true;
3247 c
->ignore_sigpipe
= true;
3248 c
->timer_slack_nsec
= NSEC_INFINITY
;
3249 c
->personality
= PERSONALITY_INVALID
;
3250 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3251 c
->directories
[i
].mode
= 0755;
3252 c
->capability_bounding_set
= CAP_ALL
;
3253 c
->restrict_namespaces
= NAMESPACE_FLAGS_ALL
;
3256 void exec_context_done(ExecContext
*c
) {
3258 ExecDirectoryType i
;
3262 c
->environment
= strv_free(c
->environment
);
3263 c
->environment_files
= strv_free(c
->environment_files
);
3264 c
->pass_environment
= strv_free(c
->pass_environment
);
3265 c
->unset_environment
= strv_free(c
->unset_environment
);
3267 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++)
3268 c
->rlimit
[l
] = mfree(c
->rlimit
[l
]);
3270 for (l
= 0; l
< 3; l
++)
3271 c
->stdio_fdname
[l
] = mfree(c
->stdio_fdname
[l
]);
3273 c
->working_directory
= mfree(c
->working_directory
);
3274 c
->root_directory
= mfree(c
->root_directory
);
3275 c
->root_image
= mfree(c
->root_image
);
3276 c
->tty_path
= mfree(c
->tty_path
);
3277 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
3278 c
->user
= mfree(c
->user
);
3279 c
->group
= mfree(c
->group
);
3281 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
3283 c
->pam_name
= mfree(c
->pam_name
);
3285 c
->read_only_paths
= strv_free(c
->read_only_paths
);
3286 c
->read_write_paths
= strv_free(c
->read_write_paths
);
3287 c
->inaccessible_paths
= strv_free(c
->inaccessible_paths
);
3289 bind_mount_free_many(c
->bind_mounts
, c
->n_bind_mounts
);
3292 CPU_FREE(c
->cpuset
);
3294 c
->utmp_id
= mfree(c
->utmp_id
);
3295 c
->selinux_context
= mfree(c
->selinux_context
);
3296 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
3297 c
->smack_process_label
= mfree(c
->smack_process_label
);
3299 c
->syscall_filter
= set_free(c
->syscall_filter
);
3300 c
->syscall_archs
= set_free(c
->syscall_archs
);
3301 c
->address_families
= set_free(c
->address_families
);
3303 for (i
= 0; i
< _EXEC_DIRECTORY_TYPE_MAX
; i
++)
3304 c
->directories
[i
].paths
= strv_free(c
->directories
[i
].paths
);
3307 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
3312 if (!runtime_prefix
)
3315 STRV_FOREACH(i
, c
->directories
[EXEC_DIRECTORY_RUNTIME
].paths
) {
3316 _cleanup_free_
char *p
;
3318 p
= strjoin(runtime_prefix
, "/", *i
);
3322 /* We execute this synchronously, since we need to be
3323 * sure this is gone when we start the service
3325 (void) rm_rf(p
, REMOVE_ROOT
);
3331 void exec_command_done(ExecCommand
*c
) {
3334 c
->path
= mfree(c
->path
);
3336 c
->argv
= strv_free(c
->argv
);
3339 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
3342 for (i
= 0; i
< n
; i
++)
3343 exec_command_done(c
+i
);
3346 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
3350 LIST_REMOVE(command
, c
, i
);
3351 exec_command_done(i
);
3358 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
3361 for (i
= 0; i
< n
; i
++)
3362 c
[i
] = exec_command_free_list(c
[i
]);
3365 typedef struct InvalidEnvInfo
{
3370 static void invalid_env(const char *p
, void *userdata
) {
3371 InvalidEnvInfo
*info
= userdata
;
3373 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
3376 const char* exec_context_fdname(const ExecContext
*c
, int fd_index
) {
3381 if (c
->std_input
!= EXEC_INPUT_NAMED_FD
)
3383 return c
->stdio_fdname
[STDIN_FILENO
] ?: "stdin";
3385 if (c
->std_output
!= EXEC_OUTPUT_NAMED_FD
)
3387 return c
->stdio_fdname
[STDOUT_FILENO
] ?: "stdout";
3389 if (c
->std_error
!= EXEC_OUTPUT_NAMED_FD
)
3391 return c
->stdio_fdname
[STDERR_FILENO
] ?: "stderr";
3397 int exec_context_named_iofds(Unit
*unit
, const ExecContext
*c
, const ExecParameters
*p
, int named_iofds
[3]) {
3398 unsigned i
, targets
;
3399 const char* stdio_fdname
[3];
3405 targets
= (c
->std_input
== EXEC_INPUT_NAMED_FD
) +
3406 (c
->std_output
== EXEC_OUTPUT_NAMED_FD
) +
3407 (c
->std_error
== EXEC_OUTPUT_NAMED_FD
);
3409 for (i
= 0; i
< 3; i
++)
3410 stdio_fdname
[i
] = exec_context_fdname(c
, i
);
3412 n_fds
= p
->n_storage_fds
+ p
->n_socket_fds
;
3414 for (i
= 0; i
< n_fds
&& targets
> 0; i
++)
3415 if (named_iofds
[STDIN_FILENO
] < 0 &&
3416 c
->std_input
== EXEC_INPUT_NAMED_FD
&&
3417 stdio_fdname
[STDIN_FILENO
] &&
3418 streq(p
->fd_names
[i
], stdio_fdname
[STDIN_FILENO
])) {
3420 named_iofds
[STDIN_FILENO
] = p
->fds
[i
];
3423 } else if (named_iofds
[STDOUT_FILENO
] < 0 &&
3424 c
->std_output
== EXEC_OUTPUT_NAMED_FD
&&
3425 stdio_fdname
[STDOUT_FILENO
] &&
3426 streq(p
->fd_names
[i
], stdio_fdname
[STDOUT_FILENO
])) {
3428 named_iofds
[STDOUT_FILENO
] = p
->fds
[i
];
3431 } else if (named_iofds
[STDERR_FILENO
] < 0 &&
3432 c
->std_error
== EXEC_OUTPUT_NAMED_FD
&&
3433 stdio_fdname
[STDERR_FILENO
] &&
3434 streq(p
->fd_names
[i
], stdio_fdname
[STDERR_FILENO
])) {
3436 named_iofds
[STDERR_FILENO
] = p
->fds
[i
];
3440 return targets
== 0 ? 0 : -ENOENT
;
/* Loads the environment files listed in c->environment_files.
 *
 * Visible steps per entry: the file name must be an absolute path, it is expanded with safe_glob(),
 * each match is read with load_env_file(), cleaned with strv_env_clean_with_callback() (invalid
 * assignments are reported through invalid_env()) and merged into the accumulated list with
 * strv_env_merge().
 *
 * NOTE(review): the error handling, the treatment of the 'ignore' flag and the final hand-over of
 * the result through 'l' are not present in this text — confirm against the complete file. */
3443 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
3444 char **i
, **r
= NULL
;
3449 STRV_FOREACH(i
, c
->environment_files
) {
3453 bool ignore
= false;
3455 _cleanup_globfree_ glob_t pglob
= {};
3464 if (!path_is_absolute(fn
)) {
3472 /* Filename supports globbing, take all matching files */
3473 k
= safe_glob(fn
, 0, &pglob
);
3482 /* When we don't match anything, -ENOENT should be returned */
3483 assert(pglob
.gl_pathc
> 0);
3485 for (n
= 0; n
< pglob
.gl_pathc
; n
++) {
3486 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
3494 /* Log invalid environment variables with filename */
3496 InvalidEnvInfo info
= {
3498 .path
= pglob
.gl_pathv
[n
]
3501 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
/* Combine what was collected so far (r) with the variables of this file (p) */
3509 m
= strv_env_merge(2, r
, p
);
3525 static bool tty_may_match_dev_console(const char *tty
) {
3526 _cleanup_free_
char *active
= NULL
;
3532 tty
= skip_dev_prefix(tty
);
3534 /* trivial identity? */
3535 if (streq(tty
, "console"))
3538 console
= resolve_dev_console(&active
);
3539 /* if we could not resolve, assume it may */
3543 /* "tty0" means the active VC, so it may be the same sometimes */
3544 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
3547 bool exec_context_may_touch_console(ExecContext
*ec
) {
3549 return (ec
->tty_reset
||
3551 ec
->tty_vt_disallocate
||
3552 is_terminal_input(ec
->std_input
) ||
3553 is_terminal_output(ec
->std_output
) ||
3554 is_terminal_output(ec
->std_error
)) &&
3555 tty_may_match_dev_console(exec_context_tty_path(ec
));
3558 static void strv_fprintf(FILE *f
, char **l
) {
3564 fprintf(f
, " %s", *g
);
/* Writes a human-readable description of the execution context c to f. Every line that is written
 * starts with 'prefix' (which is passed through strempty() first), followed by the setting name and
 * its value. Optional settings are only printed when they are set.
 *
 * NOTE(review): many lines of this function (most "fprintf(f," openers, several guards and the
 * preprocessor conditionals) are not present in this text — confirm against the complete file. */
3567 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
3570 ExecDirectoryType dt
;
3576 prefix
= strempty(prefix
);
3580 "%sWorkingDirectory: %s\n"
3581 "%sRootDirectory: %s\n"
3582 "%sNonBlocking: %s\n"
3583 "%sPrivateTmp: %s\n"
3584 "%sPrivateDevices: %s\n"
3585 "%sProtectKernelTunables: %s\n"
3586 "%sProtectKernelModules: %s\n"
3587 "%sProtectControlGroups: %s\n"
3588 "%sPrivateNetwork: %s\n"
3589 "%sPrivateUsers: %s\n"
3590 "%sProtectHome: %s\n"
3591 "%sProtectSystem: %s\n"
3592 "%sMountAPIVFS: %s\n"
3593 "%sIgnoreSIGPIPE: %s\n"
3594 "%sMemoryDenyWriteExecute: %s\n"
3595 "%sRestrictRealtime: %s\n"
3596 "%sKeyringMode: %s\n",
3598 prefix
, c
->working_directory
? c
->working_directory
: "/",
3599 prefix
, c
->root_directory
? c
->root_directory
: "/",
3600 prefix
, yes_no(c
->non_blocking
),
3601 prefix
, yes_no(c
->private_tmp
),
3602 prefix
, yes_no(c
->private_devices
),
3603 prefix
, yes_no(c
->protect_kernel_tunables
),
3604 prefix
, yes_no(c
->protect_kernel_modules
),
3605 prefix
, yes_no(c
->protect_control_groups
),
3606 prefix
, yes_no(c
->private_network
),
3607 prefix
, yes_no(c
->private_users
),
3608 prefix
, protect_home_to_string(c
->protect_home
),
3609 prefix
, protect_system_to_string(c
->protect_system
),
3610 prefix
, yes_no(c
->mount_apivfs
),
3611 prefix
, yes_no(c
->ignore_sigpipe
),
3612 prefix
, yes_no(c
->memory_deny_write_execute
),
3613 prefix
, yes_no(c
->restrict_realtime
),
3614 prefix
, exec_keyring_mode_to_string(c
->keyring_mode
));
3617 fprintf(f
, "%sRootImage: %s\n", prefix
, c
->root_image
);
/* The environment related string lists are printed one entry per line */
3619 STRV_FOREACH(e
, c
->environment
)
3620 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
3622 STRV_FOREACH(e
, c
->environment_files
)
3623 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
3625 STRV_FOREACH(e
, c
->pass_environment
)
3626 fprintf(f
, "%sPassEnvironment: %s\n", prefix
, *e
);
3628 STRV_FOREACH(e
, c
->unset_environment
)
3629 fprintf(f
, "%sUnsetEnvironment: %s\n", prefix
, *e
);
3631 fprintf(f
, "%sRuntimeDirectoryPreserve: %s\n", prefix
, exec_preserve_mode_to_string(c
->runtime_directory_preserve_mode
));
/* For every directory type: first its mode, then one line per configured path */
3633 for (dt
= 0; dt
< _EXEC_DIRECTORY_TYPE_MAX
; dt
++) {
3634 fprintf(f
, "%s%sMode: %04o\n", prefix
, exec_directory_type_to_string(dt
), c
->directories
[dt
].mode
);
3636 STRV_FOREACH(d
, c
->directories
[dt
].paths
)
3637 fprintf(f
, "%s%s: %s\n", prefix
, exec_directory_type_to_string(dt
), *d
);
3645 if (c
->oom_score_adjust_set
)
3647 "%sOOMScoreAdjust: %i\n",
3648 prefix
, c
->oom_score_adjust
);
/* Resource limits: the hard limit is printed under the limit's name, the soft limit with a "Soft" suffix */
3650 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
3652 fprintf(f
, "%s%s: " RLIM_FMT
"\n",
3653 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
3654 fprintf(f
, "%s%sSoft: " RLIM_FMT
"\n",
3655 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_cur
);
3658 if (c
->ioprio_set
) {
3659 _cleanup_free_
char *class_str
= NULL
;
3661 r
= ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
3663 fprintf(f
, "%sIOSchedulingClass: %s\n", prefix
, class_str
);
3665 fprintf(f
, "%sIOPriority: %lu\n", prefix
, IOPRIO_PRIO_DATA(c
->ioprio
));
3668 if (c
->cpu_sched_set
) {
3669 _cleanup_free_
char *policy_str
= NULL
;
3671 r
= sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
3673 fprintf(f
, "%sCPUSchedulingPolicy: %s\n", prefix
, policy_str
);
3676 "%sCPUSchedulingPriority: %i\n"
3677 "%sCPUSchedulingResetOnFork: %s\n",
3678 prefix
, c
->cpu_sched_priority
,
3679 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
3683 fprintf(f
, "%sCPUAffinity:", prefix
);
3684 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
3685 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
3686 fprintf(f
, " %u", i
);
3690 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
3691 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
3694 "%sStandardInput: %s\n"
3695 "%sStandardOutput: %s\n"
3696 "%sStandardError: %s\n",
3697 prefix
, exec_input_to_string(c
->std_input
),
3698 prefix
, exec_output_to_string(c
->std_output
),
3699 prefix
, exec_output_to_string(c
->std_error
));
3705 "%sTTYVHangup: %s\n"
3706 "%sTTYVTDisallocate: %s\n",
3707 prefix
, c
->tty_path
,
3708 prefix
, yes_no(c
->tty_reset
),
3709 prefix
, yes_no(c
->tty_vhangup
),
3710 prefix
, yes_no(c
->tty_vt_disallocate
));
/* Syslog facility and level are only printed if stdout or stderr goes to one of the listed log targets */
3712 if (IN_SET(c
->std_output
,
3715 EXEC_OUTPUT_JOURNAL
,
3716 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
3717 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
3718 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) ||
3719 IN_SET(c
->std_error
,
3722 EXEC_OUTPUT_JOURNAL
,
3723 EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
3724 EXEC_OUTPUT_KMSG_AND_CONSOLE
,
3725 EXEC_OUTPUT_JOURNAL_AND_CONSOLE
)) {
3727 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
3729 r
= log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
3731 fprintf(f
, "%sSyslogFacility: %s\n", prefix
, fac_str
);
3733 r
= log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
3735 fprintf(f
, "%sSyslogLevel: %s\n", prefix
, lvl_str
);
3738 if (c
->secure_bits
) {
3739 _cleanup_free_
char *str
= NULL
;
3741 r
= secure_bits_to_string_alloc(c
->secure_bits
, &str
);
3743 fprintf(f
, "%sSecure Bits: %s\n", prefix
, str
);
3746 if (c
->capability_bounding_set
!= CAP_ALL
) {
3747 _cleanup_free_
char *str
= NULL
;
3749 r
= capability_set_to_string_alloc(c
->capability_bounding_set
, &str
);
3751 fprintf(f
, "%sCapabilityBoundingSet: %s\n", prefix
, str
);
3754 if (c
->capability_ambient_set
!= 0) {
3755 _cleanup_free_
char *str
= NULL
;
3757 r
= capability_set_to_string_alloc(c
->capability_ambient_set
, &str
);
3759 fprintf(f
, "%sAmbientCapabilities: %s\n", prefix
, str
);
3763 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
3765 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
3767 fprintf(f
, "%sDynamicUser: %s\n", prefix
, yes_no(c
->dynamic_user
));
3769 if (strv_length(c
->supplementary_groups
) > 0) {
3770 fprintf(f
, "%sSupplementaryGroups:", prefix
);
3771 strv_fprintf(f
, c
->supplementary_groups
);
3776 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
3778 if (strv_length(c
->read_write_paths
) > 0) {
3779 fprintf(f
, "%sReadWritePaths:", prefix
);
3780 strv_fprintf(f
, c
->read_write_paths
);
3784 if (strv_length(c
->read_only_paths
) > 0) {
3785 fprintf(f
, "%sReadOnlyPaths:", prefix
);
3786 strv_fprintf(f
, c
->read_only_paths
);
3790 if (strv_length(c
->inaccessible_paths
) > 0) {
3791 fprintf(f
, "%sInaccessiblePaths:", prefix
);
3792 strv_fprintf(f
, c
->inaccessible_paths
);
3796 if (c
->n_bind_mounts
> 0)
3797 for (i
= 0; i
< c
->n_bind_mounts
; i
++) {
3798 fprintf(f
, "%s%s: %s:%s:%s\n", prefix
,
3799 c
->bind_mounts
[i
].read_only
? "BindReadOnlyPaths" : "BindPaths",
3800 c
->bind_mounts
[i
].source
,
3801 c
->bind_mounts
[i
].destination
,
3802 c
->bind_mounts
[i
].recursive
? "rbind" : "norbind");
3807 "%sUtmpIdentifier: %s\n",
3808 prefix
, c
->utmp_id
);
/* For the MAC settings below a leading "-" is printed in front of the value when the respective *_ignore flag is set */
3810 if (c
->selinux_context
)
3812 "%sSELinuxContext: %s%s\n",
3813 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
3815 if (c
->apparmor_profile
)
3817 "%sAppArmorProfile: %s%s\n",
3818 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
3820 if (c
->smack_process_label
)
3822 "%sSmackProcessLabel: %s%s\n",
3823 prefix
, c
->smack_process_label_ignore
? "-" : "", c
->smack_process_label
);
3825 if (c
->personality
!= PERSONALITY_INVALID
)
3827 "%sPersonality: %s\n",
3828 prefix
, strna(personality_to_string(c
->personality
)));
3831 "%sLockPersonality: %s\n",
3832 prefix
, yes_no(c
->lock_personality
));
3834 if (c
->syscall_filter
) {
3842 "%sSystemCallFilter: ",
3845 if (!c
->syscall_whitelist
)
3849 SET_FOREACH(id
, c
->syscall_filter
, j
) {
3850 _cleanup_free_
char *name
= NULL
;
/* Set entries are stored offset by one, hence the "- 1" before resolving the name */
3857 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
3858 fputs(strna(name
), f
);
3865 if (c
->syscall_archs
) {
3872 "%sSystemCallArchitectures:",
3876 SET_FOREACH(id
, c
->syscall_archs
, j
)
3877 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
3882 if (exec_context_restrict_namespaces_set(c
)) {
3883 _cleanup_free_
char *s
= NULL
;
3885 r
= namespace_flag_to_string_many(c
->restrict_namespaces
, &s
);
3887 fprintf(f
, "%sRestrictNamespaces: %s\n",
3891 if (c
->syscall_errno
> 0)
3893 "%sSystemCallErrorNumber: %s\n",
3894 prefix
, strna(errno_to_name(c
->syscall_errno
)));
/* NOTE(review): AppArmorProfile is already printed earlier in this function with the same format
 * string, so the profile shows up twice in the dump — this second copy looks redundant. */
3896 if (c
->apparmor_profile
)
3898 "%sAppArmorProfile: %s%s\n",
3899 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
3902 bool exec_context_maintains_privileges(ExecContext
*c
) {
3905 /* Returns true if the process forked off would run under
3906 * an unchanged UID or as root. */
3911 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
3917 int exec_context_get_effective_ioprio(ExecContext
*c
) {
3925 p
= ioprio_get(IOPRIO_WHO_PROCESS
, 0);
3927 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 4);
3932 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
3937 dual_timestamp_get(&s
->start_timestamp
);
3940 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
3943 if (s
->pid
&& s
->pid
!= pid
)
3947 dual_timestamp_get(&s
->exit_timestamp
);
3953 if (context
->utmp_id
)
3954 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
3956 exec_context_tty_reset(context
, NULL
);
3960 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
3961 char buf
[FORMAT_TIMESTAMP_MAX
];
3969 prefix
= strempty(prefix
);
3972 "%sPID: "PID_FMT
"\n",
3975 if (dual_timestamp_is_set(&s
->start_timestamp
))
3977 "%sStart Timestamp: %s\n",
3978 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
3980 if (dual_timestamp_is_set(&s
->exit_timestamp
))
3982 "%sExit Timestamp: %s\n"
3984 "%sExit Status: %i\n",
3985 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
3986 prefix
, sigchld_code_to_string(s
->code
),
/* Builds a single command line string from the argument vector argv. Arguments that contain
 * whitespace (strpbrk() against WHITESPACE) are treated specially.
 *
 * NOTE(review): the size computation, allocation and copying are not present in this text; judging
 * from the FIXME below, arguments containing both spaces and ticks are not handled properly —
 * confirm against the complete file. */
3990 char *exec_command_line(char **argv
) {
/* First pass over the arguments */
3998 STRV_FOREACH(a
, argv
)
/* Second pass over the arguments */
4006 STRV_FOREACH(a
, argv
) {
4013 if (strpbrk(*a
, WHITESPACE
)) {
4024 /* FIXME: this doesn't really handle arguments that have
4025 * spaces and ticks in them */
4030 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4031 _cleanup_free_
char *cmd
= NULL
;
4032 const char *prefix2
;
4037 prefix
= strempty(prefix
);
4038 prefix2
= strjoina(prefix
, "\t");
4040 cmd
= exec_command_line(c
->argv
);
4042 "%sCommand Line: %s\n",
4043 prefix
, cmd
? cmd
: strerror(ENOMEM
));
4045 exec_status_dump(&c
->exec_status
, f
, prefix2
);
4048 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
4051 prefix
= strempty(prefix
);
4053 LIST_FOREACH(command
, c
, c
)
4054 exec_command_dump(c
, f
, prefix
);
4057 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
4064 /* It's kind of important, that we keep the order here */
4065 LIST_FIND_TAIL(command
, *l
, end
);
4066 LIST_INSERT_AFTER(command
, *l
, end
, e
);
/* Variadic helper taking a path followed by further arguments, which are turned into a string vector
 * with strv_new_ap().
 *
 * NOTE(review): what is done with the vector afterwards is not present in this text — presumably it
 * replaces the path and argv of c; confirm against the complete file. */
4071 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
4079 l
= strv_new_ap(path
, ap
);
/* Variadic helper that builds a string vector from path and the arguments following it with
 * strv_new_ap() and appends it to c->argv with strv_extend_strv(). The temporary vector is released
 * automatically (_cleanup_strv_free_).
 *
 * NOTE(review): the error handling and the return value are not present in this text — confirm
 * against the complete file. */
4100 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
4101 _cleanup_strv_free_
char **l
= NULL
;
4109 l
= strv_new_ap(path
, ap
);
4115 r
= strv_extend_strv(&c
->argv
, l
, false);
4123 static int exec_runtime_allocate(ExecRuntime
**rt
) {
4128 *rt
= new0(ExecRuntime
, 1);
4133 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
4138 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
4148 if (!c
->private_network
&& !c
->private_tmp
)
4151 r
= exec_runtime_allocate(rt
);
4155 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
4156 if (socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, (*rt
)->netns_storage_socket
) < 0)
4160 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
4161 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
4169 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
4171 assert(r
->n_ref
> 0);
4177 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
4182 assert(r
->n_ref
> 0);
4189 free(r
->var_tmp_dir
);
4190 safe_close_pair(r
->netns_storage_socket
);
4194 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
4203 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
4205 if (rt
->var_tmp_dir
)
4206 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
4208 if (rt
->netns_storage_socket
[0] >= 0) {
4211 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
4215 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
4218 if (rt
->netns_storage_socket
[1] >= 0) {
4221 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
4225 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
4231 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
4238 if (streq(key
, "tmp-dir")) {
4241 r
= exec_runtime_allocate(rt
);
4245 copy
= strdup(value
);
4249 free((*rt
)->tmp_dir
);
4250 (*rt
)->tmp_dir
= copy
;
4252 } else if (streq(key
, "var-tmp-dir")) {
4255 r
= exec_runtime_allocate(rt
);
4259 copy
= strdup(value
);
4263 free((*rt
)->var_tmp_dir
);
4264 (*rt
)->var_tmp_dir
= copy
;
4266 } else if (streq(key
, "netns-socket-0")) {
4269 r
= exec_runtime_allocate(rt
);
4273 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
4274 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4276 safe_close((*rt
)->netns_storage_socket
[0]);
4277 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
4279 } else if (streq(key
, "netns-socket-1")) {
4282 r
= exec_runtime_allocate(rt
);
4286 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
4287 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
4289 safe_close((*rt
)->netns_storage_socket
[1]);
4290 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
/* Job routine that removes a temporary directory tree; exec_runtime_destroy() passes it to
 * asynchronous_job(). The argument p is the directory path as a char string. */
4298 static void *remove_tmpdir_thread(void *p
) {
/* NOTE(review): _cleanup_free_ presumably frees path when it goes out of scope, which would
 * mean this routine takes ownership of the string it is handed; confirm against the macro. */
4299 _cleanup_free_
char *path
= p
;
/* The result of rm_rf() is deliberately discarded via the (void) cast. */
4301 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
/* Destroys the on-disk and fd resources of an ExecRuntime rt.
 * For both tmp_dir and var_tmp_dir a removal job running remove_tmpdir_thread() is started
 * through asynchronous_job(); afterwards the netns storage socket pair is handed to
 * safe_close_pair(). */
4305 void exec_runtime_destroy(ExecRuntime
*rt
) {
4311 /* If there are multiple users of this, let's leave the stuff around */
4316 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
/* The path string itself is passed as the job argument. */
4318 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
/* Reports r together with the path in a "Failed to nuke" warning. */
4320 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
/* var_tmp_dir gets the same treatment, but only when it is set. */
4327 if (rt
->var_tmp_dir
) {
4328 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
4330 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
4332 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
/* NOTE(review): freed here presumably because a job that failed to start will never free the
 * string itself; confirm against the guarding condition. */
4333 free(rt
->var_tmp_dir
);
/* Clear the field so the runtime object no longer refers to the path string. */
4336 rt
->var_tmp_dir
= NULL
;
4339 safe_close_pair(rt
->netns_storage_socket
);
/* String names of the ExecInput enumerators, indexed by enumerator value and sized by
 * _EXEC_INPUT_MAX. DEFINE_STRING_TABLE_LOOKUP is instantiated for exec_input/ExecInput below,
 * presumably generating the string conversion helpers from this table. */
4342 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
4343 [EXEC_INPUT_NULL
] = "null",
4344 [EXEC_INPUT_TTY
] = "tty",
4345 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
4346 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
4347 [EXEC_INPUT_SOCKET
] = "socket",
4348 [EXEC_INPUT_NAMED_FD
] = "fd",
4351 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* String names of the ExecOutput enumerators, indexed by enumerator value and sized by
 * _EXEC_OUTPUT_MAX. The "+console" names belong to the *_AND_CONSOLE enumerators.
 * DEFINE_STRING_TABLE_LOOKUP is instantiated for exec_output/ExecOutput below, presumably
 * generating the string conversion helpers from this table. */
4353 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
4354 [EXEC_OUTPUT_INHERIT
] = "inherit",
4355 [EXEC_OUTPUT_NULL
] = "null",
4356 [EXEC_OUTPUT_TTY
] = "tty",
4357 [EXEC_OUTPUT_SYSLOG
] = "syslog",
4358 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
4359 [EXEC_OUTPUT_KMSG
] = "kmsg",
4360 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
4361 [EXEC_OUTPUT_JOURNAL
] = "journal",
4362 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
4363 [EXEC_OUTPUT_SOCKET
] = "socket",
4364 [EXEC_OUTPUT_NAMED_FD
] = "fd",
4367 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
/* String names of the ExecUtmpMode enumerators, indexed by enumerator value and sized by
 * _EXEC_UTMP_MODE_MAX. DEFINE_STRING_TABLE_LOOKUP is instantiated for
 * exec_utmp_mode/ExecUtmpMode below, presumably generating the string conversion helpers. */
4369 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
4370 [EXEC_UTMP_INIT
] = "init",
4371 [EXEC_UTMP_LOGIN
] = "login",
4372 [EXEC_UTMP_USER
] = "user",
4375 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);
/* String names of the ExecPreserveMode enumerators, indexed by enumerator value and sized by
 * _EXEC_PRESERVE_MODE_MAX.
 * NOTE(review): the _WITH_BOOLEAN variant of the lookup macro is given EXEC_PRESERVE_YES as an
 * extra argument; presumably generic boolean spellings are then accepted and mapped to that
 * value. Confirm against the macro definition. */
4377 static const char* const exec_preserve_mode_table
[_EXEC_PRESERVE_MODE_MAX
] = {
4378 [EXEC_PRESERVE_NO
] = "no",
4379 [EXEC_PRESERVE_YES
] = "yes",
4380 [EXEC_PRESERVE_RESTART
] = "restart",
4383 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode
, ExecPreserveMode
, EXEC_PRESERVE_YES
);
/* String names of the ExecDirectoryType enumerators, indexed by enumerator value and sized by
 * _EXEC_DIRECTORY_TYPE_MAX. Unlike the other tables here, the names are CamelCase; they look
 * like configuration setting names (TODO confirm). DEFINE_STRING_TABLE_LOOKUP is instantiated
 * for exec_directory_type/ExecDirectoryType below. */
4385 static const char* const exec_directory_type_table
[_EXEC_DIRECTORY_TYPE_MAX
] = {
4386 [EXEC_DIRECTORY_RUNTIME
] = "RuntimeDirectory",
4387 [EXEC_DIRECTORY_STATE
] = "StateDirectory",
4388 [EXEC_DIRECTORY_CACHE
] = "CacheDirectory",
4389 [EXEC_DIRECTORY_LOGS
] = "LogsDirectory",
4390 [EXEC_DIRECTORY_CONFIGURATION
] = "ConfigurationDirectory",
4393 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type
, ExecDirectoryType
);
/* String names of the ExecKeyringMode enumerators, indexed by enumerator value and sized by
 * _EXEC_KEYRING_MODE_MAX. DEFINE_STRING_TABLE_LOOKUP is instantiated for
 * exec_keyring_mode/ExecKeyringMode below, presumably generating the string conversion helpers. */
4395 static const char* const exec_keyring_mode_table
[_EXEC_KEYRING_MODE_MAX
] = {
4396 [EXEC_KEYRING_INHERIT
] = "inherit",
4397 [EXEC_KEYRING_PRIVATE
] = "private",
4398 [EXEC_KEYRING_SHARED
] = "shared",
4401 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode
, ExecKeyringMode
);