1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
35 #include <sys/personality.h>
38 #include <security/pam_appl.h>
42 #include <selinux/selinux.h>
50 #include <sys/apparmor.h>
53 #include "sd-messages.h"
57 #include "capability.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
85 #include "apparmor-util.h"
89 #include "seccomp-util.h"
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97 /* This assumes there is a 'tty' group */
100 #define SNDBUF_SIZE (8*1024*1024)
102 static int shift_fds(int fds
[], unsigned n_fds
) {
103 int start
, restart_from
;
108 /* Modifies the fds array! (sorts it) */
118 for (i
= start
; i
< (int) n_fds
; i
++) {
121 /* Already at right index? */
125 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
132 /* Hmm, the fd we wanted isn't free? Then
133 * let's remember that and try again from here */
134 if (nfd
!= i
+3 && restart_from
< 0)
138 if (restart_from
< 0)
141 start
= restart_from
;
147 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
156 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
158 for (i
= 0; i
< n_fds
; i
++) {
160 r
= fd_nonblock(fds
[i
], nonblock
);
164 /* We unconditionally drop FD_CLOEXEC from the fds,
165 * since after all we want to pass these fds to our
168 r
= fd_cloexec(fds
[i
], false);
176 _pure_
static const char *tty_path(const ExecContext
*context
) {
179 if (context
->tty_path
)
180 return context
->tty_path
;
182 return "/dev/console";
185 static void exec_context_tty_reset(const ExecContext
*context
) {
188 if (context
->tty_vhangup
)
189 terminal_vhangup(tty_path(context
));
191 if (context
->tty_reset
)
192 reset_terminal(tty_path(context
));
194 if (context
->tty_vt_disallocate
&& context
->tty_path
)
195 vt_disallocate(context
->tty_path
);
198 static bool is_terminal_output(ExecOutput o
) {
200 o
== EXEC_OUTPUT_TTY
||
201 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
202 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
203 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
206 static int open_null_as(int flags
, int nfd
) {
211 fd
= open("/dev/null", flags
|O_NOCTTY
);
216 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
224 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
225 union sockaddr_union sa
= {
226 .un
.sun_family
= AF_UNIX
,
227 .un
.sun_path
= "/run/systemd/journal/stdout",
229 uid_t olduid
= UID_INVALID
;
230 gid_t oldgid
= GID_INVALID
;
233 if (gid
!= GID_INVALID
) {
241 if (uid
!= UID_INVALID
) {
251 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
258 if (uid
!= UID_INVALID
)
259 (void) seteuid(olduid
);
262 if (gid
!= GID_INVALID
)
263 (void) setegid(oldgid
);
268 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
272 assert(output
< _EXEC_OUTPUT_MAX
);
276 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
280 r
= connect_journal_socket(fd
, uid
, gid
);
284 if (shutdown(fd
, SHUT_RD
) < 0) {
289 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
299 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
301 context
->syslog_priority
,
302 !!context
->syslog_level_prefix
,
303 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
304 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
305 is_terminal_output(output
));
308 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
315 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
321 fd
= open_terminal(path
, mode
| O_NOCTTY
);
326 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
334 static bool is_terminal_input(ExecInput i
) {
336 i
== EXEC_INPUT_TTY
||
337 i
== EXEC_INPUT_TTY_FORCE
||
338 i
== EXEC_INPUT_TTY_FAIL
;
341 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
343 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
344 return EXEC_INPUT_NULL
;
346 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
347 return EXEC_INPUT_NULL
;
352 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
354 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
355 return EXEC_OUTPUT_INHERIT
;
360 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
365 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
369 case EXEC_INPUT_NULL
:
370 return open_null_as(O_RDONLY
, STDIN_FILENO
);
373 case EXEC_INPUT_TTY_FORCE
:
374 case EXEC_INPUT_TTY_FAIL
: {
377 fd
= acquire_terminal(tty_path(context
),
378 i
== EXEC_INPUT_TTY_FAIL
,
379 i
== EXEC_INPUT_TTY_FORCE
,
385 if (fd
!= STDIN_FILENO
) {
386 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
394 case EXEC_INPUT_SOCKET
:
395 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
398 assert_not_reached("Unknown input type");
402 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
411 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
412 o
= fixup_output(context
->std_output
, socket_fd
);
414 if (fileno
== STDERR_FILENO
) {
416 e
= fixup_output(context
->std_error
, socket_fd
);
418 /* This expects the input and output are already set up */
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e
== EXEC_OUTPUT_INHERIT
&&
423 o
== EXEC_OUTPUT_INHERIT
&&
424 i
== EXEC_INPUT_NULL
&&
425 !is_terminal_input(context
->std_input
) &&
429 /* Duplicate from stdout if possible */
430 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
431 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
435 } else if (o
== EXEC_OUTPUT_INHERIT
) {
436 /* If input got downgraded, inherit the original value */
437 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
438 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i
!= EXEC_INPUT_NULL
)
442 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY
, fileno
);
454 case EXEC_OUTPUT_NULL
:
455 return open_null_as(O_WRONLY
, fileno
);
457 case EXEC_OUTPUT_TTY
:
458 if (is_terminal_input(i
))
459 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
464 case EXEC_OUTPUT_SYSLOG
:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
466 case EXEC_OUTPUT_KMSG
:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
468 case EXEC_OUTPUT_JOURNAL
:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
470 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
472 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
473 r
= open_null_as(O_WRONLY
, fileno
);
477 case EXEC_OUTPUT_SOCKET
:
478 assert(socket_fd
>= 0);
479 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
482 assert_not_reached("Unknown error type");
486 static int chown_terminal(int fd
, uid_t uid
) {
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd
, uid
, -1);
493 (void) fchmod(fd
, TTY_MODE
);
495 if (fstat(fd
, &st
) < 0)
498 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
504 static int setup_confirm_stdio(int *_saved_stdin
,
505 int *_saved_stdout
) {
506 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
508 assert(_saved_stdin
);
509 assert(_saved_stdout
);
511 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
515 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
516 if (saved_stdout
< 0) {
521 fd
= acquire_terminal(
526 DEFAULT_CONFIRM_USEC
);
532 r
= chown_terminal(fd
, getuid());
536 if (dup2(fd
, STDIN_FILENO
) < 0) {
541 if (dup2(fd
, STDOUT_FILENO
) < 0) {
549 *_saved_stdin
= saved_stdin
;
550 *_saved_stdout
= saved_stdout
;
555 safe_close(saved_stdout
);
556 safe_close(saved_stdin
);
562 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
563 _cleanup_close_
int fd
= -1;
568 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
572 va_start(ap
, format
);
573 vdprintf(fd
, format
, ap
);
579 static int restore_confirm_stdio(int *saved_stdin
,
585 assert(saved_stdout
);
589 if (*saved_stdin
>= 0)
590 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
593 if (*saved_stdout
>= 0)
594 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
597 safe_close(*saved_stdin
);
598 safe_close(*saved_stdout
);
603 static int ask_for_confirmation(char *response
, char **argv
) {
604 int saved_stdout
= -1, saved_stdin
= -1, r
;
605 _cleanup_free_
char *line
= NULL
;
607 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
611 line
= exec_command_line(argv
);
615 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
617 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
622 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
623 bool keep_groups
= false;
628 /* Look up and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
631 if (context
->group
|| username
) {
632 /* First step, initialize groups from /etc/group */
633 if (username
&& gid
!= 0) {
634 if (initgroups(username
, gid
) < 0)
640 /* Second step, set our gids */
641 if (setresgid(gid
, gid
, gid
) < 0)
645 if (context
->supplementary_groups
) {
650 /* Final step, initialize any manually set supplementary groups */
651 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
653 if (!(gids
= new(gid_t
, ngroups_max
)))
657 k
= getgroups(ngroups_max
, gids
);
665 STRV_FOREACH(i
, context
->supplementary_groups
) {
668 if (k
>= ngroups_max
) {
674 r
= get_group_creds(&g
, gids
+k
);
683 if (setgroups(k
, gids
) < 0) {
694 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
697 /* Sets (but doesn't look up) the uid and makes sure we keep the
698 * capabilities while doing so. */
700 if (context
->capabilities
) {
701 _cleanup_cap_free_ cap_t d
= NULL
;
702 static const cap_value_t bits
[] = {
703 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
711 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
713 if (prctl(PR_GET_SECUREBITS
) != sb
)
714 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
721 d
= cap_dup(context
->capabilities
);
725 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
726 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
729 if (cap_set_proc(d
) < 0)
733 /* Third step: actually set the uids */
734 if (setresuid(uid
, uid
, uid
) < 0)
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might have gotten
739 corrupted due to the setresuid() so we need to clean them up
740 later. This is done outside of this call. */
747 static int null_conv(
749 const struct pam_message
**msg
,
750 struct pam_response
**resp
,
753 /* We don't support conversations */
758 static int setup_pam(
764 int fds
[], unsigned n_fds
) {
766 static const struct pam_conv conv
= {
771 pam_handle_t
*handle
= NULL
;
773 int pam_code
= PAM_SUCCESS
;
776 bool close_session
= false;
777 pid_t pam_pid
= 0, parent_pid
;
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_SET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
791 if (log_get_max_level() < LOG_DEBUG
)
794 pam_code
= pam_start(name
, user
, &conv
, &handle
);
795 if (pam_code
!= PAM_SUCCESS
) {
801 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
802 if (pam_code
!= PAM_SUCCESS
)
806 pam_code
= pam_acct_mgmt(handle
, flags
);
807 if (pam_code
!= PAM_SUCCESS
)
810 pam_code
= pam_open_session(handle
, flags
);
811 if (pam_code
!= PAM_SUCCESS
)
814 close_session
= true;
816 e
= pam_getenvlist(handle
);
818 pam_code
= PAM_BUF_ERR
;
822 /* Block SIGTERM, so that we know that it won't get lost in
825 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
827 parent_pid
= getpid();
837 /* The child's job is to reset the PAM session on
840 /* This string must fit in 10 chars (i.e. the length
841 * of "/sbin/init"), to look pretty in /bin/ps */
842 rename_process("(sd-pam)");
844 /* Make sure we don't keep open the passed fds in this
845 child. We assume that otherwise only those fds are
846 open here that have been opened by PAM. */
847 close_many(fds
, n_fds
);
849 /* Drop privileges - we don't need any to pam_close_session
850 * and this will make PR_SET_PDEATHSIG work in most cases.
851 * If this fails, ignore the error - but expect sd-pam threads
852 * to fail to exit normally */
853 if (setresuid(uid
, uid
, uid
) < 0)
854 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
856 (void) ignore_signals(SIGPIPE
, -1);
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents to kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
866 /* Check if our parent process might already have
868 if (getppid() == parent_pid
) {
871 assert_se(sigemptyset(&ss
) >= 0);
872 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
875 if (sigwait(&ss
, &sig
) < 0) {
882 assert(sig
== SIGTERM
);
887 /* If our parent died we'll end the session */
888 if (getppid() != parent_pid
) {
889 pam_code
= pam_close_session(handle
, flags
);
890 if (pam_code
!= PAM_SUCCESS
)
897 pam_end(handle
, pam_code
| flags
);
901 /* If the child was forked off successfully it will do all the
902 * cleanups, so forget about the handle here. */
905 /* Unblock SIGTERM again in the parent */
906 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
908 /* We close the log explicitly here, since the PAM modules
909 * might have opened it, but we don't want this fd around. */
918 if (pam_code
!= PAM_SUCCESS
) {
919 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
920 err
= -EPERM
; /* PAM errors do not map to errno */
922 err
= log_error_errno(errno
, "PAM failed: %m");
927 pam_code
= pam_close_session(handle
, flags
);
929 pam_end(handle
, pam_code
| flags
);
937 kill(pam_pid
, SIGTERM
);
938 kill(pam_pid
, SIGCONT
);
945 static void rename_process_from_path(const char *path
) {
946 char process_name
[11];
950 /* This resulting string must fit in 10 chars (i.e. the length
951 * of "/sbin/init") to look pretty in /bin/ps */
955 rename_process("(...)");
961 /* The end of the process name is usually more
962 * interesting, since the first bit might just be
968 process_name
[0] = '(';
969 memcpy(process_name
+1, p
, l
);
970 process_name
[1+l
] = ')';
971 process_name
[1+l
+1] = 0;
973 rename_process(process_name
);
978 static int apply_seccomp(const ExecContext
*c
) {
979 uint32_t negative_action
, action
;
980 scmp_filter_ctx
*seccomp
;
987 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
989 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
993 if (c
->syscall_archs
) {
995 SET_FOREACH(id
, c
->syscall_archs
, i
) {
996 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1004 r
= seccomp_add_secondary_archs(seccomp
);
1009 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1010 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1011 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1016 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1020 r
= seccomp_load(seccomp
);
1023 seccomp_release(seccomp
);
1027 static int apply_address_families(const ExecContext
*c
) {
1028 scmp_filter_ctx
*seccomp
;
1034 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1038 r
= seccomp_add_secondary_archs(seccomp
);
1042 if (c
->address_families_whitelist
) {
1043 int af
, first
= 0, last
= 0;
1046 /* If this is a whitelist, we first block the address
1047 * families that are out of range and then everything
1048 * that is not in the set. First, we find the lowest
1049 * and highest address family in the set. */
1051 SET_FOREACH(afp
, c
->address_families
, i
) {
1052 af
= PTR_TO_INT(afp
);
1054 if (af
<= 0 || af
>= af_max())
1057 if (first
== 0 || af
< first
)
1060 if (last
== 0 || af
> last
)
1064 assert((first
== 0) == (last
== 0));
1068 /* No entries in the valid range, block everything */
1069 r
= seccomp_rule_add(
1071 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1079 /* Block everything below the first entry */
1080 r
= seccomp_rule_add(
1082 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1085 SCMP_A0(SCMP_CMP_LT
, first
));
1089 /* Block everything above the last entry */
1090 r
= seccomp_rule_add(
1092 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1095 SCMP_A0(SCMP_CMP_GT
, last
));
1099 /* Block everything between the first and last
1101 for (af
= 1; af
< af_max(); af
++) {
1103 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1106 r
= seccomp_rule_add(
1108 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1111 SCMP_A0(SCMP_CMP_EQ
, af
));
1120 /* If this is a blacklist, then generate one rule for
1121 * each address family that are then combined in OR
1124 SET_FOREACH(af
, c
->address_families
, i
) {
1126 r
= seccomp_rule_add(
1128 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1131 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1137 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1141 r
= seccomp_load(seccomp
);
1144 seccomp_release(seccomp
);
1150 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1154 idle_pipe
[1] = safe_close(idle_pipe
[1]);
1155 idle_pipe
[2] = safe_close(idle_pipe
[2]);
1157 if (idle_pipe
[0] >= 0) {
1160 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1162 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1165 /* Signal systemd that we are bored and want to continue. */
1166 n
= write(idle_pipe
[3], "x", 1);
1168 /* Wait for systemd to react to the signal above. */
1169 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1172 idle_pipe
[0] = safe_close(idle_pipe
[0]);
1176 idle_pipe
[3] = safe_close(idle_pipe
[3]);
1179 static int build_environment(
1180 const ExecContext
*c
,
1182 usec_t watchdog_usec
,
1184 const char *username
,
1188 _cleanup_strv_free_
char **our_env
= NULL
;
1195 our_env
= new0(char*, 10);
1200 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1202 our_env
[n_env
++] = x
;
1204 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1206 our_env
[n_env
++] = x
;
1209 if (watchdog_usec
> 0) {
1210 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1212 our_env
[n_env
++] = x
;
1214 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1216 our_env
[n_env
++] = x
;
1220 x
= strappend("HOME=", home
);
1223 our_env
[n_env
++] = x
;
1227 x
= strappend("LOGNAME=", username
);
1230 our_env
[n_env
++] = x
;
1232 x
= strappend("USER=", username
);
1235 our_env
[n_env
++] = x
;
1239 x
= strappend("SHELL=", shell
);
1242 our_env
[n_env
++] = x
;
1245 if (is_terminal_input(c
->std_input
) ||
1246 c
->std_output
== EXEC_OUTPUT_TTY
||
1247 c
->std_error
== EXEC_OUTPUT_TTY
||
1250 x
= strdup(default_term_for_tty(tty_path(c
)));
1253 our_env
[n_env
++] = x
;
1256 our_env
[n_env
++] = NULL
;
1257 assert(n_env
<= 10);
1265 static bool exec_needs_mount_namespace(
1266 const ExecContext
*context
,
1267 const ExecParameters
*params
,
1268 ExecRuntime
*runtime
) {
1273 if (!strv_isempty(context
->read_write_dirs
) ||
1274 !strv_isempty(context
->read_only_dirs
) ||
1275 !strv_isempty(context
->inaccessible_dirs
))
1278 if (context
->mount_flags
!= 0)
1281 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1284 if (params
->bus_endpoint_path
)
1287 if (context
->private_devices
||
1288 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1289 context
->protect_home
!= PROTECT_HOME_NO
)
1295 static int exec_child(
1297 ExecCommand
*command
,
1298 const ExecContext
*context
,
1299 const ExecParameters
*params
,
1300 ExecRuntime
*runtime
,
1303 int *fds
, unsigned n_fds
,
1307 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1308 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1309 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1310 unsigned n_dont_close
= 0;
1311 int dont_close
[n_fds
+ 4];
1312 uid_t uid
= UID_INVALID
;
1313 gid_t gid
= GID_INVALID
;
1315 bool needs_mount_namespace
;
1321 assert(exit_status
);
1323 rename_process_from_path(command
->path
);
1325 /* We reset exactly these signals, since they are the
1326 * only ones we set to SIG_IGN in the main daemon. All
1327 * others we leave untouched because we set them to
1328 * SIG_DFL or a valid handler initially, both of which
1329 * will be demoted to SIG_DFL. */
1330 (void) default_signals(SIGNALS_CRASH_HANDLER
,
1331 SIGNALS_IGNORE
, -1);
1333 if (context
->ignore_sigpipe
)
1334 (void) ignore_signals(SIGPIPE
, -1);
1336 r
= reset_signal_mask();
1338 *exit_status
= EXIT_SIGNAL_MASK
;
1342 if (params
->idle_pipe
)
1343 do_idle_pipe_dance(params
->idle_pipe
);
1345 /* Close sockets very early to make sure we don't
1346 * block init reexecution because it cannot bind its
1352 dont_close
[n_dont_close
++] = socket_fd
;
1354 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1355 n_dont_close
+= n_fds
;
1357 if (params
->bus_endpoint_fd
>= 0)
1358 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1360 if (runtime
->netns_storage_socket
[0] >= 0)
1361 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1362 if (runtime
->netns_storage_socket
[1] >= 0)
1363 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1366 r
= close_all_fds(dont_close
, n_dont_close
);
1368 *exit_status
= EXIT_FDS
;
1372 if (!context
->same_pgrp
)
1374 *exit_status
= EXIT_SETSID
;
1378 exec_context_tty_reset(context
);
1380 if (params
->confirm_spawn
) {
1383 r
= ask_for_confirmation(&response
, argv
);
1384 if (r
== -ETIMEDOUT
)
1385 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1387 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1388 else if (response
== 's') {
1389 write_confirm_message("Skipping execution.\n");
1390 *exit_status
= EXIT_CONFIRM
;
1392 } else if (response
== 'n') {
1393 write_confirm_message("Failing execution.\n");
1399 if (context
->user
) {
1400 username
= context
->user
;
1401 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1403 *exit_status
= EXIT_USER
;
1408 if (context
->group
) {
1409 const char *g
= context
->group
;
1411 r
= get_group_creds(&g
, &gid
);
1413 *exit_status
= EXIT_GROUP
;
1419 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1420 * must make sure to drop O_NONBLOCK */
1422 fd_nonblock(socket_fd
, false);
1424 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1426 *exit_status
= EXIT_STDIN
;
1430 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1432 *exit_status
= EXIT_STDOUT
;
1436 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1438 *exit_status
= EXIT_STDERR
;
1442 if (params
->cgroup_path
) {
1443 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1445 *exit_status
= EXIT_CGROUP
;
1450 if (context
->oom_score_adjust_set
) {
1451 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1453 /* When we can't make this change due to EPERM, then
1454 * let's silently skip over it. User namespaces
1455 * prohibit write access to this file, and we
1456 * shouldn't trip up over that. */
1458 sprintf(t
, "%i", context
->oom_score_adjust
);
1459 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
1460 if (r
== -EPERM
|| r
== -EACCES
) {
1462 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1465 *exit_status
= EXIT_OOM_ADJUST
;
1470 if (context
->nice_set
)
1471 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1472 *exit_status
= EXIT_NICE
;
1476 if (context
->cpu_sched_set
) {
1477 struct sched_param param
= {
1478 .sched_priority
= context
->cpu_sched_priority
,
1481 r
= sched_setscheduler(0,
1482 context
->cpu_sched_policy
|
1483 (context
->cpu_sched_reset_on_fork
?
1484 SCHED_RESET_ON_FORK
: 0),
1487 *exit_status
= EXIT_SETSCHEDULER
;
1492 if (context
->cpuset
)
1493 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1494 *exit_status
= EXIT_CPUAFFINITY
;
1498 if (context
->ioprio_set
)
1499 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1500 *exit_status
= EXIT_IOPRIO
;
1504 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1505 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1506 *exit_status
= EXIT_TIMERSLACK
;
1510 if (context
->personality
!= PERSONALITY_INVALID
)
1511 if (personality(context
->personality
) < 0) {
1512 *exit_status
= EXIT_PERSONALITY
;
1516 if (context
->utmp_id
)
1517 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
,
1518 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
1519 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
1521 username
? "root" : context
->user
);
1523 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1524 r
= chown_terminal(STDIN_FILENO
, uid
);
1526 *exit_status
= EXIT_STDIN
;
1531 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1532 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1534 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1536 *exit_status
= EXIT_BUS_ENDPOINT
;
1541 /* If delegation is enabled we'll pass ownership of the cgroup
1542 * (but only in systemd's own controller hierarchy!) to the
1543 * user of the new process. */
1544 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1545 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1547 *exit_status
= EXIT_CGROUP
;
1552 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1554 *exit_status
= EXIT_CGROUP
;
1559 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1562 STRV_FOREACH(rt
, context
->runtime_directory
) {
1563 _cleanup_free_
char *p
;
1565 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1567 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1571 r
= mkdir_p_label(p
, context
->runtime_directory_mode
);
1573 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1577 r
= chmod_and_chown(p
, context
->runtime_directory_mode
, uid
, gid
);
1579 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1585 if (params
->apply_permissions
) {
1586 r
= enforce_groups(context
, username
, gid
);
1588 *exit_status
= EXIT_GROUP
;
1593 umask(context
->umask
);
1596 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1597 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1599 *exit_status
= EXIT_PAM
;
1605 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1606 r
= setup_netns(runtime
->netns_storage_socket
);
1608 *exit_status
= EXIT_NETWORK
;
1613 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1615 if (needs_mount_namespace
) {
1616 char *tmp
= NULL
, *var
= NULL
;
1618 /* The runtime struct only contains the parent
1619 * of the private /tmp, which is
1620 * non-accessible to world users. Inside of it
1621 * there's a /tmp that is sticky, and that's
1622 * the one we want to use here. */
1624 if (context
->private_tmp
&& runtime
) {
1625 if (runtime
->tmp_dir
)
1626 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1627 if (runtime
->var_tmp_dir
)
1628 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1631 r
= setup_namespace(
1632 params
->apply_chroot
? context
->root_directory
: NULL
,
1633 context
->read_write_dirs
,
1634 context
->read_only_dirs
,
1635 context
->inaccessible_dirs
,
1638 params
->bus_endpoint_path
,
1639 context
->private_devices
,
1640 context
->protect_home
,
1641 context
->protect_system
,
1642 context
->mount_flags
);
1644 /* If we couldn't set up the namespace this is
1645 * probably due to a missing capability. In this case,
1646 * silently proceed. */
1647 if (r
== -EPERM
|| r
== -EACCES
) {
1649 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1652 *exit_status
= EXIT_NAMESPACE
;
1657 if (params
->apply_chroot
) {
1658 if (!needs_mount_namespace
&& context
->root_directory
)
1659 if (chroot(context
->root_directory
) < 0) {
1660 *exit_status
= EXIT_CHROOT
;
1664 if (chdir(context
->working_directory
?: "/") < 0 &&
1665 !context
->working_directory_missing_ok
) {
1666 *exit_status
= EXIT_CHDIR
;
1670 _cleanup_free_
char *d
= NULL
;
1672 if (asprintf(&d
, "%s/%s",
1673 context
->root_directory
?: "",
1674 context
->working_directory
?: "") < 0) {
1675 *exit_status
= EXIT_MEMORY
;
1680 !context
->working_directory_missing_ok
) {
1681 *exit_status
= EXIT_CHDIR
;
1687 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1688 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1690 *exit_status
= EXIT_SELINUX_CONTEXT
;
1696 /* We repeat the fd closing here, to make sure that
1697 * nothing is leaked from the PAM modules. Note that
1698 * we are more aggressive this time since socket_fd
1699 * and the netns fds we don't need anymore. The custom
1700 * endpoint fd was needed to upload the policy and can
1701 * now be closed as well. */
1702 r
= close_all_fds(fds
, n_fds
);
1704 r
= shift_fds(fds
, n_fds
);
1706 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1708 *exit_status
= EXIT_FDS
;
1712 if (params
->apply_permissions
) {
1714 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1715 if (!context
->rlimit
[i
])
1718 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1719 *exit_status
= EXIT_LIMITS
;
1724 if (context
->capability_bounding_set_drop
) {
1725 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1727 *exit_status
= EXIT_CAPABILITIES
;
1733 if (context
->smack_process_label
) {
1734 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1736 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1740 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1742 _cleanup_free_
char *exec_label
= NULL
;
1744 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1745 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
) {
1746 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1750 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1752 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1759 if (context
->user
) {
1760 r
= enforce_user(context
, uid
);
1762 *exit_status
= EXIT_USER
;
1767 /* PR_GET_SECUREBITS is not privileged, while
1768 * PR_SET_SECUREBITS is. So to suppress
1769 * potential EPERMs we'll try not to call
1770 * PR_SET_SECUREBITS unless necessary. */
1771 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1772 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1773 *exit_status
= EXIT_SECUREBITS
;
1777 if (context
->capabilities
)
1778 if (cap_set_proc(context
->capabilities
) < 0) {
1779 *exit_status
= EXIT_CAPABILITIES
;
1783 if (context
->no_new_privileges
)
1784 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1785 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1790 if (context
->address_families_whitelist
||
1791 !set_isempty(context
->address_families
)) {
1792 r
= apply_address_families(context
);
1794 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1799 if (context
->syscall_whitelist
||
1800 !set_isempty(context
->syscall_filter
) ||
1801 !set_isempty(context
->syscall_archs
)) {
1802 r
= apply_seccomp(context
);
1804 *exit_status
= EXIT_SECCOMP
;
1811 if (mac_selinux_use()) {
1812 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1815 r
= setexeccon(exec_context
);
1817 *exit_status
= EXIT_SELINUX_CONTEXT
;
1824 #ifdef HAVE_APPARMOR
1825 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1826 r
= aa_change_onexec(context
->apparmor_profile
);
1827 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1828 *exit_status
= EXIT_APPARMOR_PROFILE
;
1835 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1837 *exit_status
= EXIT_MEMORY
;
1841 final_env
= strv_env_merge(5,
1842 params
->environment
,
1844 context
->environment
,
1849 *exit_status
= EXIT_MEMORY
;
1853 final_argv
= replace_env_argv(argv
, final_env
);
1855 *exit_status
= EXIT_MEMORY
;
1859 final_env
= strv_env_clean(final_env
);
1861 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1862 _cleanup_free_
char *line
;
1864 line
= exec_command_line(final_argv
);
1867 log_struct(LOG_DEBUG
,
1869 "EXECUTABLE=%s", command
->path
,
1870 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1876 execve(command
->path
, final_argv
, final_env
);
1877 *exit_status
= EXIT_EXEC
;
/* Spawns `command` on behalf of `unit`.
 *
 * Only part of this function is visible in this excerpt (the remaining
 * parameters, the fork() call and several returns are not shown). The steps
 * that can be read off the visible code are:
 *
 *  - if stdin, stdout or stderr is configured as a socket, params->n_fds
 *    must be exactly 1 and params->fds[0] becomes socket_fd. Note that the
 *    logged text "Got more than one socket." is also emitted when n_fds is
 *    0, because the check is `n_fds != 1`;
 *  - n_fds is otherwise taken from params->n_fds (presumably in the
 *    non-socket branch; the `else` is not visible here);
 *  - environment files are loaded with exec_context_load_environment();
 *    a failure is logged and the logger's return value is returned;
 *  - params->argv, when set, takes precedence over command->argv;
 *  - the flattened command line is logged at debug level ("About to
 *    execute");
 *  - a fork failure is logged and returned;
 *  - a failing exec_child() is logged with SD_MESSAGE_SPAWN_FAILED together
 *    with the step name derived from exit_status;
 *  - the new pid is logged, attached to params->cgroup_path when that is
 *    set (result deliberately discarded), and recorded with
 *    exec_status_start(). */
1881 int exec_spawn(Unit
*unit
,
1882 ExecCommand
*command
,
1883 const ExecContext
*context
,
1884 const ExecParameters
*params
,
1885 ExecRuntime
*runtime
,
1888 _cleanup_strv_free_
char **files_env
= NULL
;
1889 int *fds
= NULL
; unsigned n_fds
= 0;
1890 _cleanup_free_
char *line
= NULL
;
/* An fd array may only be absent when no fds are announced. */
1900 assert(params
->fds
|| params
->n_fds
<= 0);
1902 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1903 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1904 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1906 if (params
->n_fds
!= 1) {
1907 log_unit_error(unit
, "Got more than one socket.");
1911 socket_fd
= params
->fds
[0];
1915 n_fds
= params
->n_fds
;
1918 r
= exec_context_load_environment(unit
, context
, &files_env
);
1920 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
/* An argv supplied through params overrides the one stored in the command. */
1922 argv
= params
->argv
?: command
->argv
;
1923 line
= exec_command_line(argv
);
1927 log_struct(LOG_DEBUG
,
1929 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1930 "EXECUTABLE=%s", command
->path
,
1934 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1939 r
= exec_child(unit
,
1951 log_struct_errno(LOG_ERR
, r
,
1952 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1954 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1955 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1957 "EXECUTABLE=%s", command
->path
,
1964 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1966 /* We add the new process to the cgroup both in the child (so
1967 * that we can be sure that no user code is ever executed
1968 * outside of the cgroup) and in the parent (so that we can be
1969 * sure that when we kill the cgroup the process will be
1971 if (params
->cgroup_path
)
1972 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1974 exec_status_start(&command
->exec_status
, pid
);
/* Fills in the non-zero defaults of an ExecContext.
 *
 * Defaults set by the visible lines: I/O priority IOPRIO_CLASS_BE with
 * data 0, CPU scheduling policy SCHED_OTHER, syslog priority
 * LOG_DAEMON|LOG_INFO, syslog_level_prefix and ignore_sigpipe enabled,
 * runtime directory mode 0755. timer_slack_nsec and personality are set to
 * their NSEC_INFINITY / PERSONALITY_INVALID sentinels, which
 * exec_context_dump() treats as "not configured" (it skips printing them).
 * Lines of this function that are not visible in this excerpt may set
 * further fields. */
1980 void exec_context_init(ExecContext
*c
) {
1984 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1985 c
->cpu_sched_policy
= SCHED_OTHER
;
1986 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1987 c
->syslog_level_prefix
= true;
1988 c
->ignore_sigpipe
= true;
1989 c
->timer_slack_nsec
= NSEC_INFINITY
;
1990 c
->personality
= PERSONALITY_INVALID
;
1991 c
->runtime_directory_mode
= 0755;
/* Releases the resources referenced by an ExecContext.
 *
 * Every pointer member handled here is passed to its matching release
 * helper (strv_free(), mfree(), set_free(), cap_free(), CPU_FREE(),
 * bus_endpoint_free()) and the member is then overwritten, either with the
 * helper's return value or with an explicit NULL, so the structure does not
 * keep pointing at released memory. The ExecContext itself is not freed by
 * the visible code. */
1994 void exec_context_done(ExecContext
*c
) {
1999 c
->environment
= strv_free(c
->environment
);
2000 c
->environment_files
= strv_free(c
->environment_files
);
/* One heap-allocated limit per slot of the rlimit array. */
2002 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++)
2003 c
->rlimit
[l
] = mfree(c
->rlimit
[l
]);
2005 c
->working_directory
= mfree(c
->working_directory
);
2006 c
->root_directory
= mfree(c
->root_directory
);
2007 c
->tty_path
= mfree(c
->tty_path
);
2008 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
2009 c
->user
= mfree(c
->user
);
2010 c
->group
= mfree(c
->group
);
2012 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
2014 c
->pam_name
= mfree(c
->pam_name
);
/* Capability sets come from libcap and must go back through cap_free(). */
2016 if (c
->capabilities
) {
2017 cap_free(c
->capabilities
);
2018 c
->capabilities
= NULL
;
2021 c
->read_only_dirs
= strv_free(c
->read_only_dirs
);
2022 c
->read_write_dirs
= strv_free(c
->read_write_dirs
);
2023 c
->inaccessible_dirs
= strv_free(c
->inaccessible_dirs
);
/* NOTE(review): any guard around CPU_FREE() is not visible in this excerpt. */
2026 CPU_FREE(c
->cpuset
);
2028 c
->utmp_id
= mfree(c
->utmp_id
);
2029 c
->selinux_context
= mfree(c
->selinux_context
);
2030 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
2032 c
->syscall_filter
= set_free(c
->syscall_filter
);
2033 c
->syscall_archs
= set_free(c
->syscall_archs
);
2034 c
->address_families
= set_free(c
->address_families
);
2036 c
->runtime_directory
= strv_free(c
->runtime_directory
);
2038 bus_endpoint_free(c
->bus_endpoint
);
2039 c
->bus_endpoint
= NULL
;
/* Removes the runtime directories configured for this context.
 *
 * For every entry of c->runtime_directory the path
 * "<runtime_prefix>/<entry>" is built with strjoin() and removed with
 * rm_rf(..., REMOVE_ROOT); the result of rm_rf() is deliberately discarded.
 * A NULL runtime_prefix is checked up front (the statement it guards is not
 * visible in this excerpt, presumably an early return). */
2042 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2047 if (!runtime_prefix
)
2050 STRV_FOREACH(i
, c
->runtime_directory
) {
2051 _cleanup_free_
char *p
;
2053 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2057 /* We execute this synchronously, since we need to be
2058 * sure this is gone when we start the service
2060 (void) rm_rf(p
, REMOVE_ROOT
);
/* Releases the path string and the argv vector of an ExecCommand and
 * stores the release helpers' return values back into the fields. The
 * ExecCommand structure itself is not freed by the visible code. */
2066 void exec_command_done(ExecCommand
*c
) {
2069 c
->path
= mfree(c
->path
);
2071 c
->argv
= strv_free(c
->argv
);
/* Calls exec_command_done() on each of the `n` ExecCommand objects stored
 * contiguously starting at `c`. */
2074 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2077 for (i
= 0; i
< n
; i
++)
2078 exec_command_done(c
+i
);
/* Tears down a linked list of ExecCommand objects headed by `c`.
 *
 * The visible body unlinks an entry `i` from the list (LIST_REMOVE on the
 * `command` link) and releases its contents with exec_command_done().
 * NOTE(review): the surrounding loop, the freeing of the entry itself and
 * the returned value are not visible in this excerpt; callers in this file
 * store the return value back into the list head. */
2081 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2085 LIST_REMOVE(command
, c
, i
);
2086 exec_command_done(i
);
/* Frees `n` command lists: each list head c[i] is passed to
 * exec_command_free_list() and replaced by that function's return value. */
2093 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2096 for (i
= 0; i
< n
; i
++)
2097 c
[i
] = exec_command_free_list(c
[i
]);
/* Context handed as userdata to invalid_env(). The members are not visible
 * in this excerpt; the code in this file uses `unit` (for logging) and
 * `path` (set to the environment file name being processed). */
2100 typedef struct InvalidEnvInfo
{
2105 static void invalid_env(const char *p
, void *userdata
) {
2106 InvalidEnvInfo
*info
= userdata
;
2108 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
/* Loads all environment files listed in c->environment_files and merges
 * them into one string vector.
 *
 * Steps visible in this excerpt, per configured entry:
 *  - the file name must be an absolute path;
 *  - the name is expanded with glob(), so one entry may match several
 *    files; when glob() fails the function returns -errno, or -EINVAL if
 *    errno is 0 (NOTE(review): an `ignore` flag exists per entry, but how
 *    it is set and where it suppresses errors is not visible here);
 *  - every match is read with load_env_file();
 *  - invalid assignments are dropped by strv_env_clean_with_callback(),
 *    with invalid_env() logging each one together with the file name;
 *  - the result is merged into the accumulated vector `r` with
 *    strv_env_merge().
 *
 * NOTE(review): the final hand-over of `r` through `l` and the success
 * return value are not visible in this excerpt. */
2111 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2112 char **i
, **r
= NULL
;
2117 STRV_FOREACH(i
, c
->environment_files
) {
2120 bool ignore
= false;
2122 _cleanup_globfree_ glob_t pglob
= {};
2132 if (!path_is_absolute(fn
)) {
2140 /* Filename supports globbing, take all matching files */
2142 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2147 return errno
? -errno
: -EINVAL
;
2149 count
= pglob
.gl_pathc
;
2157 for (n
= 0; n
< count
; n
++) {
2158 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2166 /* Log invalid environment variables with filename */
2168 InvalidEnvInfo info
= {
2170 .path
= pglob
.gl_pathv
[n
]
2173 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2181 m
= strv_env_merge(2, r
, p
);
/* Decides whether the terminal named `tty` could be the device behind
 * /dev/console.
 *
 * The visible code special-cases a "/dev/" prefix and the literal name
 * "console", resolves the current console with resolve_dev_console(), and
 * finally reports a match when the resolved console equals `tty`, or when
 * the console is "tty0" and `tty` is a virtual console (tty_is_vc()).
 * NOTE(review): the statements guarded by the first three checks are not
 * visible in this excerpt; the existing comments indicate that a failed
 * resolution is treated as "may match". */
2197 static bool tty_may_match_dev_console(const char *tty
) {
2198 _cleanup_free_
char *active
= NULL
;
2201 if (startswith(tty
, "/dev/"))
2204 /* trivial identity? */
2205 if (streq(tty
, "console"))
2208 console
= resolve_dev_console(&active
);
2209 /* if we could not resolve, assume it may */
2213 /* "tty0" means the active VC, so it may be the same sometimes */
2214 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2217 bool exec_context_may_touch_console(ExecContext
*ec
) {
2218 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2219 is_terminal_input(ec
->std_input
) ||
2220 is_terminal_output(ec
->std_output
) ||
2221 is_terminal_output(ec
->std_error
)) &&
2222 tty_may_match_dev_console(tty_path(ec
));
/* Writes strings from the vector `l` to `f`, each preceded by a single
 * space and without a trailing newline. NOTE(review): the loop that walks
 * the vector is not visible in this excerpt; only the per-element
 * fprintf() is. */
2225 static void strv_fprintf(FILE *f
, char **l
) {
2231 fprintf(f
, " %s", *g
);
/* Writes a human-readable description of an ExecContext to `f`.
 *
 * c      - the context to describe
 * f      - output stream
 * prefix - string put in front of every output line; NULL is treated as
 *          the empty string (strempty())
 *
 * Output consists of "<prefix><Name>: <value>" lines. Working and root
 * directory default to "/" when unset; optional settings (OOM score
 * adjustment, resource limits, I/O and CPU scheduling, timer slack, syslog
 * facility/level, capabilities, secure bits, user/group, directory lists,
 * SELinux/AppArmor labels, personality, system call filter settings, ...)
 * are only printed when configured.
 *
 * Fix relative to the previous version: in the "Secure Bits:" line the
 * "noroot-locked" label was the only one without a leading space, so it was
 * glued to the preceding label (e.g. " norootnoroot-locked") or to the
 * colon. It now carries the same leading space as its five siblings.
 *
 * NOTE(review): several lines of this function (local declarations, some
 * `fprintf(f,` openers and closing braces) are not visible in this excerpt
 * and are left untouched. */
2234 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2241 prefix
= strempty(prefix
);
2245 "%sWorkingDirectory: %s\n"
2246 "%sRootDirectory: %s\n"
2247 "%sNonBlocking: %s\n"
2248 "%sPrivateTmp: %s\n"
2249 "%sPrivateNetwork: %s\n"
2250 "%sPrivateDevices: %s\n"
2251 "%sProtectHome: %s\n"
2252 "%sProtectSystem: %s\n"
2253 "%sIgnoreSIGPIPE: %s\n",
2255 prefix
, c
->working_directory
? c
->working_directory
: "/",
2256 prefix
, c
->root_directory
? c
->root_directory
: "/",
2257 prefix
, yes_no(c
->non_blocking
),
2258 prefix
, yes_no(c
->private_tmp
),
2259 prefix
, yes_no(c
->private_network
),
2260 prefix
, yes_no(c
->private_devices
),
2261 prefix
, protect_home_to_string(c
->protect_home
),
2262 prefix
, protect_system_to_string(c
->protect_system
),
2263 prefix
, yes_no(c
->ignore_sigpipe
));
2265 STRV_FOREACH(e
, c
->environment
)
2266 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2268 STRV_FOREACH(e
, c
->environment_files
)
2269 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2276 if (c
->oom_score_adjust_set
)
2278 "%sOOMScoreAdjust: %i\n",
2279 prefix
, c
->oom_score_adjust
);
2281 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2283 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2284 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2286 if (c
->ioprio_set
) {
2287 _cleanup_free_
char *class_str
= NULL
;
2289 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2291 "%sIOSchedulingClass: %s\n"
2292 "%sIOPriority: %i\n",
2293 prefix
, strna(class_str
),
2294 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2297 if (c
->cpu_sched_set
) {
2298 _cleanup_free_
char *policy_str
= NULL
;
2300 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2302 "%sCPUSchedulingPolicy: %s\n"
2303 "%sCPUSchedulingPriority: %i\n"
2304 "%sCPUSchedulingResetOnFork: %s\n",
2305 prefix
, strna(policy_str
),
2306 prefix
, c
->cpu_sched_priority
,
2307 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2311 fprintf(f
, "%sCPUAffinity:", prefix
);
2312 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2313 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2314 fprintf(f
, " %u", i
);
/* NSEC_INFINITY is the "not configured" value set by exec_context_init(). */
2318 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2319 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2322 "%sStandardInput: %s\n"
2323 "%sStandardOutput: %s\n"
2324 "%sStandardError: %s\n",
2325 prefix
, exec_input_to_string(c
->std_input
),
2326 prefix
, exec_output_to_string(c
->std_output
),
2327 prefix
, exec_output_to_string(c
->std_error
));
2333 "%sTTYVHangup: %s\n"
2334 "%sTTYVTDisallocate: %s\n",
2335 prefix
, c
->tty_path
,
2336 prefix
, yes_no(c
->tty_reset
),
2337 prefix
, yes_no(c
->tty_vhangup
),
2338 prefix
, yes_no(c
->tty_vt_disallocate
));
/* Syslog facility and level are only shown when stdout or stderr goes to
 * one of the logging targets. */
2340 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2341 c
->std_output
== EXEC_OUTPUT_KMSG
||
2342 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2343 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2344 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2345 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2346 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2347 c
->std_error
== EXEC_OUTPUT_KMSG
||
2348 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2349 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2350 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2351 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2353 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2355 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2356 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2359 "%sSyslogFacility: %s\n"
2360 "%sSyslogLevel: %s\n",
2361 prefix
, strna(fac_str
),
2362 prefix
, strna(lvl_str
));
2365 if (c
->capabilities
) {
2366 _cleanup_cap_free_charp_
char *t
;
2368 t
= cap_to_text(c
->capabilities
, NULL
);
2370 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
/* Each set secure bit contributes one label; every label starts with a
 * space so that the labels stay separated from "Secure Bits:" and from
 * each other. */
2374 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2376 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2377 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2378 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2379 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2380 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2381 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? " noroot-locked" : "");
2383 if (c
->capability_bounding_set_drop
) {
2385 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
/* The field stores the capabilities to drop, so the ones printed are those
 * whose bit is clear. */
2387 for (l
= 0; l
<= cap_last_cap(); l
++)
2388 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2389 fprintf(f
, " %s", strna(capability_to_name(l
)));
2395 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2397 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2399 if (strv_length(c
->supplementary_groups
) > 0) {
2400 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2401 strv_fprintf(f
, c
->supplementary_groups
);
2406 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2408 if (strv_length(c
->read_write_dirs
) > 0) {
2409 fprintf(f
, "%sReadWriteDirs:", prefix
);
2410 strv_fprintf(f
, c
->read_write_dirs
);
2414 if (strv_length(c
->read_only_dirs
) > 0) {
2415 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2416 strv_fprintf(f
, c
->read_only_dirs
);
2420 if (strv_length(c
->inaccessible_dirs
) > 0) {
2421 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2422 strv_fprintf(f
, c
->inaccessible_dirs
);
2428 "%sUtmpIdentifier: %s\n",
2429 prefix
, c
->utmp_id
);
/* A leading "-" marks a label whose application failure is ignored. */
2431 if (c
->selinux_context
)
2433 "%sSELinuxContext: %s%s\n",
2434 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2436 if (c
->personality
!= PERSONALITY_INVALID
)
2438 "%sPersonality: %s\n",
2439 prefix
, strna(personality_to_string(c
->personality
)));
2441 if (c
->syscall_filter
) {
2449 "%sSystemCallFilter: ",
2452 if (!c
->syscall_whitelist
)
2456 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2457 _cleanup_free_
char *name
= NULL
;
/* Set entries are stored offset by one, hence the "- 1". */
2464 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2465 fputs(strna(name
), f
);
2472 if (c
->syscall_archs
) {
2479 "%sSystemCallArchitectures:",
2483 SET_FOREACH(id
, c
->syscall_archs
, j
)
2484 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2489 if (c
->syscall_errno
!= 0)
2491 "%sSystemCallErrorNumber: %s\n",
2492 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2494 if (c
->apparmor_profile
)
2496 "%sAppArmorProfile: %s%s\n",
2497 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
/* Reports whether a process started with this context keeps the privileges
 * of its parent. The visible check treats the user names "root" and "0" as
 * privileged. NOTE(review): the handling of an unset c->user and the
 * return statements are not visible in this excerpt. */
2500 bool exec_context_maintains_privileges(ExecContext
*c
) {
2503 /* Returns true if the process forked off would run under
2504 * an unchanged UID or as root. */
2509 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
/* Marks the start of a process in an ExecStatus: the visible code stamps
 * s->start_timestamp with the current time. NOTE(review): how `pid` is
 * stored is not visible in this excerpt. */
2515 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2520 dual_timestamp_get(&s
->start_timestamp
);
/* Records the termination of process `pid` in an ExecStatus.
 *
 * Visible behaviour: a status that currently belongs to a different,
 * non-zero pid is special-cased first (the guarded statement is not visible
 * in this excerpt); the exit timestamp is taken; if the context has a utmp
 * identifier a dead-process utmp record is written with the given code and
 * status; finally the context's terminal is reset via
 * exec_context_tty_reset(). NOTE(review): the assignments of pid, code and
 * status to `s`, and any NULL check on `context`, are not visible here. */
2523 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2526 if (s
->pid
&& s
->pid
!= pid
)
2530 dual_timestamp_get(&s
->exit_timestamp
);
2536 if (context
->utmp_id
)
2537 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2539 exec_context_tty_reset(context
);
/* Writes a human-readable description of an ExecStatus to `f`.
 *
 * prefix is put in front of every line; NULL is treated as "". The PID is
 * printed first; the start timestamp is printed only when its realtime
 * value is non-zero, and the exit timestamp, exit code and exit status only
 * when the exit timestamp's realtime value is non-zero. Both timestamps are
 * formatted into the same stack buffer, one after the other. */
2543 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2544 char buf
[FORMAT_TIMESTAMP_MAX
];
2552 prefix
= strempty(prefix
);
2555 "%sPID: "PID_FMT
"\n",
2558 if (s
->start_timestamp
.realtime
> 0)
2560 "%sStart Timestamp: %s\n",
2561 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2563 if (s
->exit_timestamp
.realtime
> 0)
2565 "%sExit Timestamp: %s\n"
2567 "%sExit Status: %i\n",
2568 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2569 prefix
, sigchld_code_to_string(s
->code
),
/* Flattens an argument vector into one newly allocated string, intended
 * for display (callers in this file use it for log and dump output).
 *
 * The visible code makes two passes over argv: one before the buffer of
 * `k` chars is allocated (presumably to compute `k`) and one that emits the
 * arguments, treating arguments that contain whitespace specially.
 * Allocation failure is checked; callers in this file handle a NULL
 * result. As the FIXME notes, arguments containing both spaces and quote
 * characters are not represented faithfully, so the result must not be
 * re-parsed as a command line. */
2573 char *exec_command_line(char **argv
) {
2581 STRV_FOREACH(a
, argv
)
2584 if (!(n
= new(char, k
)))
2588 STRV_FOREACH(a
, argv
) {
2595 if (strpbrk(*a
, WHITESPACE
)) {
2606 /* FIXME: this doesn't really handle arguments that have
2607 * spaces and ticks in them */
/* Writes a description of one ExecCommand to `f`: its command line,
 * followed by its exec status.
 *
 * prefix is put in front of the command-line output; NULL is treated as "".
 * The status lines get the prefix with a tab appended (prefix2, built on
 * the stack by strjoina()). If the command line cannot be built, the text
 * of strerror(ENOMEM) is printed in its place. */
2612 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2613 _cleanup_free_
char *cmd
= NULL
;
2614 const char *prefix2
;
2619 prefix
= strempty(prefix
);
2620 prefix2
= strjoina(prefix
, "\t");
2622 cmd
= exec_command_line(c
->argv
);
2624 "%sCommand Line: %s\n",
2625 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2627 exec_status_dump(&c
->exec_status
, f
, prefix2
);
/* Dumps every ExecCommand of the list starting at `c` with
 * exec_command_dump(). A NULL prefix is treated as "". The parameter `c`
 * itself serves as the iteration cursor. */
2630 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2633 prefix
= strempty(prefix
);
2635 LIST_FOREACH(command
, c
, c
)
2636 exec_command_dump(c
, f
, prefix
);
/* Adds command `e` to the list `*l`. The visible code looks up the current
 * tail and inserts `e` after it, so existing commands keep their order and
 * the new one becomes the last. NOTE(review): handling of an empty list is
 * not visible in this excerpt. */
2639 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2646 /* It's kind of important, that we keep the order here */
2647 LIST_FIND_TAIL(command
, *l
, end
);
2648 LIST_INSERT_AFTER(command
, *l
, end
, e
);
/* Sets up an ExecCommand from `path` and the variadic arguments that
 * follow it. The visible code builds a string vector from them with
 * strv_new_ap(). NOTE(review): the va_start/va_end handling, the
 * assignment to `c` and the return values are not visible in this
 * excerpt. */
2653 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2661 l
= strv_new_ap(path
, ap
);
/* Appends arguments to an existing ExecCommand: `path` and the variadic
 * arguments after it are collected into a temporary string vector with
 * strv_new_ap(), which is then appended to c->argv with
 * strv_extend_strv(). The temporary vector is released automatically
 * (_cleanup_strv_free_). NOTE(review): error checks and return values are
 * not visible in this excerpt. */
2682 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2683 _cleanup_strv_free_
char **l
= NULL
;
2691 l
= strv_new_ap(path
, ap
);
2697 r
= strv_extend_strv(&c
->argv
, l
);
/* Allocates a zero-initialised ExecRuntime into *rt and marks both ends of
 * its network-namespace storage socket pair as unset (-1), since 0 would
 * be a valid file descriptor. NOTE(review): the allocation-failure check,
 * any "already allocated" shortcut, reference-count setup and the return
 * values are not visible in this excerpt. */
2705 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2710 *rt
= new0(ExecRuntime
, 1);
2715 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
/* Creates the runtime objects an ExecContext needs.
 *
 * A runtime is only relevant when the context uses a private network
 * and/or a private /tmp (the visible check handles the "neither" case
 * separately; the statement it guards is not visible in this excerpt).
 * Otherwise an ExecRuntime is allocated and:
 *  - for private_network, a datagram AF_UNIX socket pair is created unless
 *    netns_storage_socket[0] is already set;
 *  - for private_tmp, setup_tmp_dirs() fills tmp_dir and var_tmp_dir
 *    unless tmp_dir is already set.
 * `id` is passed through to setup_tmp_dirs(). */
2720 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2730 if (!c
->private_network
&& !c
->private_tmp
)
2733 r
= exec_runtime_allocate(rt
);
2737 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2738 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2742 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2743 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
/* Takes a reference on an ExecRuntime. The visible code only asserts that
 * the object is still referenced (n_ref > 0); the increment and the
 * returned pointer are not visible in this excerpt. */
2751 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2753 assert(r
->n_ref
> 0);
/* Drops a reference on an ExecRuntime. The visible code asserts that the
 * object is still referenced, then frees var_tmp_dir and closes both ends
 * of the netns storage socket pair. NOTE(review): the decrement, the
 * condition under which the teardown runs, the freeing of tmp_dir and of
 * the object itself, and the return value are not visible in this
 * excerpt. */
2759 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2764 assert(r
->n_ref
> 0);
2771 free(r
->var_tmp_dir
);
2772 safe_close_pair(r
->netns_storage_socket
);
/* Serialises an ExecRuntime of unit `u` to `f`.
 *
 * Items written, each only when present: "tmp-dir", "var-tmp-dir",
 * "netns-socket-0" and "netns-socket-1". For the sockets a duplicate of
 * the descriptor is placed into `fds` with fdset_put_dup() and the number
 * of that duplicate is what gets written. The keys match those understood
 * by exec_runtime_deserialize_item(). NOTE(review): the guard for tmp_dir,
 * the error checks on the duplicates and the return values are not visible
 * in this excerpt. */
2778 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2787 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2789 if (rt
->var_tmp_dir
)
2790 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2792 if (rt
->netns_storage_socket
[0] >= 0) {
2795 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2799 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2802 if (rt
->netns_storage_socket
[1] >= 0) {
2805 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2809 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
/* Applies one serialised key/value pair to the ExecRuntime in *rt,
 * allocating the runtime on demand via exec_runtime_allocate().
 *
 * Keys handled:
 *  - "tmp-dir" / "var-tmp-dir": the value is duplicated and replaces the
 *    previous string, which is freed;
 *  - "netns-socket-0" / "netns-socket-1": the value is parsed as a file
 *    descriptor number that must be contained in `fds`; a value that does
 *    not parse or is not in the set is logged at debug level. The previous
 *    descriptor in that slot is closed and the new one is taken out of
 *    `fds` with fdset_remove().
 * NOTE(review): the error checks after allocation and strdup(), the `else`
 * separating the debug log from the descriptor hand-over, and the return
 * values are not visible in this excerpt. */
2815 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2822 if (streq(key
, "tmp-dir")) {
2825 r
= exec_runtime_allocate(rt
);
2829 copy
= strdup(value
);
2833 free((*rt
)->tmp_dir
);
2834 (*rt
)->tmp_dir
= copy
;
2836 } else if (streq(key
, "var-tmp-dir")) {
2839 r
= exec_runtime_allocate(rt
);
2843 copy
= strdup(value
);
2847 free((*rt
)->var_tmp_dir
);
2848 (*rt
)->var_tmp_dir
= copy
;
2850 } else if (streq(key
, "netns-socket-0")) {
2853 r
= exec_runtime_allocate(rt
);
2857 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2858 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2860 safe_close((*rt
)->netns_storage_socket
[0]);
2861 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2863 } else if (streq(key
, "netns-socket-1")) {
2866 r
= exec_runtime_allocate(rt
);
2870 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2871 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2873 safe_close((*rt
)->netns_storage_socket
[1]);
2874 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
/* Job body handed to asynchronous_job() by exec_runtime_destroy(): removes
 * the directory tree named by `p`. The function takes ownership of the
 * string (it is freed automatically when `path` goes out of scope), and
 * the result of rm_rf() is deliberately discarded. */
2882 static void *remove_tmpdir_thread(void *p
) {
2883 _cleanup_free_
char *path
= p
;
2885 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
/* Destroys the on-disk and socket resources of an ExecRuntime.
 *
 * tmp_dir and var_tmp_dir are each handed to an asynchronous job running
 * remove_tmpdir_thread(), which takes over ownership of the path string.
 * If a job cannot be started a warning is logged; for var_tmp_dir the
 * string is then freed here, and the pointer is cleared afterwards. The
 * netns storage socket pair is closed last. According to the existing
 * comment nothing is removed while the runtime has multiple users.
 * NOTE(review): that check, the guard for tmp_dir and the matching
 * free/reset of tmp_dir are not visible in this excerpt. */
2889 void exec_runtime_destroy(ExecRuntime
*rt
) {
2895 /* If there are multiple users of this, let's leave the stuff around */
2900 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2902 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2904 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2911 if (rt
->var_tmp_dir
) {
2912 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2914 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2916 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2917 free(rt
->var_tmp_dir
);
2920 rt
->var_tmp_dir
= NULL
;
2923 safe_close_pair(rt
->netns_storage_socket
);
/* Names of the ExecInput values, indexed by enum value. The
 * DEFINE_STRING_TABLE_LOOKUP() invocation below presumably generates
 * exec_input_to_string() (used by exec_context_dump()) and its inverse
 * from this table. */
2926 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2927 [EXEC_INPUT_NULL
] = "null",
2928 [EXEC_INPUT_TTY
] = "tty",
2929 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2930 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2931 [EXEC_INPUT_SOCKET
] = "socket"
2934 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* Names of the ExecOutput values, indexed by enum value. The
 * "+console" variants name the targets that additionally write to the
 * console. The DEFINE_STRING_TABLE_LOOKUP() invocation below presumably
 * generates exec_output_to_string() (used by exec_context_dump()) and its
 * inverse from this table. */
2936 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2937 [EXEC_OUTPUT_INHERIT
] = "inherit",
2938 [EXEC_OUTPUT_NULL
] = "null",
2939 [EXEC_OUTPUT_TTY
] = "tty",
2940 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2941 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2942 [EXEC_OUTPUT_KMSG
] = "kmsg",
2943 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2944 [EXEC_OUTPUT_JOURNAL
] = "journal",
2945 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2946 [EXEC_OUTPUT_SOCKET
] = "socket"
2949 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
/* Names of the ExecUtmpMode values, indexed by enum value. The
 * DEFINE_STRING_TABLE_LOOKUP() invocation below presumably generates the
 * matching to-string / from-string helpers from this table. */
2951 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
2952 [EXEC_UTMP_INIT
] = "init",
2953 [EXEC_UTMP_LOGIN
] = "login",
2954 [EXEC_UTMP_USER
] = "user",
2957 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);