1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
52 #include "sd-messages.h"
56 #include "capability.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
64 #include "utmp-wtmp.h"
66 #include "path-util.h"
71 #include "selinux-util.h"
72 #include "errno-list.h"
75 #include "smack-util.h"
76 #include "bus-endpoint.h"
78 #include "formats-util.h"
79 #include "process-util.h"
80 #include "terminal-util.h"
81 #include "signal-util.h"
84 #include "apparmor-util.h"
88 #include "seccomp-util.h"
93 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
94 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96 /* This assumes there is a 'tty' group */
99 #define SNDBUF_SIZE (8*1024*1024)
101 static int shift_fds(int fds
[], unsigned n_fds
) {
102 int start
, restart_from
;
107 /* Modifies the fds array! (sorts it) */
117 for (i
= start
; i
< (int) n_fds
; i
++) {
120 /* Already at right index? */
124 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
130 /* Hmm, the fd we wanted isn't free? Then
131 * let's remember that and try again from here */
132 if (nfd
!= i
+3 && restart_from
< 0)
136 if (restart_from
< 0)
139 start
= restart_from
;
145 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
154 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
156 for (i
= 0; i
< n_fds
; i
++) {
158 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
161 /* We unconditionally drop FD_CLOEXEC from the fds,
162 * since after all we want to pass these fds to our
165 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
172 _pure_
static const char *tty_path(const ExecContext
*context
) {
175 if (context
->tty_path
)
176 return context
->tty_path
;
178 return "/dev/console";
181 static void exec_context_tty_reset(const ExecContext
*context
) {
184 if (context
->tty_vhangup
)
185 terminal_vhangup(tty_path(context
));
187 if (context
->tty_reset
)
188 reset_terminal(tty_path(context
));
190 if (context
->tty_vt_disallocate
&& context
->tty_path
)
191 vt_disallocate(context
->tty_path
);
194 static bool is_terminal_output(ExecOutput o
) {
196 o
== EXEC_OUTPUT_TTY
||
197 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
198 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
199 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
202 static int open_null_as(int flags
, int nfd
) {
207 fd
= open("/dev/null", flags
|O_NOCTTY
);
212 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
220 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
221 union sockaddr_union sa
= {
222 .un
.sun_family
= AF_UNIX
,
223 .un
.sun_path
= "/run/systemd/journal/stdout",
225 uid_t olduid
= UID_INVALID
;
226 gid_t oldgid
= GID_INVALID
;
229 if (gid
!= GID_INVALID
) {
237 if (uid
!= UID_INVALID
) {
247 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
251 /* If we fail to restore the uid or gid, things will likely
252 fail later on. This should only happen if an LSM interferes. */
254 if (uid
!= UID_INVALID
)
255 (void) seteuid(olduid
);
258 if (gid
!= GID_INVALID
)
259 (void) setegid(oldgid
);
264 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
268 assert(output
< _EXEC_OUTPUT_MAX
);
272 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
276 r
= connect_journal_socket(fd
, uid
, gid
);
280 if (shutdown(fd
, SHUT_RD
) < 0) {
285 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
295 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
297 context
->syslog_priority
,
298 !!context
->syslog_level_prefix
,
299 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
300 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
301 is_terminal_output(output
));
304 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
311 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
317 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
321 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
329 static bool is_terminal_input(ExecInput i
) {
331 i
== EXEC_INPUT_TTY
||
332 i
== EXEC_INPUT_TTY_FORCE
||
333 i
== EXEC_INPUT_TTY_FAIL
;
336 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
338 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
339 return EXEC_INPUT_NULL
;
341 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
342 return EXEC_INPUT_NULL
;
347 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
349 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
350 return EXEC_OUTPUT_INHERIT
;
355 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
360 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
364 case EXEC_INPUT_NULL
:
365 return open_null_as(O_RDONLY
, STDIN_FILENO
);
368 case EXEC_INPUT_TTY_FORCE
:
369 case EXEC_INPUT_TTY_FAIL
: {
372 fd
= acquire_terminal(tty_path(context
),
373 i
== EXEC_INPUT_TTY_FAIL
,
374 i
== EXEC_INPUT_TTY_FORCE
,
380 if (fd
!= STDIN_FILENO
) {
381 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
389 case EXEC_INPUT_SOCKET
:
390 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
393 assert_not_reached("Unknown input type");
397 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
406 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
407 o
= fixup_output(context
->std_output
, socket_fd
);
409 if (fileno
== STDERR_FILENO
) {
411 e
= fixup_output(context
->std_error
, socket_fd
);
413 /* This expects that the input and output are already set up */
415 /* Don't change the stderr file descriptor if we inherit all
416 * the way and are not on a tty */
417 if (e
== EXEC_OUTPUT_INHERIT
&&
418 o
== EXEC_OUTPUT_INHERIT
&&
419 i
== EXEC_INPUT_NULL
&&
420 !is_terminal_input(context
->std_input
) &&
424 /* Duplicate from stdout if possible */
425 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
426 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
430 } else if (o
== EXEC_OUTPUT_INHERIT
) {
431 /* If input got downgraded, inherit the original value */
432 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
433 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
435 /* If the input is connected to anything that's not a /dev/null, inherit that... */
436 if (i
!= EXEC_INPUT_NULL
)
437 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
439 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
443 /* We need to open /dev/null here anew, to get the right access mode. */
444 return open_null_as(O_WRONLY
, fileno
);
449 case EXEC_OUTPUT_NULL
:
450 return open_null_as(O_WRONLY
, fileno
);
452 case EXEC_OUTPUT_TTY
:
453 if (is_terminal_input(i
))
454 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
456 /* We don't reset the terminal if this is just about output */
457 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
459 case EXEC_OUTPUT_SYSLOG
:
460 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
461 case EXEC_OUTPUT_KMSG
:
462 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
463 case EXEC_OUTPUT_JOURNAL
:
464 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
465 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
467 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
468 r
= open_null_as(O_WRONLY
, fileno
);
472 case EXEC_OUTPUT_SOCKET
:
473 assert(socket_fd
>= 0);
474 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
477 assert_not_reached("Unknown error type");
481 static int chown_terminal(int fd
, uid_t uid
) {
486 /* This might fail. What matters are the results. */
487 (void) fchown(fd
, uid
, -1);
488 (void) fchmod(fd
, TTY_MODE
);
490 if (fstat(fd
, &st
) < 0)
493 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
499 static int setup_confirm_stdio(int *_saved_stdin
,
500 int *_saved_stdout
) {
501 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
503 assert(_saved_stdin
);
504 assert(_saved_stdout
);
506 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
510 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
511 if (saved_stdout
< 0) {
516 fd
= acquire_terminal(
521 DEFAULT_CONFIRM_USEC
);
527 r
= chown_terminal(fd
, getuid());
531 if (dup2(fd
, STDIN_FILENO
) < 0) {
536 if (dup2(fd
, STDOUT_FILENO
) < 0) {
544 *_saved_stdin
= saved_stdin
;
545 *_saved_stdout
= saved_stdout
;
550 safe_close(saved_stdout
);
551 safe_close(saved_stdin
);
557 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
558 _cleanup_close_
int fd
= -1;
563 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
567 va_start(ap
, format
);
568 vdprintf(fd
, format
, ap
);
574 static int restore_confirm_stdio(int *saved_stdin
,
580 assert(saved_stdout
);
584 if (*saved_stdin
>= 0)
585 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
588 if (*saved_stdout
>= 0)
589 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
592 safe_close(*saved_stdin
);
593 safe_close(*saved_stdout
);
598 static int ask_for_confirmation(char *response
, char **argv
) {
599 int saved_stdout
= -1, saved_stdin
= -1, r
;
600 _cleanup_free_
char *line
= NULL
;
602 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
606 line
= exec_command_line(argv
);
610 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
612 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
617 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
618 bool keep_groups
= false;
623 /* Lookup and set GID and supplementary group list. Here too
624 * we avoid NSS lookups for gid=0. */
626 if (context
->group
|| username
) {
628 if (context
->group
) {
629 const char *g
= context
->group
;
631 if ((r
= get_group_creds(&g
, &gid
)) < 0)
635 /* First step, initialize groups from /etc/group */
636 if (username
&& gid
!= 0) {
637 if (initgroups(username
, gid
) < 0)
643 /* Second step, set our gids */
644 if (setresgid(gid
, gid
, gid
) < 0)
648 if (context
->supplementary_groups
) {
653 /* Final step, initialize any manually set supplementary groups */
654 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
656 if (!(gids
= new(gid_t
, ngroups_max
)))
660 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
667 STRV_FOREACH(i
, context
->supplementary_groups
) {
670 if (k
>= ngroups_max
) {
676 r
= get_group_creds(&g
, gids
+k
);
685 if (setgroups(k
, gids
) < 0) {
696 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
699 /* Sets (but doesn't look up) the uid and makes sure we keep the
700 * capabilities while doing so. */
702 if (context
->capabilities
) {
703 _cleanup_cap_free_ cap_t d
= NULL
;
704 static const cap_value_t bits
[] = {
705 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
706 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
709 /* First step: If we need to keep capabilities but
710 * drop privileges we need to make sure we keep our
711 * caps, while we drop privileges. */
713 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
715 if (prctl(PR_GET_SECUREBITS
) != sb
)
716 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
720 /* Second step: set the capabilities. This will reduce
721 * the capabilities to the minimum we need. */
723 d
= cap_dup(context
->capabilities
);
727 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
728 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
731 if (cap_set_proc(d
) < 0)
735 /* Third step: actually set the uids */
736 if (setresuid(uid
, uid
, uid
) < 0)
739 /* At this point we should have all necessary capabilities but
740 are otherwise a normal user. However, the caps might have gotten
741 corrupted due to the setresuid(), so we need to clean them up
742 later. This is done outside of this call. */
749 static int null_conv(
751 const struct pam_message
**msg
,
752 struct pam_response
**resp
,
755 /* We don't support conversations */
760 static int setup_pam(
766 int fds
[], unsigned n_fds
) {
768 static const struct pam_conv conv
= {
773 pam_handle_t
*handle
= NULL
;
775 int pam_code
= PAM_SUCCESS
;
778 bool close_session
= false;
779 pid_t pam_pid
= 0, parent_pid
;
786 /* We set up PAM in the parent process, then fork. The child
787 * will then stay around until killed via PR_SET_PDEATHSIG or
788 * systemd via the cgroup logic. It will then remove the PAM
789 * session again. The parent process will exec() the actual
790 * daemon. We do things this way to ensure that the main PID
791 * of the daemon is the one we initially fork()ed. */
793 if (log_get_max_level() < LOG_DEBUG
)
796 pam_code
= pam_start(name
, user
, &conv
, &handle
);
797 if (pam_code
!= PAM_SUCCESS
) {
803 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
804 if (pam_code
!= PAM_SUCCESS
)
808 pam_code
= pam_acct_mgmt(handle
, flags
);
809 if (pam_code
!= PAM_SUCCESS
)
812 pam_code
= pam_open_session(handle
, flags
);
813 if (pam_code
!= PAM_SUCCESS
)
816 close_session
= true;
818 e
= pam_getenvlist(handle
);
820 pam_code
= PAM_BUF_ERR
;
824 /* Block SIGTERM, so that we know that it won't get lost in
827 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
829 parent_pid
= getpid();
839 /* The child's job is to reset the PAM session on
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds
, n_fds
);
851 /* Drop privileges - we don't need any for pam_close_session()
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid
, uid
, uid
) < 0)
856 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
858 (void) ignore_signals(SIGPIPE
, -1);
860 /* Wait until our parent died. This will only work if
861 * the above setresuid() succeeds, otherwise the kernel
862 * will not allow unprivileged parents to kill their privileged
863 * children this way. We rely on the control groups kill logic
864 * to do the rest for us. */
865 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
868 /* Check if our parent process might already have
870 if (getppid() == parent_pid
) {
873 assert_se(sigemptyset(&ss
) >= 0);
874 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
877 if (sigwait(&ss
, &sig
) < 0) {
884 assert(sig
== SIGTERM
);
889 /* If our parent died we'll end the session */
890 if (getppid() != parent_pid
) {
891 pam_code
= pam_close_session(handle
, flags
);
892 if (pam_code
!= PAM_SUCCESS
)
899 pam_end(handle
, pam_code
| flags
);
903 /* If the child was forked off successfully it will do all the
904 * cleanups, so forget about the handle here. */
907 /* Unblock SIGTERM again in the parent */
908 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
910 /* We close the log explicitly here, since the PAM modules
911 * might have opened it, but we don't want this fd around. */
920 if (pam_code
!= PAM_SUCCESS
) {
921 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
922 err
= -EPERM
; /* PAM errors do not map to errno */
924 log_error_errno(errno
, "PAM failed: %m");
930 pam_code
= pam_close_session(handle
, flags
);
932 pam_end(handle
, pam_code
| flags
);
940 kill(pam_pid
, SIGTERM
);
941 kill(pam_pid
, SIGCONT
);
948 static void rename_process_from_path(const char *path
) {
949 char process_name
[11];
953 /* This resulting string must fit in 10 chars (i.e. the length
954 * of "/sbin/init") to look pretty in /bin/ps */
958 rename_process("(...)");
964 /* The end of the process name is usually more
965 * interesting, since the first bit might just be
971 process_name
[0] = '(';
972 memcpy(process_name
+1, p
, l
);
973 process_name
[1+l
] = ')';
974 process_name
[1+l
+1] = 0;
976 rename_process(process_name
);
981 static int apply_seccomp(const ExecContext
*c
) {
982 uint32_t negative_action
, action
;
983 scmp_filter_ctx
*seccomp
;
990 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
992 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
996 if (c
->syscall_archs
) {
998 SET_FOREACH(id
, c
->syscall_archs
, i
) {
999 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1007 r
= seccomp_add_secondary_archs(seccomp
);
1012 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1013 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1014 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1019 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1023 r
= seccomp_load(seccomp
);
1026 seccomp_release(seccomp
);
1030 static int apply_address_families(const ExecContext
*c
) {
1031 scmp_filter_ctx
*seccomp
;
1037 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1041 r
= seccomp_add_secondary_archs(seccomp
);
1045 if (c
->address_families_whitelist
) {
1046 int af
, first
= 0, last
= 0;
1049 /* If this is a whitelist, we first block the address
1050 * families that are out of range and then everything
1051 * that is not in the set. First, we find the lowest
1052 * and highest address family in the set. */
1054 SET_FOREACH(afp
, c
->address_families
, i
) {
1055 af
= PTR_TO_INT(afp
);
1057 if (af
<= 0 || af
>= af_max())
1060 if (first
== 0 || af
< first
)
1063 if (last
== 0 || af
> last
)
1067 assert((first
== 0) == (last
== 0));
1071 /* No entries in the valid range, block everything */
1072 r
= seccomp_rule_add(
1074 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1082 /* Block everything below the first entry */
1083 r
= seccomp_rule_add(
1085 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1088 SCMP_A0(SCMP_CMP_LT
, first
));
1092 /* Block everything above the last entry */
1093 r
= seccomp_rule_add(
1095 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1098 SCMP_A0(SCMP_CMP_GT
, last
));
1102 /* Block everything between the first and last
1104 for (af
= 1; af
< af_max(); af
++) {
1106 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1109 r
= seccomp_rule_add(
1111 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1114 SCMP_A0(SCMP_CMP_EQ
, af
));
1123 /* If this is a blacklist, then generate one rule for
1124 * each address family that are then combined in OR
1127 SET_FOREACH(af
, c
->address_families
, i
) {
1129 r
= seccomp_rule_add(
1131 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1134 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1140 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1144 r
= seccomp_load(seccomp
);
1147 seccomp_release(seccomp
);
1153 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1157 safe_close(idle_pipe
[1]);
1158 safe_close(idle_pipe
[2]);
1160 if (idle_pipe
[0] >= 0) {
1163 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1165 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1166 /* Signal systemd that we are bored and want to continue. */
1167 r
= write(idle_pipe
[3], "x", 1);
1169 /* Wait for systemd to react to the signal above. */
1170 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1173 safe_close(idle_pipe
[0]);
1177 safe_close(idle_pipe
[3]);
1180 static int build_environment(
1181 const ExecContext
*c
,
1183 usec_t watchdog_usec
,
1185 const char *username
,
1189 _cleanup_strv_free_
char **our_env
= NULL
;
1196 our_env
= new0(char*, 10);
1201 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1203 our_env
[n_env
++] = x
;
1205 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1207 our_env
[n_env
++] = x
;
1210 if (watchdog_usec
> 0) {
1211 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1213 our_env
[n_env
++] = x
;
1215 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1217 our_env
[n_env
++] = x
;
1221 x
= strappend("HOME=", home
);
1224 our_env
[n_env
++] = x
;
1228 x
= strappend("LOGNAME=", username
);
1231 our_env
[n_env
++] = x
;
1233 x
= strappend("USER=", username
);
1236 our_env
[n_env
++] = x
;
1240 x
= strappend("SHELL=", shell
);
1243 our_env
[n_env
++] = x
;
1246 if (is_terminal_input(c
->std_input
) ||
1247 c
->std_output
== EXEC_OUTPUT_TTY
||
1248 c
->std_error
== EXEC_OUTPUT_TTY
||
1251 x
= strdup(default_term_for_tty(tty_path(c
)));
1254 our_env
[n_env
++] = x
;
1257 our_env
[n_env
++] = NULL
;
1258 assert(n_env
<= 10);
1266 static bool exec_needs_mount_namespace(
1267 const ExecContext
*context
,
1268 const ExecParameters
*params
,
1269 ExecRuntime
*runtime
) {
1274 if (!strv_isempty(context
->read_write_dirs
) ||
1275 !strv_isempty(context
->read_only_dirs
) ||
1276 !strv_isempty(context
->inaccessible_dirs
))
1279 if (context
->mount_flags
!= 0)
1282 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1285 if (params
->bus_endpoint_path
)
1288 if (context
->private_devices
||
1289 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1290 context
->protect_home
!= PROTECT_HOME_NO
)
1296 static int exec_child(
1298 ExecCommand
*command
,
1299 const ExecContext
*context
,
1300 const ExecParameters
*params
,
1301 ExecRuntime
*runtime
,
1304 int *fds
, unsigned n_fds
,
1308 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1309 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1310 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1311 unsigned n_dont_close
= 0;
1312 int dont_close
[n_fds
+ 4];
1313 uid_t uid
= UID_INVALID
;
1314 gid_t gid
= GID_INVALID
;
1316 bool needs_mount_namespace
;
1322 assert(exit_status
);
1324 rename_process_from_path(command
->path
);
1326 /* We reset exactly these signals, since they are the
1327 * only ones we set to SIG_IGN in the main daemon. All
1328 * others we leave untouched because we set them to
1329 * SIG_DFL or a valid handler initially, both of which
1330 * will be demoted to SIG_DFL. */
1331 (void) default_signals(SIGNALS_CRASH_HANDLER
,
1332 SIGNALS_IGNORE
, -1);
1334 if (context
->ignore_sigpipe
)
1335 (void) ignore_signals(SIGPIPE
, -1);
1337 r
= reset_signal_mask();
1339 *exit_status
= EXIT_SIGNAL_MASK
;
1343 if (params
->idle_pipe
)
1344 do_idle_pipe_dance(params
->idle_pipe
);
1346 /* Close sockets very early to make sure we don't
1347 * block init reexecution because it cannot bind its
1353 dont_close
[n_dont_close
++] = socket_fd
;
1355 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1356 n_dont_close
+= n_fds
;
1358 if (params
->bus_endpoint_fd
>= 0)
1359 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1361 if (runtime
->netns_storage_socket
[0] >= 0)
1362 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1363 if (runtime
->netns_storage_socket
[1] >= 0)
1364 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1367 r
= close_all_fds(dont_close
, n_dont_close
);
1369 *exit_status
= EXIT_FDS
;
1373 if (!context
->same_pgrp
)
1375 *exit_status
= EXIT_SETSID
;
1379 exec_context_tty_reset(context
);
1381 if (params
->confirm_spawn
) {
1384 r
= ask_for_confirmation(&response
, argv
);
1385 if (r
== -ETIMEDOUT
)
1386 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1388 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1389 else if (response
== 's') {
1390 write_confirm_message("Skipping execution.\n");
1391 *exit_status
= EXIT_CONFIRM
;
1393 } else if (response
== 'n') {
1394 write_confirm_message("Failing execution.\n");
1400 if (context
->user
) {
1401 username
= context
->user
;
1402 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1404 *exit_status
= EXIT_USER
;
1409 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1410 * must be sure to drop O_NONBLOCK */
1412 fd_nonblock(socket_fd
, false);
1414 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1416 *exit_status
= EXIT_STDIN
;
1420 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1422 *exit_status
= EXIT_STDOUT
;
1426 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1428 *exit_status
= EXIT_STDERR
;
1432 if (params
->cgroup_path
) {
1433 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1435 *exit_status
= EXIT_CGROUP
;
1440 if (context
->oom_score_adjust_set
) {
1441 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1443 /* When we can't make this change due to EPERM, then
1444 * let's silently skip over it. User namespaces
1445 * prohibit write access to this file, and we
1446 * shouldn't trip up over that. */
1448 sprintf(t
, "%i", context
->oom_score_adjust
);
1449 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
1450 if (r
== -EPERM
|| r
== -EACCES
) {
1452 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1455 *exit_status
= EXIT_OOM_ADJUST
;
1460 if (context
->nice_set
)
1461 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1462 *exit_status
= EXIT_NICE
;
1466 if (context
->cpu_sched_set
) {
1467 struct sched_param param
= {
1468 .sched_priority
= context
->cpu_sched_priority
,
1471 r
= sched_setscheduler(0,
1472 context
->cpu_sched_policy
|
1473 (context
->cpu_sched_reset_on_fork
?
1474 SCHED_RESET_ON_FORK
: 0),
1477 *exit_status
= EXIT_SETSCHEDULER
;
1482 if (context
->cpuset
)
1483 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1484 *exit_status
= EXIT_CPUAFFINITY
;
1488 if (context
->ioprio_set
)
1489 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1490 *exit_status
= EXIT_IOPRIO
;
1494 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1495 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1496 *exit_status
= EXIT_TIMERSLACK
;
1500 if (context
->personality
!= PERSONALITY_INVALID
)
1501 if (personality(context
->personality
) < 0) {
1502 *exit_status
= EXIT_PERSONALITY
;
1506 if (context
->utmp_id
)
1507 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1509 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1510 r
= chown_terminal(STDIN_FILENO
, uid
);
1512 *exit_status
= EXIT_STDIN
;
1517 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1518 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1520 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1522 *exit_status
= EXIT_BUS_ENDPOINT
;
1527 /* If delegation is enabled we'll pass ownership of the cgroup
1528 * (but only in systemd's own controller hierarchy!) to the
1529 * user of the new process. */
1530 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1531 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1533 *exit_status
= EXIT_CGROUP
;
1538 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1540 *exit_status
= EXIT_CGROUP
;
1545 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1548 STRV_FOREACH(rt
, context
->runtime_directory
) {
1549 _cleanup_free_
char *p
;
1551 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1553 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1557 r
= mkdir_safe_label(p
, context
->runtime_directory_mode
, uid
, gid
);
1559 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1565 if (params
->apply_permissions
) {
1566 r
= enforce_groups(context
, username
, gid
);
1568 *exit_status
= EXIT_GROUP
;
1573 umask(context
->umask
);
1576 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1577 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1579 *exit_status
= EXIT_PAM
;
1585 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1586 r
= setup_netns(runtime
->netns_storage_socket
);
1588 *exit_status
= EXIT_NETWORK
;
1593 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1595 if (needs_mount_namespace
) {
1596 char *tmp
= NULL
, *var
= NULL
;
1598 /* The runtime struct only contains the parent
1599 * of the private /tmp, which is
1600 * non-accessible to world users. Inside of it
1601 * there's a /tmp that is sticky, and that's
1602 * the one we want to use here. */
1604 if (context
->private_tmp
&& runtime
) {
1605 if (runtime
->tmp_dir
)
1606 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1607 if (runtime
->var_tmp_dir
)
1608 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1611 r
= setup_namespace(
1612 params
->apply_chroot
? context
->root_directory
: NULL
,
1613 context
->read_write_dirs
,
1614 context
->read_only_dirs
,
1615 context
->inaccessible_dirs
,
1618 params
->bus_endpoint_path
,
1619 context
->private_devices
,
1620 context
->protect_home
,
1621 context
->protect_system
,
1622 context
->mount_flags
);
1624 /* If we couldn't set up the namespace this is
1625 * probably due to a missing capability. In this case,
1626 * silently proceed. */
1627 if (r
== -EPERM
|| r
== -EACCES
) {
1629 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1632 *exit_status
= EXIT_NAMESPACE
;
1637 if (params
->apply_chroot
) {
1638 if (!needs_mount_namespace
&& context
->root_directory
)
1639 if (chroot(context
->root_directory
) < 0) {
1640 *exit_status
= EXIT_CHROOT
;
1644 if (chdir(context
->working_directory
?: "/") < 0 &&
1645 !context
->working_directory_missing_ok
) {
1646 *exit_status
= EXIT_CHDIR
;
1650 _cleanup_free_
char *d
= NULL
;
1652 if (asprintf(&d
, "%s/%s",
1653 context
->root_directory
?: "",
1654 context
->working_directory
?: "") < 0) {
1655 *exit_status
= EXIT_MEMORY
;
1660 !context
->working_directory_missing_ok
) {
1661 *exit_status
= EXIT_CHDIR
;
1667 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1668 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1670 *exit_status
= EXIT_SELINUX_CONTEXT
;
1676 /* We repeat the fd closing here, to make sure that
1677 * nothing is leaked from the PAM modules. Note that
1678 * we are more aggressive this time since socket_fd
1679 * and the netns fds we don't need anymore. The custom
1680 * endpoint fd was needed to upload the policy and can
1681 * now be closed as well. */
1682 r
= close_all_fds(fds
, n_fds
);
1684 r
= shift_fds(fds
, n_fds
);
1686 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1688 *exit_status
= EXIT_FDS
;
1692 if (params
->apply_permissions
) {
1694 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1695 if (!context
->rlimit
[i
])
1698 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1699 *exit_status
= EXIT_LIMITS
;
1704 if (context
->capability_bounding_set_drop
) {
1705 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1707 *exit_status
= EXIT_CAPABILITIES
;
1713 if (context
->smack_process_label
) {
1714 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1716 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1720 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1722 _cleanup_free_
char *exec_label
= NULL
;
1724 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1725 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
) {
1726 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1730 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1732 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1739 if (context
->user
) {
1740 r
= enforce_user(context
, uid
);
1742 *exit_status
= EXIT_USER
;
1747 /* PR_GET_SECUREBITS is not privileged, while
1748 * PR_SET_SECUREBITS is. So to suppress
1749 * potential EPERMs we'll try not to call
1750 * PR_SET_SECUREBITS unless necessary. */
1751 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1752 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1753 *exit_status
= EXIT_SECUREBITS
;
1757 if (context
->capabilities
)
1758 if (cap_set_proc(context
->capabilities
) < 0) {
1759 *exit_status
= EXIT_CAPABILITIES
;
1763 if (context
->no_new_privileges
)
1764 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1765 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1770 if (context
->address_families_whitelist
||
1771 !set_isempty(context
->address_families
)) {
1772 r
= apply_address_families(context
);
1774 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1779 if (context
->syscall_whitelist
||
1780 !set_isempty(context
->syscall_filter
) ||
1781 !set_isempty(context
->syscall_archs
)) {
1782 r
= apply_seccomp(context
);
1784 *exit_status
= EXIT_SECCOMP
;
1791 if (mac_selinux_use()) {
1792 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1795 r
= setexeccon(exec_context
);
1797 *exit_status
= EXIT_SELINUX_CONTEXT
;
1804 #ifdef HAVE_APPARMOR
1805 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1806 r
= aa_change_onexec(context
->apparmor_profile
);
1807 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1808 *exit_status
= EXIT_APPARMOR_PROFILE
;
1815 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1817 *exit_status
= EXIT_MEMORY
;
1821 final_env
= strv_env_merge(5,
1822 params
->environment
,
1824 context
->environment
,
1829 *exit_status
= EXIT_MEMORY
;
1833 final_argv
= replace_env_argv(argv
, final_env
);
1835 *exit_status
= EXIT_MEMORY
;
1839 final_env
= strv_env_clean(final_env
);
1841 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1842 _cleanup_free_
char *line
;
1844 line
= exec_command_line(final_argv
);
1847 log_struct(LOG_DEBUG
,
1849 "EXECUTABLE=%s", command
->path
,
1850 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1856 execve(command
->path
, final_argv
, final_env
);
1857 *exit_status
= EXIT_EXEC
;
1861 int exec_spawn(Unit
*unit
,
1862 ExecCommand
*command
,
1863 const ExecContext
*context
,
1864 const ExecParameters
*params
,
1865 ExecRuntime
*runtime
,
1868 _cleanup_strv_free_
char **files_env
= NULL
;
1869 int *fds
= NULL
; unsigned n_fds
= 0;
1870 _cleanup_free_
char *line
= NULL
;
1880 assert(params
->fds
|| params
->n_fds
<= 0);
1882 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1883 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1884 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1886 if (params
->n_fds
!= 1) {
1887 log_unit_error(unit
, "Got more than one socket.");
1891 socket_fd
= params
->fds
[0];
1895 n_fds
= params
->n_fds
;
1898 r
= exec_context_load_environment(unit
, context
, &files_env
);
1900 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
1902 argv
= params
->argv
?: command
->argv
;
1903 line
= exec_command_line(argv
);
1907 log_struct(LOG_DEBUG
,
1909 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1910 "EXECUTABLE=%s", command
->path
,
1914 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1919 r
= exec_child(unit
,
1931 log_struct_errno(LOG_ERR
, r
,
1932 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1934 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1935 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1937 "EXECUTABLE=%s", command
->path
,
1944 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1946 /* We add the new process to the cgroup both in the child (so
1947 * that we can be sure that no user code is ever executed
1948 * outside of the cgroup) and in the parent (so that we can be
1949 * sure that when we kill the cgroup the process will be killed too). */
1951 if (params
->cgroup_path
)
1952 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1954 exec_status_start(&command
->exec_status
, pid
);
/* Fills in the non-zero defaults of an ExecContext: best-effort I/O priority
 * class at level 0, SCHED_OTHER scheduling, syslog priority LOG_DAEMON|LOG_INFO
 * with level prefixes enabled, SIGPIPE ignored, no timer slack configured
 * (NSEC_INFINITY), no personality configured (PERSONALITY_INVALID) and mode
 * 0755 for runtime directories.
 * NOTE(review): some original lines of this function are not visible in this
 * excerpt, so further initialization may exist. */
1960 void exec_context_init(ExecContext
*c
) {
1964 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1965 c
->cpu_sched_policy
= SCHED_OTHER
;
1966 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1967 c
->syslog_level_prefix
= true;
1968 c
->ignore_sigpipe
= true;
1969 c
->timer_slack_nsec
= NSEC_INFINITY
;
1970 c
->personality
= PERSONALITY_INVALID
;
1971 c
->runtime_directory_mode
= 0755;
1974 void exec_context_done(ExecContext
*c
) {
1979 strv_free(c
->environment
);
1980 c
->environment
= NULL
;
1982 strv_free(c
->environment_files
);
1983 c
->environment_files
= NULL
;
1985 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1987 c
->rlimit
[l
] = NULL
;
1990 free(c
->working_directory
);
1991 c
->working_directory
= NULL
;
1992 free(c
->root_directory
);
1993 c
->root_directory
= NULL
;
1998 free(c
->syslog_identifier
);
1999 c
->syslog_identifier
= NULL
;
2007 strv_free(c
->supplementary_groups
);
2008 c
->supplementary_groups
= NULL
;
2013 if (c
->capabilities
) {
2014 cap_free(c
->capabilities
);
2015 c
->capabilities
= NULL
;
2018 strv_free(c
->read_only_dirs
);
2019 c
->read_only_dirs
= NULL
;
2021 strv_free(c
->read_write_dirs
);
2022 c
->read_write_dirs
= NULL
;
2024 strv_free(c
->inaccessible_dirs
);
2025 c
->inaccessible_dirs
= NULL
;
2028 CPU_FREE(c
->cpuset
);
2033 free(c
->selinux_context
);
2034 c
->selinux_context
= NULL
;
2036 free(c
->apparmor_profile
);
2037 c
->apparmor_profile
= NULL
;
2039 set_free(c
->syscall_filter
);
2040 c
->syscall_filter
= NULL
;
2042 set_free(c
->syscall_archs
);
2043 c
->syscall_archs
= NULL
;
2045 set_free(c
->address_families
);
2046 c
->address_families
= NULL
;
2048 strv_free(c
->runtime_directory
);
2049 c
->runtime_directory
= NULL
;
2051 bus_endpoint_free(c
->bus_endpoint
);
2052 c
->bus_endpoint
= NULL
;
2055 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2060 if (!runtime_prefix
)
2063 STRV_FOREACH(i
, c
->runtime_directory
) {
2064 _cleanup_free_
char *p
;
2066 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2070 /* We execute this synchronously, since we need to be
2071 * sure this is gone when we start the service. */
2073 (void) rm_rf(p
, REMOVE_ROOT
);
2079 void exec_command_done(ExecCommand
*c
) {
/* Calls exec_command_done() on each of the n ExecCommand entries of the
 * array starting at c (c+0 .. c+n-1). */
2089 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2092 for (i
= 0; i
< n
; i
++)
2093 exec_command_done(c
+i
);
2096 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2100 LIST_REMOVE(command
, c
, i
);
2101 exec_command_done(i
);
/* Treats c as an array of n command-list heads: each list is passed to
 * exec_command_free_list() and the slot is overwritten with whatever that
 * call returns. */
2108 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2111 for (i
= 0; i
< n
; i
++)
2112 c
[i
] = exec_command_free_list(c
[i
]);
2115 typedef struct InvalidEnvInfo
{
/* Callback handed to strv_env_clean_with_callback() by
 * exec_context_load_environment(). p is the rejected assignment; userdata
 * points to an InvalidEnvInfo whose unit and path fields are used to log
 * which unit and which environment file the bad assignment came from. */
2120 static void invalid_env(const char *p
, void *userdata
) {
2121 InvalidEnvInfo
*info
= userdata
;
2123 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2126 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2127 char **i
, **r
= NULL
;
2132 STRV_FOREACH(i
, c
->environment_files
) {
2135 bool ignore
= false;
2137 _cleanup_globfree_ glob_t pglob
= {};
2147 if (!path_is_absolute(fn
)) {
2155 /* Filename supports globbing, take all matching files */
2157 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2162 return errno
? -errno
: -EINVAL
;
2164 count
= pglob
.gl_pathc
;
2172 for (n
= 0; n
< count
; n
++) {
2173 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2181 /* Log invalid environment variables with filename */
2183 InvalidEnvInfo info
= {
2185 .path
= pglob
.gl_pathv
[n
]
2188 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2196 m
= strv_env_merge(2, r
, p
);
/* Decides whether the given tty name may refer to the same device as
 * /dev/console. The name is checked for a "/dev/" prefix, compared against
 * "console" directly, and otherwise compared against the device that
 * resolve_dev_console() reports.
 * NOTE(review): the statements taken in each branch are not visible in this
 * excerpt; the existing inline comments describe the intended outcome. */
2212 static bool tty_may_match_dev_console(const char *tty
) {
2213 _cleanup_free_
char *active
= NULL
;
2216 if (startswith(tty
, "/dev/"))
2219 /* trivial identity? */
2220 if (streq(tty
, "console"))
2223 console
= resolve_dev_console(&active
);
2224 /* if we could not resolve, assume it may */
2228 /* "tty0" means the active VC, so it may be the same sometimes */
2229 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
/* Returns true when the context uses a terminal at all (TTY reset, vhangup
 * or VT disallocation requested, or stdin/stdout/stderr connected to a
 * terminal) AND the context's tty path may be the same device as
 * /dev/console according to tty_may_match_dev_console(). */
2232 bool exec_context_may_touch_console(ExecContext
*ec
) {
2233 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2234 is_terminal_input(ec
->std_input
) ||
2235 is_terminal_output(ec
->std_output
) ||
2236 is_terminal_output(ec
->std_error
)) &&
2237 tty_may_match_dev_console(tty_path(ec
));
2240 static void strv_fprintf(FILE *f
, char **l
) {
2246 fprintf(f
, " %s", *g
);
2249 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2256 prefix
= strempty(prefix
);
2260 "%sWorkingDirectory: %s\n"
2261 "%sRootDirectory: %s\n"
2262 "%sNonBlocking: %s\n"
2263 "%sPrivateTmp: %s\n"
2264 "%sPrivateNetwork: %s\n"
2265 "%sPrivateDevices: %s\n"
2266 "%sProtectHome: %s\n"
2267 "%sProtectSystem: %s\n"
2268 "%sIgnoreSIGPIPE: %s\n",
2270 prefix
, c
->working_directory
? c
->working_directory
: "/",
2271 prefix
, c
->root_directory
? c
->root_directory
: "/",
2272 prefix
, yes_no(c
->non_blocking
),
2273 prefix
, yes_no(c
->private_tmp
),
2274 prefix
, yes_no(c
->private_network
),
2275 prefix
, yes_no(c
->private_devices
),
2276 prefix
, protect_home_to_string(c
->protect_home
),
2277 prefix
, protect_system_to_string(c
->protect_system
),
2278 prefix
, yes_no(c
->ignore_sigpipe
));
2280 STRV_FOREACH(e
, c
->environment
)
2281 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2283 STRV_FOREACH(e
, c
->environment_files
)
2284 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2291 if (c
->oom_score_adjust_set
)
2293 "%sOOMScoreAdjust: %i\n",
2294 prefix
, c
->oom_score_adjust
);
2296 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2298 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2299 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2301 if (c
->ioprio_set
) {
2302 _cleanup_free_
char *class_str
= NULL
;
2304 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2306 "%sIOSchedulingClass: %s\n"
2307 "%sIOPriority: %i\n",
2308 prefix
, strna(class_str
),
2309 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2312 if (c
->cpu_sched_set
) {
2313 _cleanup_free_
char *policy_str
= NULL
;
2315 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2317 "%sCPUSchedulingPolicy: %s\n"
2318 "%sCPUSchedulingPriority: %i\n"
2319 "%sCPUSchedulingResetOnFork: %s\n",
2320 prefix
, strna(policy_str
),
2321 prefix
, c
->cpu_sched_priority
,
2322 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2326 fprintf(f
, "%sCPUAffinity:", prefix
);
2327 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2328 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2329 fprintf(f
, " %u", i
);
2333 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2334 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2337 "%sStandardInput: %s\n"
2338 "%sStandardOutput: %s\n"
2339 "%sStandardError: %s\n",
2340 prefix
, exec_input_to_string(c
->std_input
),
2341 prefix
, exec_output_to_string(c
->std_output
),
2342 prefix
, exec_output_to_string(c
->std_error
));
2348 "%sTTYVHangup: %s\n"
2349 "%sTTYVTDisallocate: %s\n",
2350 prefix
, c
->tty_path
,
2351 prefix
, yes_no(c
->tty_reset
),
2352 prefix
, yes_no(c
->tty_vhangup
),
2353 prefix
, yes_no(c
->tty_vt_disallocate
));
2355 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2356 c
->std_output
== EXEC_OUTPUT_KMSG
||
2357 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2358 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2359 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2360 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2361 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2362 c
->std_error
== EXEC_OUTPUT_KMSG
||
2363 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2364 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2365 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2366 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2368 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2370 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2371 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2374 "%sSyslogFacility: %s\n"
2375 "%sSyslogLevel: %s\n",
2376 prefix
, strna(fac_str
),
2377 prefix
, strna(lvl_str
));
2380 if (c
->capabilities
) {
2381 _cleanup_cap_free_charp_
char *t
;
2383 t
= cap_to_text(c
->capabilities
, NULL
);
2385 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
/* Prints the secure bits that are set as a list of names after the
 * "Secure Bits:" label. The names are concatenated directly, so each one
 * has to carry its own leading space; "noroot-locked" was missing it and
 * ended up glued to the preceding name (e.g. " norootnoroot-locked"). */
2389 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2391 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2392 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2393 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2394 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2395 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2396 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? " noroot-locked" : "");
2398 if (c
->capability_bounding_set_drop
) {
2400 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2402 for (l
= 0; l
<= cap_last_cap(); l
++)
2403 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2404 fprintf(f
, " %s", strna(capability_to_name(l
)));
2410 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2412 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2414 if (strv_length(c
->supplementary_groups
) > 0) {
2415 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2416 strv_fprintf(f
, c
->supplementary_groups
);
2421 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2423 if (strv_length(c
->read_write_dirs
) > 0) {
2424 fprintf(f
, "%sReadWriteDirs:", prefix
);
2425 strv_fprintf(f
, c
->read_write_dirs
);
2429 if (strv_length(c
->read_only_dirs
) > 0) {
2430 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2431 strv_fprintf(f
, c
->read_only_dirs
);
2435 if (strv_length(c
->inaccessible_dirs
) > 0) {
2436 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2437 strv_fprintf(f
, c
->inaccessible_dirs
);
2443 "%sUtmpIdentifier: %s\n",
2444 prefix
, c
->utmp_id
);
2446 if (c
->selinux_context
)
2448 "%sSELinuxContext: %s%s\n",
2449 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2451 if (c
->personality
!= PERSONALITY_INVALID
)
2453 "%sPersonality: %s\n",
2454 prefix
, strna(personality_to_string(c
->personality
)));
2456 if (c
->syscall_filter
) {
2464 "%sSystemCallFilter: ",
2467 if (!c
->syscall_whitelist
)
2471 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2472 _cleanup_free_
char *name
= NULL
;
2479 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2480 fputs(strna(name
), f
);
2487 if (c
->syscall_archs
) {
2494 "%sSystemCallArchitectures:",
2498 SET_FOREACH(id
, c
->syscall_archs
, j
)
2499 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2504 if (c
->syscall_errno
!= 0)
2506 "%sSystemCallErrorNumber: %s\n",
2507 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2509 if (c
->apparmor_profile
)
2511 "%sAppArmorProfile: %s%s\n",
2512 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
/* Privilege check on the configured user of an ExecContext; the visible
 * comparison treats the user names "root" and "0" alike.
 * NOTE(review): the remaining branches and return statements are not
 * visible in this excerpt. */
2515 bool exec_context_maintains_privileges(ExecContext
*c
) {
2518 /* Returns true if the process forked off would run under
2519 * an unchanged UID or as root. */
2524 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
/* Marks the start of a process in an ExecStatus: stores the current time in
 * s->start_timestamp.
 * NOTE(review): how pid is recorded is not visible in this excerpt. */
2530 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2535 dual_timestamp_get(&s
->start_timestamp
);
2538 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2541 if (s
->pid
&& s
->pid
!= pid
)
2545 dual_timestamp_get(&s
->exit_timestamp
);
2551 if (context
->utmp_id
)
2552 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2554 exec_context_tty_reset(context
);
2558 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2559 char buf
[FORMAT_TIMESTAMP_MAX
];
2567 prefix
= strempty(prefix
);
2570 "%sPID: "PID_FMT
"\n",
2573 if (s
->start_timestamp
.realtime
> 0)
2575 "%sStart Timestamp: %s\n",
2576 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2578 if (s
->exit_timestamp
.realtime
> 0)
2580 "%sExit Timestamp: %s\n"
2582 "%sExit Status: %i\n",
2583 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2584 prefix
, sigchld_code_to_string(s
->code
),
2588 char *exec_command_line(char **argv
) {
2596 STRV_FOREACH(a
, argv
)
2599 if (!(n
= new(char, k
)))
2603 STRV_FOREACH(a
, argv
) {
2610 if (strpbrk(*a
, WHITESPACE
)) {
2621 /* FIXME: this doesn't really handle arguments that have
2622 * spaces and ticks in them */
2627 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2628 _cleanup_free_
char *cmd
= NULL
;
2629 const char *prefix2
;
2634 prefix
= strempty(prefix
);
2635 prefix2
= strjoina(prefix
, "\t");
2637 cmd
= exec_command_line(c
->argv
);
2639 "%sCommand Line: %s\n",
2640 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2642 exec_status_dump(&c
->exec_status
, f
, prefix2
);
/* Dumps every command of the list starting at c to f by calling
 * exec_command_dump() on each entry. A NULL prefix is replaced by the empty
 * string via strempty() before use. */
2645 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2648 prefix
= strempty(prefix
);
2650 LIST_FOREACH(command
, c
, c
)
2651 exec_command_dump(c
, f
, prefix
);
/* Appends command e to the end of the list *l: the current tail is looked
 * up with LIST_FIND_TAIL() and e is inserted after it, so the existing
 * order of commands is preserved.
 * NOTE(review): the handling of an empty list is not visible in this
 * excerpt. */
2654 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2661 /* It's kind of important, that we keep the order here */
2662 LIST_FIND_TAIL(command
, *l
, end
);
2663 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2668 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2676 l
= strv_new_ap(path
, ap
);
2697 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2698 _cleanup_strv_free_
char **l
= NULL
;
2706 l
= strv_new_ap(path
, ap
);
2712 r
= strv_extend_strv(&c
->argv
, l
);
/* Allocates a single ExecRuntime with new0() and stores it in *rt, then
 * sets both ends of netns_storage_socket to -1 (elsewhere in this file a
 * value < 0 is treated as "no socket yet").
 * NOTE(review): the early-exit and allocation-failure paths are not visible
 * in this excerpt. */
2720 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2725 *rt
= new0(ExecRuntime
, 1);
2730 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2735 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2745 if (!c
->private_network
&& !c
->private_tmp
)
2748 r
= exec_runtime_allocate(rt
);
2752 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2753 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2757 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2758 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2766 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2768 assert(r
->n_ref
> 0);
2774 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2779 assert(r
->n_ref
> 0);
2786 free(r
->var_tmp_dir
);
2787 safe_close_pair(r
->netns_storage_socket
);
2793 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2802 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2804 if (rt
->var_tmp_dir
)
2805 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2807 if (rt
->netns_storage_socket
[0] >= 0) {
2810 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2814 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2817 if (rt
->netns_storage_socket
[1] >= 0) {
2820 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2824 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2830 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2837 if (streq(key
, "tmp-dir")) {
2840 r
= exec_runtime_allocate(rt
);
2844 copy
= strdup(value
);
2848 free((*rt
)->tmp_dir
);
2849 (*rt
)->tmp_dir
= copy
;
2851 } else if (streq(key
, "var-tmp-dir")) {
2854 r
= exec_runtime_allocate(rt
);
2858 copy
= strdup(value
);
2862 free((*rt
)->var_tmp_dir
);
2863 (*rt
)->var_tmp_dir
= copy
;
2865 } else if (streq(key
, "netns-socket-0")) {
2868 r
= exec_runtime_allocate(rt
);
2872 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2873 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2875 safe_close((*rt
)->netns_storage_socket
[0]);
2876 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2878 } else if (streq(key
, "netns-socket-1")) {
2881 r
= exec_runtime_allocate(rt
);
2885 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2886 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2888 safe_close((*rt
)->netns_storage_socket
[1]);
2889 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
/* Job function passed to asynchronous_job() by exec_runtime_destroy(). p is
 * the path of the directory to remove; it is deleted recursively, including
 * the directory itself (REMOVE_ROOT|REMOVE_PHYSICAL), and errors from
 * rm_rf() are deliberately ignored. The path is held in a _cleanup_free_
 * variable, so presumably this function takes ownership of the string and
 * frees it on return. */
2897 static void *remove_tmpdir_thread(void *p
) {
2898 _cleanup_free_
char *path
= p
;
2900 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2904 void exec_runtime_destroy(ExecRuntime
*rt
) {
2910 /* If there are multiple users of this, let's leave the stuff around */
2915 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2917 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2919 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2926 if (rt
->var_tmp_dir
) {
2927 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2929 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2931 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2932 free(rt
->var_tmp_dir
);
2935 rt
->var_tmp_dir
= NULL
;
2938 safe_close_pair(rt
->netns_storage_socket
);
/* Names of the ExecInput values, indexed by enum value. The table is handed
 * to DEFINE_STRING_TABLE_LOOKUP(); exec_input_to_string(), which
 * exec_context_dump() calls, presumably comes from that macro. */
2941 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2942 [EXEC_INPUT_NULL
] = "null",
2943 [EXEC_INPUT_TTY
] = "tty",
2944 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2945 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2946 [EXEC_INPUT_SOCKET
] = "socket"
2949 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* Names of the ExecOutput values, indexed by enum value. The "+console"
 * entries name the *_AND_CONSOLE variants of the syslog, kmsg and journal
 * outputs. The table is handed to DEFINE_STRING_TABLE_LOOKUP();
 * exec_output_to_string(), which exec_context_dump() calls, presumably
 * comes from that macro. */
2951 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2952 [EXEC_OUTPUT_INHERIT
] = "inherit",
2953 [EXEC_OUTPUT_NULL
] = "null",
2954 [EXEC_OUTPUT_TTY
] = "tty",
2955 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2956 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2957 [EXEC_OUTPUT_KMSG
] = "kmsg",
2958 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2959 [EXEC_OUTPUT_JOURNAL
] = "journal",
2960 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2961 [EXEC_OUTPUT_SOCKET
] = "socket"
2964 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);