1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
35 #include <sys/personality.h>
38 #include <security/pam_appl.h>
42 #include <selinux/selinux.h>
50 #include <sys/apparmor.h>
53 #include "sd-messages.h"
57 #include "capability.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
85 #include "apparmor-util.h"
89 #include "seccomp-util.h"
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97 /* This assumes there is a 'tty' group */
100 #define SNDBUF_SIZE (8*1024*1024)
102 static int shift_fds(int fds
[], unsigned n_fds
) {
103 int start
, restart_from
;
108 /* Modifies the fds array! (sorts it) */
118 for (i
= start
; i
< (int) n_fds
; i
++) {
121 /* Already at right index? */
125 nfd
= fcntl(fds
[i
], F_DUPFD
, i
+ 3);
132 /* Hmm, the fd we wanted isn't free? Then
133 * let's remember that and try again from here */
134 if (nfd
!= i
+3 && restart_from
< 0)
138 if (restart_from
< 0)
141 start
= restart_from
;
147 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
156 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
158 for (i
= 0; i
< n_fds
; i
++) {
160 r
= fd_nonblock(fds
[i
], nonblock
);
164 /* We unconditionally drop FD_CLOEXEC from the fds,
165 * since after all we want to pass these fds to our
168 r
= fd_cloexec(fds
[i
], false);
176 _pure_
static const char *tty_path(const ExecContext
*context
) {
179 if (context
->tty_path
)
180 return context
->tty_path
;
182 return "/dev/console";
185 static void exec_context_tty_reset(const ExecContext
*context
) {
188 if (context
->tty_vhangup
)
189 terminal_vhangup(tty_path(context
));
191 if (context
->tty_reset
)
192 reset_terminal(tty_path(context
));
194 if (context
->tty_vt_disallocate
&& context
->tty_path
)
195 vt_disallocate(context
->tty_path
);
198 static bool is_terminal_output(ExecOutput o
) {
200 o
== EXEC_OUTPUT_TTY
||
201 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
202 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
203 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
206 static int open_null_as(int flags
, int nfd
) {
211 fd
= open("/dev/null", flags
|O_NOCTTY
);
216 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
224 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
225 union sockaddr_union sa
= {
226 .un
.sun_family
= AF_UNIX
,
227 .un
.sun_path
= "/run/systemd/journal/stdout",
229 uid_t olduid
= UID_INVALID
;
230 gid_t oldgid
= GID_INVALID
;
233 if (gid
!= GID_INVALID
) {
241 if (uid
!= UID_INVALID
) {
251 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
258 if (uid
!= UID_INVALID
)
259 (void) seteuid(olduid
);
262 if (gid
!= GID_INVALID
)
263 (void) setegid(oldgid
);
268 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
272 assert(output
< _EXEC_OUTPUT_MAX
);
276 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
280 r
= connect_journal_socket(fd
, uid
, gid
);
284 if (shutdown(fd
, SHUT_RD
) < 0) {
289 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
299 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
301 context
->syslog_priority
,
302 !!context
->syslog_level_prefix
,
303 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
304 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
305 is_terminal_output(output
));
308 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
315 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
321 fd
= open_terminal(path
, mode
| O_NOCTTY
);
326 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
334 static bool is_terminal_input(ExecInput i
) {
336 i
== EXEC_INPUT_TTY
||
337 i
== EXEC_INPUT_TTY_FORCE
||
338 i
== EXEC_INPUT_TTY_FAIL
;
341 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
343 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
344 return EXEC_INPUT_NULL
;
346 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
347 return EXEC_INPUT_NULL
;
352 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
354 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
355 return EXEC_OUTPUT_INHERIT
;
360 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
365 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
369 case EXEC_INPUT_NULL
:
370 return open_null_as(O_RDONLY
, STDIN_FILENO
);
373 case EXEC_INPUT_TTY_FORCE
:
374 case EXEC_INPUT_TTY_FAIL
: {
377 fd
= acquire_terminal(tty_path(context
),
378 i
== EXEC_INPUT_TTY_FAIL
,
379 i
== EXEC_INPUT_TTY_FORCE
,
385 if (fd
!= STDIN_FILENO
) {
386 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
394 case EXEC_INPUT_SOCKET
:
395 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
398 assert_not_reached("Unknown input type");
402 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
411 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
412 o
= fixup_output(context
->std_output
, socket_fd
);
414 if (fileno
== STDERR_FILENO
) {
416 e
= fixup_output(context
->std_error
, socket_fd
);
418 /* This expects the input and output are already set up */
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e
== EXEC_OUTPUT_INHERIT
&&
423 o
== EXEC_OUTPUT_INHERIT
&&
424 i
== EXEC_INPUT_NULL
&&
425 !is_terminal_input(context
->std_input
) &&
429 /* Duplicate from stdout if possible */
430 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
431 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
435 } else if (o
== EXEC_OUTPUT_INHERIT
) {
436 /* If input got downgraded, inherit the original value */
437 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
438 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i
!= EXEC_INPUT_NULL
)
442 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY
, fileno
);
454 case EXEC_OUTPUT_NULL
:
455 return open_null_as(O_WRONLY
, fileno
);
457 case EXEC_OUTPUT_TTY
:
458 if (is_terminal_input(i
))
459 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
464 case EXEC_OUTPUT_SYSLOG
:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
466 case EXEC_OUTPUT_KMSG
:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
468 case EXEC_OUTPUT_JOURNAL
:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
470 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
472 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
473 r
= open_null_as(O_WRONLY
, fileno
);
477 case EXEC_OUTPUT_SOCKET
:
478 assert(socket_fd
>= 0);
479 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
482 assert_not_reached("Unknown error type");
486 static int chown_terminal(int fd
, uid_t uid
) {
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd
, uid
, -1);
493 (void) fchmod(fd
, TTY_MODE
);
495 if (fstat(fd
, &st
) < 0)
498 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
504 static int setup_confirm_stdio(int *_saved_stdin
,
505 int *_saved_stdout
) {
506 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
508 assert(_saved_stdin
);
509 assert(_saved_stdout
);
511 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
515 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
516 if (saved_stdout
< 0) {
521 fd
= acquire_terminal(
526 DEFAULT_CONFIRM_USEC
);
532 r
= chown_terminal(fd
, getuid());
536 if (dup2(fd
, STDIN_FILENO
) < 0) {
541 if (dup2(fd
, STDOUT_FILENO
) < 0) {
549 *_saved_stdin
= saved_stdin
;
550 *_saved_stdout
= saved_stdout
;
555 safe_close(saved_stdout
);
556 safe_close(saved_stdin
);
562 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
563 _cleanup_close_
int fd
= -1;
568 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
572 va_start(ap
, format
);
573 vdprintf(fd
, format
, ap
);
579 static int restore_confirm_stdio(int *saved_stdin
,
585 assert(saved_stdout
);
589 if (*saved_stdin
>= 0)
590 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
593 if (*saved_stdout
>= 0)
594 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
597 safe_close(*saved_stdin
);
598 safe_close(*saved_stdout
);
603 static int ask_for_confirmation(char *response
, char **argv
) {
604 int saved_stdout
= -1, saved_stdin
= -1, r
;
605 _cleanup_free_
char *line
= NULL
;
607 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
611 line
= exec_command_line(argv
);
615 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
617 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
622 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
623 bool keep_groups
= false;
628 /* Lookup and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
631 if (context
->group
|| username
) {
633 if (context
->group
) {
634 const char *g
= context
->group
;
636 r
= get_group_creds(&g
, &gid
);
641 /* First step, initialize groups from /etc/group */
642 if (username
&& gid
!= 0) {
643 if (initgroups(username
, gid
) < 0)
649 /* Second step, set our gids */
650 if (setresgid(gid
, gid
, gid
) < 0)
654 if (context
->supplementary_groups
) {
659 /* Final step, initialize any manually set supplementary groups */
660 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
662 if (!(gids
= new(gid_t
, ngroups_max
)))
666 k
= getgroups(ngroups_max
, gids
);
674 STRV_FOREACH(i
, context
->supplementary_groups
) {
677 if (k
>= ngroups_max
) {
683 r
= get_group_creds(&g
, gids
+k
);
692 if (setgroups(k
, gids
) < 0) {
703 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
709 if (context
->capabilities
) {
710 _cleanup_cap_free_ cap_t d
= NULL
;
711 static const cap_value_t bits
[] = {
712 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
720 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
722 if (prctl(PR_GET_SECUREBITS
) != sb
)
723 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
730 d
= cap_dup(context
->capabilities
);
734 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
735 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
738 if (cap_set_proc(d
) < 0)
742 /* Third step: actually set the uids */
743 if (setresuid(uid
, uid
, uid
) < 0)
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might have got
748 corrupted due to the setresuid() so we need to clean them up
749 later. This is done outside of this call. */
756 static int null_conv(
758 const struct pam_message
**msg
,
759 struct pam_response
**resp
,
762 /* We don't support conversations */
767 static int setup_pam(
773 int fds
[], unsigned n_fds
) {
775 static const struct pam_conv conv
= {
780 pam_handle_t
*handle
= NULL
;
782 int pam_code
= PAM_SUCCESS
;
785 bool close_session
= false;
786 pid_t pam_pid
= 0, parent_pid
;
793 /* We set up PAM in the parent process, then fork. The child
794 * will then stay around until killed via PR_SET_PDEATHSIG or
795 * systemd via the cgroup logic. It will then remove the PAM
796 * session again. The parent process will exec() the actual
797 * daemon. We do things this way to ensure that the main PID
798 * of the daemon is the one we initially fork()ed. */
800 if (log_get_max_level() < LOG_DEBUG
)
803 pam_code
= pam_start(name
, user
, &conv
, &handle
);
804 if (pam_code
!= PAM_SUCCESS
) {
810 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
811 if (pam_code
!= PAM_SUCCESS
)
815 pam_code
= pam_acct_mgmt(handle
, flags
);
816 if (pam_code
!= PAM_SUCCESS
)
819 pam_code
= pam_open_session(handle
, flags
);
820 if (pam_code
!= PAM_SUCCESS
)
823 close_session
= true;
825 e
= pam_getenvlist(handle
);
827 pam_code
= PAM_BUF_ERR
;
831 /* Block SIGTERM, so that we know that it won't get lost in
834 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
836 parent_pid
= getpid();
846 /* The child's job is to reset the PAM session on
849 /* This string must fit in 10 chars (i.e. the length
850 * of "/sbin/init"), to look pretty in /bin/ps */
851 rename_process("(sd-pam)");
853 /* Make sure we don't keep open the passed fds in this
854 child. We assume that otherwise only those fds are
855 open here that have been opened by PAM. */
856 close_many(fds
, n_fds
);
858 /* Drop privileges - we don't need any to pam_close_session
859 * and this will make PR_SET_PDEATHSIG work in most cases.
860 * If this fails, ignore the error - but expect sd-pam threads
861 * to fail to exit normally */
862 if (setresuid(uid
, uid
, uid
) < 0)
863 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
865 (void) ignore_signals(SIGPIPE
, -1);
867 /* Wait until our parent died. This will only work if
868 * the above setresuid() succeeds, otherwise the kernel
869 * will not allow unprivileged parents to kill their privileged
870 * children this way. We rely on the control groups kill logic
871 * to do the rest for us. */
872 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
875 /* Check if our parent process might already have
877 if (getppid() == parent_pid
) {
880 assert_se(sigemptyset(&ss
) >= 0);
881 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
884 if (sigwait(&ss
, &sig
) < 0) {
891 assert(sig
== SIGTERM
);
896 /* If our parent died we'll end the session */
897 if (getppid() != parent_pid
) {
898 pam_code
= pam_close_session(handle
, flags
);
899 if (pam_code
!= PAM_SUCCESS
)
906 pam_end(handle
, pam_code
| flags
);
910 /* If the child was forked off successfully it will do all the
911 * cleanups, so forget about the handle here. */
914 /* Unblock SIGTERM again in the parent */
915 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
917 /* We close the log explicitly here, since the PAM modules
918 * might have opened it, but we don't want this fd around. */
927 if (pam_code
!= PAM_SUCCESS
) {
928 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
929 err
= -EPERM
; /* PAM errors do not map to errno */
931 err
= log_error_errno(errno
, "PAM failed: %m");
936 pam_code
= pam_close_session(handle
, flags
);
938 pam_end(handle
, pam_code
| flags
);
946 kill(pam_pid
, SIGTERM
);
947 kill(pam_pid
, SIGCONT
);
954 static void rename_process_from_path(const char *path
) {
955 char process_name
[11];
959 /* This resulting string must fit in 10 chars (i.e. the length
960 * of "/sbin/init") to look pretty in /bin/ps */
964 rename_process("(...)");
970 /* The end of the process name is usually more
971 * interesting, since the first bit might just be
977 process_name
[0] = '(';
978 memcpy(process_name
+1, p
, l
);
979 process_name
[1+l
] = ')';
980 process_name
[1+l
+1] = 0;
982 rename_process(process_name
);
987 static int apply_seccomp(const ExecContext
*c
) {
988 uint32_t negative_action
, action
;
989 scmp_filter_ctx
*seccomp
;
996 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
998 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
1002 if (c
->syscall_archs
) {
1004 SET_FOREACH(id
, c
->syscall_archs
, i
) {
1005 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1013 r
= seccomp_add_secondary_archs(seccomp
);
1018 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1019 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1020 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1025 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1029 r
= seccomp_load(seccomp
);
1032 seccomp_release(seccomp
);
1036 static int apply_address_families(const ExecContext
*c
) {
1037 scmp_filter_ctx
*seccomp
;
1043 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1047 r
= seccomp_add_secondary_archs(seccomp
);
1051 if (c
->address_families_whitelist
) {
1052 int af
, first
= 0, last
= 0;
1055 /* If this is a whitelist, we first block the address
1056 * families that are out of range and then everything
1057 * that is not in the set. First, we find the lowest
1058 * and highest address family in the set. */
1060 SET_FOREACH(afp
, c
->address_families
, i
) {
1061 af
= PTR_TO_INT(afp
);
1063 if (af
<= 0 || af
>= af_max())
1066 if (first
== 0 || af
< first
)
1069 if (last
== 0 || af
> last
)
1073 assert((first
== 0) == (last
== 0));
1077 /* No entries in the valid range, block everything */
1078 r
= seccomp_rule_add(
1080 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1088 /* Block everything below the first entry */
1089 r
= seccomp_rule_add(
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1094 SCMP_A0(SCMP_CMP_LT
, first
));
1098 /* Block everything above the last entry */
1099 r
= seccomp_rule_add(
1101 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1104 SCMP_A0(SCMP_CMP_GT
, last
));
1108 /* Block everything between the first and last
1110 for (af
= 1; af
< af_max(); af
++) {
1112 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1115 r
= seccomp_rule_add(
1117 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1120 SCMP_A0(SCMP_CMP_EQ
, af
));
1129 /* If this is a blacklist, then generate one rule for
1130 * each address family that are then combined in OR
1133 SET_FOREACH(af
, c
->address_families
, i
) {
1135 r
= seccomp_rule_add(
1137 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1140 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1146 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1150 r
= seccomp_load(seccomp
);
1153 seccomp_release(seccomp
);
1159 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1163 safe_close(idle_pipe
[1]);
1164 safe_close(idle_pipe
[2]);
1166 if (idle_pipe
[0] >= 0) {
1169 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1171 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1172 /* Signal systemd that we are bored and want to continue. */
1173 r
= write(idle_pipe
[3], "x", 1);
1175 /* Wait for systemd to react to the signal above. */
1176 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1179 safe_close(idle_pipe
[0]);
1183 safe_close(idle_pipe
[3]);
1186 static int build_environment(
1187 const ExecContext
*c
,
1189 usec_t watchdog_usec
,
1191 const char *username
,
1195 _cleanup_strv_free_
char **our_env
= NULL
;
1202 our_env
= new0(char*, 10);
1207 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1209 our_env
[n_env
++] = x
;
1211 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1213 our_env
[n_env
++] = x
;
1216 if (watchdog_usec
> 0) {
1217 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1219 our_env
[n_env
++] = x
;
1221 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1223 our_env
[n_env
++] = x
;
1227 x
= strappend("HOME=", home
);
1230 our_env
[n_env
++] = x
;
1234 x
= strappend("LOGNAME=", username
);
1237 our_env
[n_env
++] = x
;
1239 x
= strappend("USER=", username
);
1242 our_env
[n_env
++] = x
;
1246 x
= strappend("SHELL=", shell
);
1249 our_env
[n_env
++] = x
;
1252 if (is_terminal_input(c
->std_input
) ||
1253 c
->std_output
== EXEC_OUTPUT_TTY
||
1254 c
->std_error
== EXEC_OUTPUT_TTY
||
1257 x
= strdup(default_term_for_tty(tty_path(c
)));
1260 our_env
[n_env
++] = x
;
1263 our_env
[n_env
++] = NULL
;
1264 assert(n_env
<= 10);
1272 static bool exec_needs_mount_namespace(
1273 const ExecContext
*context
,
1274 const ExecParameters
*params
,
1275 ExecRuntime
*runtime
) {
1280 if (!strv_isempty(context
->read_write_dirs
) ||
1281 !strv_isempty(context
->read_only_dirs
) ||
1282 !strv_isempty(context
->inaccessible_dirs
))
1285 if (context
->mount_flags
!= 0)
1288 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1291 if (params
->bus_endpoint_path
)
1294 if (context
->private_devices
||
1295 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1296 context
->protect_home
!= PROTECT_HOME_NO
)
1302 static int exec_child(
1304 ExecCommand
*command
,
1305 const ExecContext
*context
,
1306 const ExecParameters
*params
,
1307 ExecRuntime
*runtime
,
1310 int *fds
, unsigned n_fds
,
1314 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1315 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1316 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1317 unsigned n_dont_close
= 0;
1318 int dont_close
[n_fds
+ 4];
1319 uid_t uid
= UID_INVALID
;
1320 gid_t gid
= GID_INVALID
;
1322 bool needs_mount_namespace
;
1328 assert(exit_status
);
1330 rename_process_from_path(command
->path
);
1332 /* We reset exactly these signals, since they are the
1333 * only ones we set to SIG_IGN in the main daemon. All
1334 * others we leave untouched because we set them to
1335 * SIG_DFL or a valid handler initially, both of which
1336 * will be demoted to SIG_DFL. */
1337 (void) default_signals(SIGNALS_CRASH_HANDLER
,
1338 SIGNALS_IGNORE
, -1);
1340 if (context
->ignore_sigpipe
)
1341 (void) ignore_signals(SIGPIPE
, -1);
1343 r
= reset_signal_mask();
1345 *exit_status
= EXIT_SIGNAL_MASK
;
1349 if (params
->idle_pipe
)
1350 do_idle_pipe_dance(params
->idle_pipe
);
1352 /* Close sockets very early to make sure we don't
1353 * block init reexecution because it cannot bind its
1359 dont_close
[n_dont_close
++] = socket_fd
;
1361 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1362 n_dont_close
+= n_fds
;
1364 if (params
->bus_endpoint_fd
>= 0)
1365 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1367 if (runtime
->netns_storage_socket
[0] >= 0)
1368 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1369 if (runtime
->netns_storage_socket
[1] >= 0)
1370 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1373 r
= close_all_fds(dont_close
, n_dont_close
);
1375 *exit_status
= EXIT_FDS
;
1379 if (!context
->same_pgrp
)
1381 *exit_status
= EXIT_SETSID
;
1385 exec_context_tty_reset(context
);
1387 if (params
->confirm_spawn
) {
1390 r
= ask_for_confirmation(&response
, argv
);
1391 if (r
== -ETIMEDOUT
)
1392 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1394 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1395 else if (response
== 's') {
1396 write_confirm_message("Skipping execution.\n");
1397 *exit_status
= EXIT_CONFIRM
;
1399 } else if (response
== 'n') {
1400 write_confirm_message("Failing execution.\n");
1406 if (context
->user
) {
1407 username
= context
->user
;
1408 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1410 *exit_status
= EXIT_USER
;
1415 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1416 * must be sure to drop O_NONBLOCK */
1418 fd_nonblock(socket_fd
, false);
1420 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1422 *exit_status
= EXIT_STDIN
;
1426 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1428 *exit_status
= EXIT_STDOUT
;
1432 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1434 *exit_status
= EXIT_STDERR
;
1438 if (params
->cgroup_path
) {
1439 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1441 *exit_status
= EXIT_CGROUP
;
1446 if (context
->oom_score_adjust_set
) {
1447 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1449 /* When we can't make this change due to EPERM, then
1450 * let's silently skip over it. User namespaces
1451 * prohibit write access to this file, and we
1452 * shouldn't trip up over that. */
1454 sprintf(t
, "%i", context
->oom_score_adjust
);
1455 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
1456 if (r
== -EPERM
|| r
== -EACCES
) {
1458 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1461 *exit_status
= EXIT_OOM_ADJUST
;
1466 if (context
->nice_set
)
1467 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1468 *exit_status
= EXIT_NICE
;
1472 if (context
->cpu_sched_set
) {
1473 struct sched_param param
= {
1474 .sched_priority
= context
->cpu_sched_priority
,
1477 r
= sched_setscheduler(0,
1478 context
->cpu_sched_policy
|
1479 (context
->cpu_sched_reset_on_fork
?
1480 SCHED_RESET_ON_FORK
: 0),
1483 *exit_status
= EXIT_SETSCHEDULER
;
1488 if (context
->cpuset
)
1489 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1490 *exit_status
= EXIT_CPUAFFINITY
;
1494 if (context
->ioprio_set
)
1495 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1496 *exit_status
= EXIT_IOPRIO
;
1500 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1501 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1502 *exit_status
= EXIT_TIMERSLACK
;
1506 if (context
->personality
!= PERSONALITY_INVALID
)
1507 if (personality(context
->personality
) < 0) {
1508 *exit_status
= EXIT_PERSONALITY
;
1512 if (context
->utmp_id
)
1513 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
,
1514 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
1515 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
1517 username
? "root" : context
->user
);
1519 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1520 r
= chown_terminal(STDIN_FILENO
, uid
);
1522 *exit_status
= EXIT_STDIN
;
1527 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1528 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1530 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1532 *exit_status
= EXIT_BUS_ENDPOINT
;
1537 /* If delegation is enabled we'll pass ownership of the cgroup
1538 * (but only in systemd's own controller hierarchy!) to the
1539 * user of the new process. */
1540 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1541 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1543 *exit_status
= EXIT_CGROUP
;
1548 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1550 *exit_status
= EXIT_CGROUP
;
1555 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1558 STRV_FOREACH(rt
, context
->runtime_directory
) {
1559 _cleanup_free_
char *p
;
1561 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1563 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1567 r
= mkdir_p_label(p
, context
->runtime_directory_mode
);
1569 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1573 r
= chmod_and_chown(p
, context
->runtime_directory_mode
, uid
, gid
);
1575 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1581 if (params
->apply_permissions
) {
1582 r
= enforce_groups(context
, username
, gid
);
1584 *exit_status
= EXIT_GROUP
;
1589 umask(context
->umask
);
1592 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1593 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1595 *exit_status
= EXIT_PAM
;
1601 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1602 r
= setup_netns(runtime
->netns_storage_socket
);
1604 *exit_status
= EXIT_NETWORK
;
1609 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1611 if (needs_mount_namespace
) {
1612 char *tmp
= NULL
, *var
= NULL
;
1614 /* The runtime struct only contains the parent
1615 * of the private /tmp, which is
1616 * non-accessible to world users. Inside of it
1617 * there's a /tmp that is sticky, and that's
1618 * the one we want to use here. */
1620 if (context
->private_tmp
&& runtime
) {
1621 if (runtime
->tmp_dir
)
1622 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1623 if (runtime
->var_tmp_dir
)
1624 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1627 r
= setup_namespace(
1628 params
->apply_chroot
? context
->root_directory
: NULL
,
1629 context
->read_write_dirs
,
1630 context
->read_only_dirs
,
1631 context
->inaccessible_dirs
,
1634 params
->bus_endpoint_path
,
1635 context
->private_devices
,
1636 context
->protect_home
,
1637 context
->protect_system
,
1638 context
->mount_flags
);
1640 /* If we couldn't set up the namespace this is
1641 * probably due to a missing capability. In this case,
1642 * silently proceed. */
1643 if (r
== -EPERM
|| r
== -EACCES
) {
1645 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1648 *exit_status
= EXIT_NAMESPACE
;
1653 if (params
->apply_chroot
) {
1654 if (!needs_mount_namespace
&& context
->root_directory
)
1655 if (chroot(context
->root_directory
) < 0) {
1656 *exit_status
= EXIT_CHROOT
;
1660 if (chdir(context
->working_directory
?: "/") < 0 &&
1661 !context
->working_directory_missing_ok
) {
1662 *exit_status
= EXIT_CHDIR
;
1666 _cleanup_free_
char *d
= NULL
;
1668 if (asprintf(&d
, "%s/%s",
1669 context
->root_directory
?: "",
1670 context
->working_directory
?: "") < 0) {
1671 *exit_status
= EXIT_MEMORY
;
1676 !context
->working_directory_missing_ok
) {
1677 *exit_status
= EXIT_CHDIR
;
1683 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1684 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1686 *exit_status
= EXIT_SELINUX_CONTEXT
;
1692 /* We repeat the fd closing here, to make sure that
1693 * nothing is leaked from the PAM modules. Note that
1694 * we are more aggressive this time since socket_fd
1695 * and the netns fds we don't need anymore. The custom
1696 * endpoint fd was needed to upload the policy and can
1697 * now be closed as well. */
1698 r
= close_all_fds(fds
, n_fds
);
1700 r
= shift_fds(fds
, n_fds
);
1702 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1704 *exit_status
= EXIT_FDS
;
1708 if (params
->apply_permissions
) {
1710 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1711 if (!context
->rlimit
[i
])
1714 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1715 *exit_status
= EXIT_LIMITS
;
1720 if (context
->capability_bounding_set_drop
) {
1721 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1723 *exit_status
= EXIT_CAPABILITIES
;
1729 if (context
->smack_process_label
) {
1730 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1732 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1736 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1738 _cleanup_free_
char *exec_label
= NULL
;
1740 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1741 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
) {
1742 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1746 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1748 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1755 if (context
->user
) {
1756 r
= enforce_user(context
, uid
);
1758 *exit_status
= EXIT_USER
;
1763 /* PR_GET_SECUREBITS is not privileged, while
1764 * PR_SET_SECUREBITS is. So to suppress
1765 * potential EPERMs we'll try not to call
1766 * PR_SET_SECUREBITS unless necessary. */
1767 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1768 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1769 *exit_status
= EXIT_SECUREBITS
;
1773 if (context
->capabilities
)
1774 if (cap_set_proc(context
->capabilities
) < 0) {
1775 *exit_status
= EXIT_CAPABILITIES
;
1779 if (context
->no_new_privileges
)
1780 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1781 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1786 if (context
->address_families_whitelist
||
1787 !set_isempty(context
->address_families
)) {
1788 r
= apply_address_families(context
);
1790 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1795 if (context
->syscall_whitelist
||
1796 !set_isempty(context
->syscall_filter
) ||
1797 !set_isempty(context
->syscall_archs
)) {
1798 r
= apply_seccomp(context
);
1800 *exit_status
= EXIT_SECCOMP
;
1807 if (mac_selinux_use()) {
1808 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1811 r
= setexeccon(exec_context
);
1813 *exit_status
= EXIT_SELINUX_CONTEXT
;
1820 #ifdef HAVE_APPARMOR
1821 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1822 r
= aa_change_onexec(context
->apparmor_profile
);
1823 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1824 *exit_status
= EXIT_APPARMOR_PROFILE
;
1831 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1833 *exit_status
= EXIT_MEMORY
;
1837 final_env
= strv_env_merge(5,
1838 params
->environment
,
1840 context
->environment
,
1845 *exit_status
= EXIT_MEMORY
;
1849 final_argv
= replace_env_argv(argv
, final_env
);
1851 *exit_status
= EXIT_MEMORY
;
1855 final_env
= strv_env_clean(final_env
);
1857 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1858 _cleanup_free_
char *line
;
1860 line
= exec_command_line(final_argv
);
1863 log_struct(LOG_DEBUG
,
1865 "EXECUTABLE=%s", command
->path
,
1866 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1872 execve(command
->path
, final_argv
, final_env
);
1873 *exit_status
= EXIT_EXEC
;
1877 int exec_spawn(Unit
*unit
,
1878 ExecCommand
*command
,
1879 const ExecContext
*context
,
1880 const ExecParameters
*params
,
1881 ExecRuntime
*runtime
,
1884 _cleanup_strv_free_
char **files_env
= NULL
;
1885 int *fds
= NULL
; unsigned n_fds
= 0;
1886 _cleanup_free_
char *line
= NULL
;
1896 assert(params
->fds
|| params
->n_fds
<= 0);
1898 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1899 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1900 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1902 if (params
->n_fds
!= 1) {
1903 log_unit_error(unit
, "Got more than one socket.");
1907 socket_fd
= params
->fds
[0];
1911 n_fds
= params
->n_fds
;
1914 r
= exec_context_load_environment(unit
, context
, &files_env
);
1916 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
1918 argv
= params
->argv
?: command
->argv
;
1919 line
= exec_command_line(argv
);
1923 log_struct(LOG_DEBUG
,
1925 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1926 "EXECUTABLE=%s", command
->path
,
1930 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1935 r
= exec_child(unit
,
1947 log_struct_errno(LOG_ERR
, r
,
1948 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1950 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1951 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1953 "EXECUTABLE=%s", command
->path
,
1960 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1962 /* We add the new process to the cgroup both in the child (so
1963 * that we can be sure that no user code is ever executed
1964 * outside of the cgroup) and in the parent (so that we can be
1965 * sure that when we kill the cgroup the process will be
1967 if (params
->cgroup_path
)
1968 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1970 exec_status_start(&command
->exec_status
, pid
);
1976 void exec_context_init(ExecContext
*c
) {
1980 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1981 c
->cpu_sched_policy
= SCHED_OTHER
;
1982 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1983 c
->syslog_level_prefix
= true;
1984 c
->ignore_sigpipe
= true;
1985 c
->timer_slack_nsec
= NSEC_INFINITY
;
1986 c
->personality
= PERSONALITY_INVALID
;
1987 c
->runtime_directory_mode
= 0755;
1990 void exec_context_done(ExecContext
*c
) {
1995 c
->environment
= strv_free(c
->environment
);
1996 c
->environment_files
= strv_free(c
->environment_files
);
1998 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++)
1999 c
->rlimit
[l
] = mfree(c
->rlimit
[l
]);
2001 c
->working_directory
= mfree(c
->working_directory
);
2002 c
->root_directory
= mfree(c
->root_directory
);
2003 c
->tty_path
= mfree(c
->tty_path
);
2004 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
2005 c
->user
= mfree(c
->user
);
2006 c
->group
= mfree(c
->group
);
2008 c
->supplementary_groups
= strv_free(c
->supplementary_groups
);
2010 c
->pam_name
= mfree(c
->pam_name
);
2012 if (c
->capabilities
) {
2013 cap_free(c
->capabilities
);
2014 c
->capabilities
= NULL
;
2017 c
->read_only_dirs
= strv_free(c
->read_only_dirs
);
2018 c
->read_write_dirs
= strv_free(c
->read_write_dirs
);
2019 c
->inaccessible_dirs
= strv_free(c
->inaccessible_dirs
);
2022 CPU_FREE(c
->cpuset
);
2024 c
->utmp_id
= mfree(c
->utmp_id
);
2025 c
->selinux_context
= mfree(c
->selinux_context
);
2026 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
2028 c
->syscall_filter
= set_free(c
->syscall_filter
);
2029 c
->syscall_archs
= set_free(c
->syscall_archs
);
2030 c
->address_families
= set_free(c
->address_families
);
2032 c
->runtime_directory
= strv_free(c
->runtime_directory
);
2034 bus_endpoint_free(c
->bus_endpoint
);
2035 c
->bus_endpoint
= NULL
;
2038 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2043 if (!runtime_prefix
)
2046 STRV_FOREACH(i
, c
->runtime_directory
) {
2047 _cleanup_free_
char *p
;
2049 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2053 /* We execute this synchronously, since we need to be
2054 * sure this is gone when we start the service
2056 (void) rm_rf(p
, REMOVE_ROOT
);
2062 void exec_command_done(ExecCommand
*c
) {
2065 c
->path
= mfree(c
->path
);
2067 c
->argv
= strv_free(c
->argv
);
2070 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2073 for (i
= 0; i
< n
; i
++)
2074 exec_command_done(c
+i
);
2077 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2081 LIST_REMOVE(command
, c
, i
);
2082 exec_command_done(i
);
2089 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2092 for (i
= 0; i
< n
; i
++)
2093 c
[i
] = exec_command_free_list(c
[i
]);
/* Context handed to invalid_env() via the userdata pointer of
 * strv_env_clean_with_callback(). */
typedef struct InvalidEnvInfo {
        Unit *unit;        /* unit the log message is attributed to */
        const char *path;  /* environment file the assignment was read from */
} InvalidEnvInfo;
2101 static void invalid_env(const char *p
, void *userdata
) {
2102 InvalidEnvInfo
*info
= userdata
;
2104 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2107 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2108 char **i
, **r
= NULL
;
2113 STRV_FOREACH(i
, c
->environment_files
) {
2116 bool ignore
= false;
2118 _cleanup_globfree_ glob_t pglob
= {};
2128 if (!path_is_absolute(fn
)) {
2136 /* Filename supports globbing, take all matching files */
2138 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2143 return errno
? -errno
: -EINVAL
;
2145 count
= pglob
.gl_pathc
;
2153 for (n
= 0; n
< count
; n
++) {
2154 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2162 /* Log invalid environment variables with filename */
2164 InvalidEnvInfo info
= {
2166 .path
= pglob
.gl_pathv
[n
]
2169 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2177 m
= strv_env_merge(2, r
, p
);
2193 static bool tty_may_match_dev_console(const char *tty
) {
2194 _cleanup_free_
char *active
= NULL
;
2197 if (startswith(tty
, "/dev/"))
2200 /* trivial identity? */
2201 if (streq(tty
, "console"))
2204 console
= resolve_dev_console(&active
);
2205 /* if we could not resolve, assume it may */
2209 /* "tty0" means the active VC, so it may be the same sometimes */
2210 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2213 bool exec_context_may_touch_console(ExecContext
*ec
) {
2214 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2215 is_terminal_input(ec
->std_input
) ||
2216 is_terminal_output(ec
->std_output
) ||
2217 is_terminal_output(ec
->std_error
)) &&
2218 tty_may_match_dev_console(tty_path(ec
));
2221 static void strv_fprintf(FILE *f
, char **l
) {
2227 fprintf(f
, " %s", *g
);
2230 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2237 prefix
= strempty(prefix
);
2241 "%sWorkingDirectory: %s\n"
2242 "%sRootDirectory: %s\n"
2243 "%sNonBlocking: %s\n"
2244 "%sPrivateTmp: %s\n"
2245 "%sPrivateNetwork: %s\n"
2246 "%sPrivateDevices: %s\n"
2247 "%sProtectHome: %s\n"
2248 "%sProtectSystem: %s\n"
2249 "%sIgnoreSIGPIPE: %s\n",
2251 prefix
, c
->working_directory
? c
->working_directory
: "/",
2252 prefix
, c
->root_directory
? c
->root_directory
: "/",
2253 prefix
, yes_no(c
->non_blocking
),
2254 prefix
, yes_no(c
->private_tmp
),
2255 prefix
, yes_no(c
->private_network
),
2256 prefix
, yes_no(c
->private_devices
),
2257 prefix
, protect_home_to_string(c
->protect_home
),
2258 prefix
, protect_system_to_string(c
->protect_system
),
2259 prefix
, yes_no(c
->ignore_sigpipe
));
2261 STRV_FOREACH(e
, c
->environment
)
2262 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2264 STRV_FOREACH(e
, c
->environment_files
)
2265 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2272 if (c
->oom_score_adjust_set
)
2274 "%sOOMScoreAdjust: %i\n",
2275 prefix
, c
->oom_score_adjust
);
2277 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2279 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2280 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2282 if (c
->ioprio_set
) {
2283 _cleanup_free_
char *class_str
= NULL
;
2285 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2287 "%sIOSchedulingClass: %s\n"
2288 "%sIOPriority: %i\n",
2289 prefix
, strna(class_str
),
2290 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2293 if (c
->cpu_sched_set
) {
2294 _cleanup_free_
char *policy_str
= NULL
;
2296 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2298 "%sCPUSchedulingPolicy: %s\n"
2299 "%sCPUSchedulingPriority: %i\n"
2300 "%sCPUSchedulingResetOnFork: %s\n",
2301 prefix
, strna(policy_str
),
2302 prefix
, c
->cpu_sched_priority
,
2303 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2307 fprintf(f
, "%sCPUAffinity:", prefix
);
2308 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2309 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2310 fprintf(f
, " %u", i
);
2314 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2315 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2318 "%sStandardInput: %s\n"
2319 "%sStandardOutput: %s\n"
2320 "%sStandardError: %s\n",
2321 prefix
, exec_input_to_string(c
->std_input
),
2322 prefix
, exec_output_to_string(c
->std_output
),
2323 prefix
, exec_output_to_string(c
->std_error
));
2329 "%sTTYVHangup: %s\n"
2330 "%sTTYVTDisallocate: %s\n",
2331 prefix
, c
->tty_path
,
2332 prefix
, yes_no(c
->tty_reset
),
2333 prefix
, yes_no(c
->tty_vhangup
),
2334 prefix
, yes_no(c
->tty_vt_disallocate
));
2336 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2337 c
->std_output
== EXEC_OUTPUT_KMSG
||
2338 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2339 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2340 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2341 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2342 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2343 c
->std_error
== EXEC_OUTPUT_KMSG
||
2344 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2345 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2346 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2347 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2349 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2351 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2352 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2355 "%sSyslogFacility: %s\n"
2356 "%sSyslogLevel: %s\n",
2357 prefix
, strna(fac_str
),
2358 prefix
, strna(lvl_str
));
2361 if (c
->capabilities
) {
2362 _cleanup_cap_free_charp_
char *t
;
2364 t
= cap_to_text(c
->capabilities
, NULL
);
2366 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
2370 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2372 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2373 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2374 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2375 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2376 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2377 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
2379 if (c
->capability_bounding_set_drop
) {
2381 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2383 for (l
= 0; l
<= cap_last_cap(); l
++)
2384 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2385 fprintf(f
, " %s", strna(capability_to_name(l
)));
2391 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2393 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2395 if (strv_length(c
->supplementary_groups
) > 0) {
2396 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2397 strv_fprintf(f
, c
->supplementary_groups
);
2402 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2404 if (strv_length(c
->read_write_dirs
) > 0) {
2405 fprintf(f
, "%sReadWriteDirs:", prefix
);
2406 strv_fprintf(f
, c
->read_write_dirs
);
2410 if (strv_length(c
->read_only_dirs
) > 0) {
2411 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2412 strv_fprintf(f
, c
->read_only_dirs
);
2416 if (strv_length(c
->inaccessible_dirs
) > 0) {
2417 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2418 strv_fprintf(f
, c
->inaccessible_dirs
);
2424 "%sUtmpIdentifier: %s\n",
2425 prefix
, c
->utmp_id
);
2427 if (c
->selinux_context
)
2429 "%sSELinuxContext: %s%s\n",
2430 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2432 if (c
->personality
!= PERSONALITY_INVALID
)
2434 "%sPersonality: %s\n",
2435 prefix
, strna(personality_to_string(c
->personality
)));
2437 if (c
->syscall_filter
) {
2445 "%sSystemCallFilter: ",
2448 if (!c
->syscall_whitelist
)
2452 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2453 _cleanup_free_
char *name
= NULL
;
2460 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2461 fputs(strna(name
), f
);
2468 if (c
->syscall_archs
) {
2475 "%sSystemCallArchitectures:",
2479 SET_FOREACH(id
, c
->syscall_archs
, j
)
2480 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2485 if (c
->syscall_errno
!= 0)
2487 "%sSystemCallErrorNumber: %s\n",
2488 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2490 if (c
->apparmor_profile
)
2492 "%sAppArmorProfile: %s%s\n",
2493 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2496 bool exec_context_maintains_privileges(ExecContext
*c
) {
2499 /* Returns true if the process forked off would run run under
2500 * an unchanged UID or as root. */
2505 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2511 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2516 dual_timestamp_get(&s
->start_timestamp
);
2519 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2522 if (s
->pid
&& s
->pid
!= pid
)
2526 dual_timestamp_get(&s
->exit_timestamp
);
2532 if (context
->utmp_id
)
2533 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2535 exec_context_tty_reset(context
);
2539 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2540 char buf
[FORMAT_TIMESTAMP_MAX
];
2548 prefix
= strempty(prefix
);
2551 "%sPID: "PID_FMT
"\n",
2554 if (s
->start_timestamp
.realtime
> 0)
2556 "%sStart Timestamp: %s\n",
2557 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2559 if (s
->exit_timestamp
.realtime
> 0)
2561 "%sExit Timestamp: %s\n"
2563 "%sExit Status: %i\n",
2564 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2565 prefix
, sigchld_code_to_string(s
->code
),
2569 char *exec_command_line(char **argv
) {
2577 STRV_FOREACH(a
, argv
)
2580 if (!(n
= new(char, k
)))
2584 STRV_FOREACH(a
, argv
) {
2591 if (strpbrk(*a
, WHITESPACE
)) {
2602 /* FIXME: this doesn't really handle arguments that have
2603 * spaces and ticks in them */
2608 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2609 _cleanup_free_
char *cmd
= NULL
;
2610 const char *prefix2
;
2615 prefix
= strempty(prefix
);
2616 prefix2
= strjoina(prefix
, "\t");
2618 cmd
= exec_command_line(c
->argv
);
2620 "%sCommand Line: %s\n",
2621 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2623 exec_status_dump(&c
->exec_status
, f
, prefix2
);
2626 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2629 prefix
= strempty(prefix
);
2631 LIST_FOREACH(command
, c
, c
)
2632 exec_command_dump(c
, f
, prefix
);
2635 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2642 /* It's kind of important, that we keep the order here */
2643 LIST_FIND_TAIL(command
, *l
, end
);
2644 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2649 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2657 l
= strv_new_ap(path
, ap
);
2678 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2679 _cleanup_strv_free_
char **l
= NULL
;
2687 l
= strv_new_ap(path
, ap
);
2693 r
= strv_extend_strv(&c
->argv
, l
);
2701 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2706 *rt
= new0(ExecRuntime
, 1);
2711 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2716 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2726 if (!c
->private_network
&& !c
->private_tmp
)
2729 r
= exec_runtime_allocate(rt
);
2733 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2734 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2738 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2739 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2747 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2749 assert(r
->n_ref
> 0);
2755 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2760 assert(r
->n_ref
> 0);
2767 free(r
->var_tmp_dir
);
2768 safe_close_pair(r
->netns_storage_socket
);
2774 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2783 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2785 if (rt
->var_tmp_dir
)
2786 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2788 if (rt
->netns_storage_socket
[0] >= 0) {
2791 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2795 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2798 if (rt
->netns_storage_socket
[1] >= 0) {
2801 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2805 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2811 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2818 if (streq(key
, "tmp-dir")) {
2821 r
= exec_runtime_allocate(rt
);
2825 copy
= strdup(value
);
2829 free((*rt
)->tmp_dir
);
2830 (*rt
)->tmp_dir
= copy
;
2832 } else if (streq(key
, "var-tmp-dir")) {
2835 r
= exec_runtime_allocate(rt
);
2839 copy
= strdup(value
);
2843 free((*rt
)->var_tmp_dir
);
2844 (*rt
)->var_tmp_dir
= copy
;
2846 } else if (streq(key
, "netns-socket-0")) {
2849 r
= exec_runtime_allocate(rt
);
2853 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2854 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2856 safe_close((*rt
)->netns_storage_socket
[0]);
2857 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2859 } else if (streq(key
, "netns-socket-1")) {
2862 r
= exec_runtime_allocate(rt
);
2866 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2867 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2869 safe_close((*rt
)->netns_storage_socket
[1]);
2870 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
2878 static void *remove_tmpdir_thread(void *p
) {
2879 _cleanup_free_
char *path
= p
;
2881 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2885 void exec_runtime_destroy(ExecRuntime
*rt
) {
2891 /* If there are multiple users of this, let's leave the stuff around */
2896 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2898 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2900 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2907 if (rt
->var_tmp_dir
) {
2908 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2910 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2912 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2913 free(rt
->var_tmp_dir
);
2916 rt
->var_tmp_dir
= NULL
;
2919 safe_close_pair(rt
->netns_storage_socket
);
/* Names of the ExecInput values, indexed by the enum value. The table is
 * consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation below
 * (presumably generating exec_input_to_string() and its reverse lookup --
 * see the macro's definition). */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by the enum value. The table is
 * consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation below
 * (presumably generating exec_output_to_string() and its reverse lookup --
 * see the macro's definition). */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* Names of the ExecUtmpMode values, indexed by the enum value. The table
 * is consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation below
 * (presumably generating the to-string/from-string helpers for
 * exec_utmp_mode -- see the macro's definition). */
static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
        [EXEC_UTMP_INIT] = "init",
        [EXEC_UTMP_LOGIN] = "login",
        [EXEC_UTMP_USER] = "user",
};

DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);