1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
35 #include <sys/personality.h>
38 #include <security/pam_appl.h>
42 #include <selinux/selinux.h>
50 #include <sys/apparmor.h>
53 #include "sd-messages.h"
57 #include "capability.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
85 #include "apparmor-util.h"
89 #include "seccomp-util.h"
/* Timeouts for the two waits on the idle pipe (see do_idle_pipe_dance()) */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the journal stream socket */
#define SNDBUF_SIZE (8*1024*1024)
/* Moves the passed file descriptors so that fds[i] ends up being fd i+3,
 * i.e. the array occupies the consecutive range starting right after
 * stdin/stdout/stderr.
 *
 * The array is updated in place with the new descriptor numbers; the
 * old descriptors are closed. Returns 0 on success, or a negative
 * errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int start, restart_from;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! (sorts it) */

        assert(fds);

        start = 0;
        for (;;) {
                int i;

                restart_from = -1;

                for (i = start; i < (int) n_fds; i++) {
                        int nfd;

                        /* Already at right index? */
                        if (fds[i] == i+3)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= i+3,
                         * which may or may not be i+3 itself */
                        nfd = fcntl(fds[i], F_DUPFD, i + 3);
                        if (nfd < 0)
                                return -errno;

                        safe_close(fds[i]);
                        fds[i] = nfd;

                        /* Hmm, the fd we wanted isn't free? Then
                         * let's remember that and try again from here */
                        if (nfd != i+3 && restart_from < 0)
                                restart_from = i;
                }

                /* Every slot landed where it should: done */
                if (restart_from < 0)
                        break;

                start = restart_from;
        }

        return 0;
}
/* Prepares the passed file descriptors for being handed to the child:
 * sets or clears O_NONBLOCK according to 'nonblock' and always clears
 * FD_CLOEXEC.
 *
 * Returns 0 on success, or the negative error of the first call that
 * failed (remaining fds are then left untouched). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned i;
        int r;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */

        for (i = 0; i < n_fds; i++) {

                r = fd_nonblock(fds[i], nonblock);
                if (r < 0)
                        return r;

                /* We unconditionally drop FD_CLOEXEC from the fds,
                 * since after all we want to pass these fds to our
                 * children */

                r = fd_cloexec(fds[i], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
176 _pure_
static const char *tty_path(const ExecContext
*context
) {
179 if (context
->tty_path
)
180 return context
->tty_path
;
182 return "/dev/console";
185 static void exec_context_tty_reset(const ExecContext
*context
) {
188 if (context
->tty_vhangup
)
189 terminal_vhangup(tty_path(context
));
191 if (context
->tty_reset
)
192 reset_terminal(tty_path(context
));
194 if (context
->tty_vt_disallocate
&& context
->tty_path
)
195 vt_disallocate(context
->tty_path
);
198 static bool is_terminal_output(ExecOutput o
) {
200 o
== EXEC_OUTPUT_TTY
||
201 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
202 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
203 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
/* Opens /dev/null with the given open(2) flags (O_NOCTTY is always
 * added) and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        if (fd != nfd) {
                /* Move it to the requested slot and drop the temporary fd */
                r = dup2(fd, nfd) < 0 ? -errno : nfd;
                safe_close(fd);
        } else
                r = nfd;

        return r;
}
224 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
225 union sockaddr_union sa
= {
226 .un
.sun_family
= AF_UNIX
,
227 .un
.sun_path
= "/run/systemd/journal/stdout",
229 uid_t olduid
= UID_INVALID
;
230 gid_t oldgid
= GID_INVALID
;
233 if (gid
!= GID_INVALID
) {
241 if (uid
!= UID_INVALID
) {
251 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
258 if (uid
!= UID_INVALID
)
259 (void) seteuid(olduid
);
262 if (gid
!= GID_INVALID
)
263 (void) setegid(oldgid
);
268 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
272 assert(output
< _EXEC_OUTPUT_MAX
);
276 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
280 r
= connect_journal_socket(fd
, uid
, gid
);
284 if (shutdown(fd
, SHUT_RD
) < 0) {
289 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
299 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
301 context
->syslog_priority
,
302 !!context
->syslog_level_prefix
,
303 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
304 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
305 is_terminal_output(output
));
308 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
/* Opens the terminal at 'path' with the given open mode (O_NOCTTY is
 * always added, so it never becomes our controlling terminal) and
 * installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd != nfd) {
                r = dup2(fd, nfd) < 0 ? -errno : nfd;
                safe_close(fd);
        } else
                r = nfd;

        return r;
}
334 static bool is_terminal_input(ExecInput i
) {
336 i
== EXEC_INPUT_TTY
||
337 i
== EXEC_INPUT_TTY_FORCE
||
338 i
== EXEC_INPUT_TTY_FAIL
;
341 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
343 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
344 return EXEC_INPUT_NULL
;
346 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
347 return EXEC_INPUT_NULL
;
352 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
354 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
355 return EXEC_OUTPUT_INHERIT
;
360 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
365 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
369 case EXEC_INPUT_NULL
:
370 return open_null_as(O_RDONLY
, STDIN_FILENO
);
373 case EXEC_INPUT_TTY_FORCE
:
374 case EXEC_INPUT_TTY_FAIL
: {
377 fd
= acquire_terminal(tty_path(context
),
378 i
== EXEC_INPUT_TTY_FAIL
,
379 i
== EXEC_INPUT_TTY_FORCE
,
385 if (fd
!= STDIN_FILENO
) {
386 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
394 case EXEC_INPUT_SOCKET
:
395 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
398 assert_not_reached("Unknown input type");
402 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
411 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
412 o
= fixup_output(context
->std_output
, socket_fd
);
414 if (fileno
== STDERR_FILENO
) {
416 e
= fixup_output(context
->std_error
, socket_fd
);
418 /* This expects the input and output are already set up */
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e
== EXEC_OUTPUT_INHERIT
&&
423 o
== EXEC_OUTPUT_INHERIT
&&
424 i
== EXEC_INPUT_NULL
&&
425 !is_terminal_input(context
->std_input
) &&
429 /* Duplicate from stdout if possible */
430 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
431 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
435 } else if (o
== EXEC_OUTPUT_INHERIT
) {
436 /* If input got downgraded, inherit the original value */
437 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
438 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i
!= EXEC_INPUT_NULL
)
442 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY
, fileno
);
454 case EXEC_OUTPUT_NULL
:
455 return open_null_as(O_WRONLY
, fileno
);
457 case EXEC_OUTPUT_TTY
:
458 if (is_terminal_input(i
))
459 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
464 case EXEC_OUTPUT_SYSLOG
:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
466 case EXEC_OUTPUT_KMSG
:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
468 case EXEC_OUTPUT_JOURNAL
:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
470 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
472 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
473 r
= open_null_as(O_WRONLY
, fileno
);
477 case EXEC_OUTPUT_SOCKET
:
478 assert(socket_fd
>= 0);
479 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
482 assert_not_reached("Unknown error type");
486 static int chown_terminal(int fd
, uid_t uid
) {
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd
, uid
, -1);
493 (void) fchmod(fd
, TTY_MODE
);
495 if (fstat(fd
, &st
) < 0)
498 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
504 static int setup_confirm_stdio(int *_saved_stdin
,
505 int *_saved_stdout
) {
506 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
508 assert(_saved_stdin
);
509 assert(_saved_stdout
);
511 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
515 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
516 if (saved_stdout
< 0) {
521 fd
= acquire_terminal(
526 DEFAULT_CONFIRM_USEC
);
532 r
= chown_terminal(fd
, getuid());
536 if (dup2(fd
, STDIN_FILENO
) < 0) {
541 if (dup2(fd
, STDOUT_FILENO
) < 0) {
549 *_saved_stdin
= saved_stdin
;
550 *_saved_stdout
= saved_stdout
;
555 safe_close(saved_stdout
);
556 safe_close(saved_stdin
);
562 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
563 _cleanup_close_
int fd
= -1;
568 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
572 va_start(ap
, format
);
573 vdprintf(fd
, format
, ap
);
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back onto stdin/stdout (where they are valid) and closes
 * the saved copies.
 *
 * Both restores are always attempted. Returns 0 on success, or the
 * negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0)
                if (dup2(*saved_stdin, STDIN_FILENO) < 0)
                        r = -errno;

        if (*saved_stdout >= 0)
                if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
                        r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
603 static int ask_for_confirmation(char *response
, char **argv
) {
604 int saved_stdout
= -1, saved_stdin
= -1, r
;
605 _cleanup_free_
char *line
= NULL
;
607 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
611 line
= exec_command_line(argv
);
615 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
617 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
622 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
623 bool keep_groups
= false;
628 /* Lookup and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
631 if (context
->group
|| username
) {
633 if (context
->group
) {
634 const char *g
= context
->group
;
636 r
= get_group_creds(&g
, &gid
);
641 /* First step, initialize groups from /etc/groups */
642 if (username
&& gid
!= 0) {
643 if (initgroups(username
, gid
) < 0)
649 /* Second step, set our gids */
650 if (setresgid(gid
, gid
, gid
) < 0)
654 if (context
->supplementary_groups
) {
659 /* Final step, initialize any manually set supplementary groups */
660 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
662 if (!(gids
= new(gid_t
, ngroups_max
)))
666 k
= getgroups(ngroups_max
, gids
);
674 STRV_FOREACH(i
, context
->supplementary_groups
) {
677 if (k
>= ngroups_max
) {
683 r
= get_group_creds(&g
, gids
+k
);
692 if (setgroups(k
, gids
) < 0) {
703 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
709 if (context
->capabilities
) {
710 _cleanup_cap_free_ cap_t d
= NULL
;
711 static const cap_value_t bits
[] = {
712 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
720 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
722 if (prctl(PR_GET_SECUREBITS
) != sb
)
723 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
730 d
= cap_dup(context
->capabilities
);
734 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
735 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
738 if (cap_set_proc(d
) < 0)
742 /* Third step: actually set the uids */
743 if (setresuid(uid
, uid
, uid
) < 0)
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might got
748 corrupted due to the setresuid() so we need clean them up
749 later. This is done outside of this call. */
#ifdef HAVE_PAM

/* PAM conversation callback that refuses every conversation: services
 * are started non-interactively, so there is nobody to ask. */
static int null_conv(
                int num_msg,
                const struct pam_message **msg,
                struct pam_response **resp,
                void *appdata_ptr) {

        /* We don't support conversations */

        return PAM_CONV_ERR;
}

#endif
#ifdef HAVE_PAM

/* Opens a PAM session for 'user' under the PAM service 'name' and
 * forks off a helper process that closes the session again once the
 * calling process has gone away.
 *
 * name, user: PAM service name and user name
 * uid:        uid the helper process drops to
 * tty:        optional TTY name to register with PAM
 * pam_env:    receives the PAM environment list on success (caller
 *             owns it)
 * fds, n_fds: descriptors that are closed in the helper
 *
 * Returns 0 on success. On failure returns -EPERM for PAM errors or
 * the negative errno otherwise; the session is then closed again. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                const char *tty,
                char ***pam_env,
                int fds[], unsigned n_fds) {

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS;
        int err;
        char **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(pam_env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid();

        pam_pid = fork();
        if (pam_pid < 0)
                goto fail;

        if (pam_pid == 0) {
                int sig;
                int r = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */
                if (setresuid(uid, uid, uid) < 0)
                        /* Report the actual failure; 'r' holds the
                         * exit status, not an error code */
                        log_error_errno(errno, "Error: Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Check if our parent process might already have
                 * died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                int k;

                                /* sigwait() reports failure through a
                                 * positive error number as return
                                 * value, it never returns < 0 */
                                k = sigwait(&ss, &sig);
                                if (k != 0) {
                                        if (k == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                r = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(r);
        }

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        *pam_env = e;
        e = NULL;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                err = -EPERM;  /* PAM errors do not map to errno */
        } else {
                err = log_error_errno(errno, "PAM failed: %m");
        }

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);

        closelog();

        /* Nudge the helper, in case it was forked off already */
        if (pam_pid > 1) {
                kill(pam_pid, SIGTERM);
                kill(pam_pid, SIGCONT);
        }

        return err;
}
#endif
/* Renames the calling process to "(<name>)", where <name> is the file
 * name component of 'path', cut down to its last 8 characters if it is
 * longer. An empty name yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *p;
        size_t l;

        /* This resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        p = basename(path);
        if (isempty(p)) {
                rename_process("(...)");
                return;
        }

        l = strlen(p);
        if (l > 8) {
                /* The end of the process name is usually more
                 * interesting, since the first bit might just be
                 * "systemd-" */
                p = p + l - 8;
                l = 8;
        }

        /* At most 1 + 8 + 1 characters plus NUL: fits the buffer */
        process_name[0] = '(';
        memcpy(process_name+1, p, l);
        process_name[1+l] = ')';
        process_name[1+l+1] = 0;

        rename_process(process_name);
}
#ifdef HAVE_SECCOMP

/* Builds a seccomp filter from the system call settings of the context
 * and loads it into the kernel.
 *
 * In whitelist mode everything is denied by default and the listed
 * calls are allowed; otherwise everything is allowed and the listed
 * calls are denied. "Denied" means killing the process, or failing
 * with c->syscall_errno if that is non-zero.
 *
 * Returns the result of seccomp_load(), or a negative error if the
 * filter could not be assembled. */
static int apply_seccomp(const ExecContext *c) {
        uint32_t negative_action, action;
        scmp_filter_ctx *seccomp;
        Iterator i;
        void *id;
        int r;

        assert(c);

        negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);

        seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
        if (!seccomp)
                return -ENOMEM;

        if (c->syscall_archs) {

                /* Set entries are stored offset by one, hence the "- 1" */
                SET_FOREACH(id, c->syscall_archs, i) {
                        r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
                        if (r == -EEXIST)
                                continue;
                        if (r < 0)
                                goto finish;
                }

        } else {
                r = seccomp_add_secondary_archs(seccomp);
                if (r < 0)
                        goto finish;
        }

        action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
        SET_FOREACH(id, c->syscall_filter, i) {
                r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
                if (r < 0)
                        goto finish;
        }

        /* Loading the filter shall not switch on no_new_privs by itself */
        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0)
                goto finish;

        r = seccomp_load(seccomp);

finish:
        seccomp_release(seccomp);
        return r;
}

#endif
#ifdef HAVE_SECCOMP

/* Installs a seccomp filter on socket() that restricts the address
 * families the process may use, according to c->address_families.
 * Blocked families fail with EPROTONOSUPPORT.
 *
 * In whitelist mode every family not in the set is blocked; otherwise
 * exactly the families in the set are blocked.
 *
 * Returns the result of seccomp_load(), or a negative error if the
 * filter could not be assembled. */
static int apply_address_families(const ExecContext *c) {
        scmp_filter_ctx *seccomp;
        Iterator i;
        int r;

        assert(c);

        seccomp = seccomp_init(SCMP_ACT_ALLOW);
        if (!seccomp)
                return -ENOMEM;

        r = seccomp_add_secondary_archs(seccomp);
        if (r < 0)
                goto finish;

        if (c->address_families_whitelist) {
                int af, first = 0, last = 0;
                void *afp;

                /* If this is a whitelist, we first block the address
                 * families that are out of range and then everything
                 * that is not in the set. First, we find the lowest
                 * and highest address family in the set. */

                SET_FOREACH(afp, c->address_families, i) {
                        af = PTR_TO_INT(afp);

                        if (af <= 0 || af >= af_max())
                                continue;

                        if (first == 0 || af < first)
                                first = af;

                        if (last == 0 || af > last)
                                last = af;
                }

                assert((first == 0) == (last == 0));

                if (first == 0) {

                        /* No entries in the valid range, block everything */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        0);
                        if (r < 0)
                                goto finish;

                } else {

                        /* Block everything below the first entry */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_LT, first));
                        if (r < 0)
                                goto finish;

                        /* Block everything above the last entry */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_GT, last));
                        if (r < 0)
                                goto finish;

                        /* Block everything between the first and last
                         * entry */
                        for (af = 1; af < af_max(); af++) {

                                if (set_contains(c->address_families, INT_TO_PTR(af)))
                                        continue;

                                r = seccomp_rule_add(
                                                seccomp,
                                                SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                                SCMP_SYS(socket),
                                                1,
                                                SCMP_A0(SCMP_CMP_EQ, af));
                                if (r < 0)
                                        goto finish;
                        }
                }

        } else {
                void *af;

                /* If this is a blacklist, then generate one rule for
                 * each address family that are then combined in OR
                 * checks. */

                SET_FOREACH(af, c->address_families, i) {

                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
                        if (r < 0)
                                goto finish;
                }
        }

        /* Loading the filter shall not switch on no_new_privs by itself */
        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0)
                goto finish;

        r = seccomp_load(seccomp);

finish:
        seccomp_release(seccomp);
        return r;
}

#endif
1159 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1163 safe_close(idle_pipe
[1]);
1164 safe_close(idle_pipe
[2]);
1166 if (idle_pipe
[0] >= 0) {
1169 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1171 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1172 /* Signal systemd that we are bored and want to continue. */
1173 r
= write(idle_pipe
[3], "x", 1);
1175 /* Wait for systemd to react to the signal above. */
1176 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1179 safe_close(idle_pipe
[0]);
1183 safe_close(idle_pipe
[3]);
1186 static int build_environment(
1187 const ExecContext
*c
,
1189 usec_t watchdog_usec
,
1191 const char *username
,
1195 _cleanup_strv_free_
char **our_env
= NULL
;
1202 our_env
= new0(char*, 10);
1207 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1209 our_env
[n_env
++] = x
;
1211 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1213 our_env
[n_env
++] = x
;
1216 if (watchdog_usec
> 0) {
1217 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1219 our_env
[n_env
++] = x
;
1221 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1223 our_env
[n_env
++] = x
;
1227 x
= strappend("HOME=", home
);
1230 our_env
[n_env
++] = x
;
1234 x
= strappend("LOGNAME=", username
);
1237 our_env
[n_env
++] = x
;
1239 x
= strappend("USER=", username
);
1242 our_env
[n_env
++] = x
;
1246 x
= strappend("SHELL=", shell
);
1249 our_env
[n_env
++] = x
;
1252 if (is_terminal_input(c
->std_input
) ||
1253 c
->std_output
== EXEC_OUTPUT_TTY
||
1254 c
->std_error
== EXEC_OUTPUT_TTY
||
1257 x
= strdup(default_term_for_tty(tty_path(c
)));
1260 our_env
[n_env
++] = x
;
1263 our_env
[n_env
++] = NULL
;
1264 assert(n_env
<= 10);
1272 static bool exec_needs_mount_namespace(
1273 const ExecContext
*context
,
1274 const ExecParameters
*params
,
1275 ExecRuntime
*runtime
) {
1280 if (!strv_isempty(context
->read_write_dirs
) ||
1281 !strv_isempty(context
->read_only_dirs
) ||
1282 !strv_isempty(context
->inaccessible_dirs
))
1285 if (context
->mount_flags
!= 0)
1288 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1291 if (params
->bus_endpoint_path
)
1294 if (context
->private_devices
||
1295 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1296 context
->protect_home
!= PROTECT_HOME_NO
)
1302 static int exec_child(
1304 ExecCommand
*command
,
1305 const ExecContext
*context
,
1306 const ExecParameters
*params
,
1307 ExecRuntime
*runtime
,
1310 int *fds
, unsigned n_fds
,
1314 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1315 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1316 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1317 unsigned n_dont_close
= 0;
1318 int dont_close
[n_fds
+ 4];
1319 uid_t uid
= UID_INVALID
;
1320 gid_t gid
= GID_INVALID
;
1322 bool needs_mount_namespace
;
1328 assert(exit_status
);
1330 rename_process_from_path(command
->path
);
1332 /* We reset exactly these signals, since they are the
1333 * only ones we set to SIG_IGN in the main daemon. All
1334 * others we leave untouched because we set them to
1335 * SIG_DFL or a valid handler initially, both of which
1336 * will be demoted to SIG_DFL. */
1337 (void) default_signals(SIGNALS_CRASH_HANDLER
,
1338 SIGNALS_IGNORE
, -1);
1340 if (context
->ignore_sigpipe
)
1341 (void) ignore_signals(SIGPIPE
, -1);
1343 r
= reset_signal_mask();
1345 *exit_status
= EXIT_SIGNAL_MASK
;
1349 if (params
->idle_pipe
)
1350 do_idle_pipe_dance(params
->idle_pipe
);
1352 /* Close sockets very early to make sure we don't
1353 * block init reexecution because it cannot bind its
1359 dont_close
[n_dont_close
++] = socket_fd
;
1361 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1362 n_dont_close
+= n_fds
;
1364 if (params
->bus_endpoint_fd
>= 0)
1365 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1367 if (runtime
->netns_storage_socket
[0] >= 0)
1368 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1369 if (runtime
->netns_storage_socket
[1] >= 0)
1370 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1373 r
= close_all_fds(dont_close
, n_dont_close
);
1375 *exit_status
= EXIT_FDS
;
1379 if (!context
->same_pgrp
)
1381 *exit_status
= EXIT_SETSID
;
1385 exec_context_tty_reset(context
);
1387 if (params
->confirm_spawn
) {
1390 r
= ask_for_confirmation(&response
, argv
);
1391 if (r
== -ETIMEDOUT
)
1392 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1394 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1395 else if (response
== 's') {
1396 write_confirm_message("Skipping execution.\n");
1397 *exit_status
= EXIT_CONFIRM
;
1399 } else if (response
== 'n') {
1400 write_confirm_message("Failing execution.\n");
1406 if (context
->user
) {
1407 username
= context
->user
;
1408 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1410 *exit_status
= EXIT_USER
;
1415 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1416 * must be sure to drop O_NONBLOCK */
1418 fd_nonblock(socket_fd
, false);
1420 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1422 *exit_status
= EXIT_STDIN
;
1426 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1428 *exit_status
= EXIT_STDOUT
;
1432 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1434 *exit_status
= EXIT_STDERR
;
1438 if (params
->cgroup_path
) {
1439 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1441 *exit_status
= EXIT_CGROUP
;
1446 if (context
->oom_score_adjust_set
) {
1447 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1449 /* When we can't make this change due to EPERM, then
1450 * let's silently skip over it. User namespaces
1451 * prohibit write access to this file, and we
1452 * shouldn't trip up over that. */
1454 sprintf(t
, "%i", context
->oom_score_adjust
);
1455 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
1456 if (r
== -EPERM
|| r
== -EACCES
) {
1458 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1461 *exit_status
= EXIT_OOM_ADJUST
;
1466 if (context
->nice_set
)
1467 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1468 *exit_status
= EXIT_NICE
;
1472 if (context
->cpu_sched_set
) {
1473 struct sched_param param
= {
1474 .sched_priority
= context
->cpu_sched_priority
,
1477 r
= sched_setscheduler(0,
1478 context
->cpu_sched_policy
|
1479 (context
->cpu_sched_reset_on_fork
?
1480 SCHED_RESET_ON_FORK
: 0),
1483 *exit_status
= EXIT_SETSCHEDULER
;
1488 if (context
->cpuset
)
1489 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1490 *exit_status
= EXIT_CPUAFFINITY
;
1494 if (context
->ioprio_set
)
1495 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1496 *exit_status
= EXIT_IOPRIO
;
1500 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1501 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1502 *exit_status
= EXIT_TIMERSLACK
;
1506 if (context
->personality
!= PERSONALITY_INVALID
)
1507 if (personality(context
->personality
) < 0) {
1508 *exit_status
= EXIT_PERSONALITY
;
1512 if (context
->utmp_id
)
1513 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
,
1514 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
1515 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
1517 username
? "root" : context
->user
);
1519 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1520 r
= chown_terminal(STDIN_FILENO
, uid
);
1522 *exit_status
= EXIT_STDIN
;
1527 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1528 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1530 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1532 *exit_status
= EXIT_BUS_ENDPOINT
;
1537 /* If delegation is enabled we'll pass ownership of the cgroup
1538 * (but only in systemd's own controller hierarchy!) to the
1539 * user of the new process. */
1540 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1541 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1543 *exit_status
= EXIT_CGROUP
;
1548 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1550 *exit_status
= EXIT_CGROUP
;
1555 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1558 STRV_FOREACH(rt
, context
->runtime_directory
) {
1559 _cleanup_free_
char *p
;
1561 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1563 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1567 r
= mkdir_p_label(p
, context
->runtime_directory_mode
);
1569 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1573 r
= chmod_and_chown(p
, context
->runtime_directory_mode
, uid
, gid
);
1575 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1581 if (params
->apply_permissions
) {
1582 r
= enforce_groups(context
, username
, gid
);
1584 *exit_status
= EXIT_GROUP
;
1589 umask(context
->umask
);
1592 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1593 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1595 *exit_status
= EXIT_PAM
;
1601 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1602 r
= setup_netns(runtime
->netns_storage_socket
);
1604 *exit_status
= EXIT_NETWORK
;
1609 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1611 if (needs_mount_namespace
) {
1612 char *tmp
= NULL
, *var
= NULL
;
1614 /* The runtime struct only contains the parent
1615 * of the private /tmp, which is
1616 * non-accessible to world users. Inside of it
1617 * there's a /tmp that is sticky, and that's
1618 * the one we want to use here. */
1620 if (context
->private_tmp
&& runtime
) {
1621 if (runtime
->tmp_dir
)
1622 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1623 if (runtime
->var_tmp_dir
)
1624 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1627 r
= setup_namespace(
1628 params
->apply_chroot
? context
->root_directory
: NULL
,
1629 context
->read_write_dirs
,
1630 context
->read_only_dirs
,
1631 context
->inaccessible_dirs
,
1634 params
->bus_endpoint_path
,
1635 context
->private_devices
,
1636 context
->protect_home
,
1637 context
->protect_system
,
1638 context
->mount_flags
);
1640 /* If we couldn't set up the namespace this is
1641 * probably due to a missing capability. In this case,
1642 * silently proceed. */
1643 if (r
== -EPERM
|| r
== -EACCES
) {
1645 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1648 *exit_status
= EXIT_NAMESPACE
;
1653 if (params
->apply_chroot
) {
1654 if (!needs_mount_namespace
&& context
->root_directory
)
1655 if (chroot(context
->root_directory
) < 0) {
1656 *exit_status
= EXIT_CHROOT
;
1660 if (chdir(context
->working_directory
?: "/") < 0 &&
1661 !context
->working_directory_missing_ok
) {
1662 *exit_status
= EXIT_CHDIR
;
1666 _cleanup_free_
char *d
= NULL
;
1668 if (asprintf(&d
, "%s/%s",
1669 context
->root_directory
?: "",
1670 context
->working_directory
?: "") < 0) {
1671 *exit_status
= EXIT_MEMORY
;
1676 !context
->working_directory_missing_ok
) {
1677 *exit_status
= EXIT_CHDIR
;
1683 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1684 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1686 *exit_status
= EXIT_SELINUX_CONTEXT
;
1692 /* We repeat the fd closing here, to make sure that
1693 * nothing is leaked from the PAM modules. Note that
1694 * we are more aggressive this time since socket_fd
1695 * and the netns fds we don't need anymore. The custom
1696 * endpoint fd was needed to upload the policy and can
1697 * now be closed as well. */
1698 r
= close_all_fds(fds
, n_fds
);
1700 r
= shift_fds(fds
, n_fds
);
1702 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1704 *exit_status
= EXIT_FDS
;
1708 if (params
->apply_permissions
) {
1710 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1711 if (!context
->rlimit
[i
])
1714 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1715 *exit_status
= EXIT_LIMITS
;
1720 if (context
->capability_bounding_set_drop
) {
1721 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1723 *exit_status
= EXIT_CAPABILITIES
;
1729 if (context
->smack_process_label
) {
1730 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1732 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1736 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1738 _cleanup_free_
char *exec_label
= NULL
;
1740 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1741 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
) {
1742 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1746 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1748 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1755 if (context
->user
) {
1756 r
= enforce_user(context
, uid
);
1758 *exit_status
= EXIT_USER
;
1763 /* PR_GET_SECUREBITS is not privileged, while
1764 * PR_SET_SECUREBITS is. So to suppress
1765 * potential EPERMs we'll try not to call
1766 * PR_SET_SECUREBITS unless necessary. */
1767 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1768 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1769 *exit_status
= EXIT_SECUREBITS
;
1773 if (context
->capabilities
)
1774 if (cap_set_proc(context
->capabilities
) < 0) {
1775 *exit_status
= EXIT_CAPABILITIES
;
1779 if (context
->no_new_privileges
)
1780 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1781 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1786 if (context
->address_families_whitelist
||
1787 !set_isempty(context
->address_families
)) {
1788 r
= apply_address_families(context
);
1790 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1795 if (context
->syscall_whitelist
||
1796 !set_isempty(context
->syscall_filter
) ||
1797 !set_isempty(context
->syscall_archs
)) {
1798 r
= apply_seccomp(context
);
1800 *exit_status
= EXIT_SECCOMP
;
1807 if (mac_selinux_use()) {
1808 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1811 r
= setexeccon(exec_context
);
1813 *exit_status
= EXIT_SELINUX_CONTEXT
;
1820 #ifdef HAVE_APPARMOR
1821 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1822 r
= aa_change_onexec(context
->apparmor_profile
);
1823 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1824 *exit_status
= EXIT_APPARMOR_PROFILE
;
1831 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1833 *exit_status
= EXIT_MEMORY
;
1837 final_env
= strv_env_merge(5,
1838 params
->environment
,
1840 context
->environment
,
1845 *exit_status
= EXIT_MEMORY
;
1849 final_argv
= replace_env_argv(argv
, final_env
);
1851 *exit_status
= EXIT_MEMORY
;
1855 final_env
= strv_env_clean(final_env
);
1857 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1858 _cleanup_free_
char *line
;
1860 line
= exec_command_line(final_argv
);
1863 log_struct(LOG_DEBUG
,
1865 "EXECUTABLE=%s", command
->path
,
1866 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1872 execve(command
->path
, final_argv
, final_env
);
1873 *exit_status
= EXIT_EXEC
;
1877 int exec_spawn(Unit
*unit
,
1878 ExecCommand
*command
,
1879 const ExecContext
*context
,
1880 const ExecParameters
*params
,
1881 ExecRuntime
*runtime
,
1884 _cleanup_strv_free_
char **files_env
= NULL
;
1885 int *fds
= NULL
; unsigned n_fds
= 0;
1886 _cleanup_free_
char *line
= NULL
;
1896 assert(params
->fds
|| params
->n_fds
<= 0);
1898 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1899 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1900 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1902 if (params
->n_fds
!= 1) {
1903 log_unit_error(unit
, "Got more than one socket.");
1907 socket_fd
= params
->fds
[0];
1911 n_fds
= params
->n_fds
;
1914 r
= exec_context_load_environment(unit
, context
, &files_env
);
1916 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
1918 argv
= params
->argv
?: command
->argv
;
1919 line
= exec_command_line(argv
);
1923 log_struct(LOG_DEBUG
,
1925 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1926 "EXECUTABLE=%s", command
->path
,
1930 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1935 r
= exec_child(unit
,
1947 log_struct_errno(LOG_ERR
, r
,
1948 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1950 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1951 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1953 "EXECUTABLE=%s", command
->path
,
1960 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1962 /* We add the new process to the cgroup both in the child (so
1963 * that we can be sure that no user code is ever executed
1964 * outside of the cgroup) and in the parent (so that we can be
1965 * sure that when we kill the cgroup the process will be killed too). */
1967 if (params
->cgroup_path
)
1968 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1970 exec_status_start(&command
->exec_status
, pid
);
/* Fill in the non-zero defaults of an ExecContext.
 *
 * Assigns, in order: ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0),
 * cpu_sched_policy = SCHED_OTHER, syslog_priority = LOG_DAEMON|LOG_INFO,
 * syslog_level_prefix and ignore_sigpipe = true, and
 * runtime_directory_mode = 0755.
 *
 * timer_slack_nsec and personality are set to the sentinels NSEC_INFINITY
 * and PERSONALITY_INVALID; exec_context_dump() prints those two settings
 * only when they differ from these sentinels.
 *
 * NOTE(review): fields not assigned here are presumably expected to be
 * zeroed by the caller -- confirm. */
1976 void exec_context_init(ExecContext
*c
) {
1980 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1981 c
->cpu_sched_policy
= SCHED_OTHER
;
1982 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1983 c
->syslog_level_prefix
= true;
1984 c
->ignore_sigpipe
= true;
1985 c
->timer_slack_nsec
= NSEC_INFINITY
;
1986 c
->personality
= PERSONALITY_INVALID
;
1987 c
->runtime_directory_mode
= 0755;
1990 void exec_context_done(ExecContext
*c
) {
1995 strv_free(c
->environment
);
1996 c
->environment
= NULL
;
1998 strv_free(c
->environment_files
);
1999 c
->environment_files
= NULL
;
2001 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++)
2002 c
->rlimit
[l
] = mfree(c
->rlimit
[l
]);
2004 c
->working_directory
= mfree(c
->working_directory
);
2005 c
->root_directory
= mfree(c
->root_directory
);
2006 c
->tty_path
= mfree(c
->tty_path
);
2007 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
2008 c
->user
= mfree(c
->user
);
2009 c
->group
= mfree(c
->group
);
2011 strv_free(c
->supplementary_groups
);
2012 c
->supplementary_groups
= NULL
;
2014 c
->pam_name
= mfree(c
->pam_name
);
2016 if (c
->capabilities
) {
2017 cap_free(c
->capabilities
);
2018 c
->capabilities
= NULL
;
2021 strv_free(c
->read_only_dirs
);
2022 c
->read_only_dirs
= NULL
;
2024 strv_free(c
->read_write_dirs
);
2025 c
->read_write_dirs
= NULL
;
2027 strv_free(c
->inaccessible_dirs
);
2028 c
->inaccessible_dirs
= NULL
;
2031 CPU_FREE(c
->cpuset
);
2033 c
->utmp_id
= mfree(c
->utmp_id
);
2034 c
->selinux_context
= mfree(c
->selinux_context
);
2035 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
2037 set_free(c
->syscall_filter
);
2038 c
->syscall_filter
= NULL
;
2040 set_free(c
->syscall_archs
);
2041 c
->syscall_archs
= NULL
;
2043 set_free(c
->address_families
);
2044 c
->address_families
= NULL
;
2046 strv_free(c
->runtime_directory
);
2047 c
->runtime_directory
= NULL
;
2049 bus_endpoint_free(c
->bus_endpoint
);
2050 c
->bus_endpoint
= NULL
;
2053 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2058 if (!runtime_prefix
)
2061 STRV_FOREACH(i
, c
->runtime_directory
) {
2062 _cleanup_free_
char *p
;
2064 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2068 /* We execute this synchronously, since we need to be
2069 * sure this is gone when we start the service next. */
2071 (void) rm_rf(p
, REMOVE_ROOT
);
2077 void exec_command_done(ExecCommand
*c
) {
2080 c
->path
= mfree(c
->path
);
/* Run exec_command_done() on each of the n ExecCommand elements stored
 * contiguously starting at c (element i is addressed as c+i). */
2086 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2089 for (i
= 0; i
< n
; i
++)
2090 exec_command_done(c
+i
);
2093 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2097 LIST_REMOVE(command
, c
, i
);
2098 exec_command_done(i
);
/* Treat c as an array of n command-list heads: pass each head to
 * exec_command_free_list() and store the pointer it returns back into
 * the same slot. */
2105 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2108 for (i
= 0; i
< n
; i
++)
2109 c
[i
] = exec_command_free_list(c
[i
]);
2112 typedef struct InvalidEnvInfo
{
/* Callback handed to strv_env_clean_with_callback() by
 * exec_context_load_environment().
 *
 * p is the assignment being reported; userdata must point to an
 * InvalidEnvInfo. Logs an error on info->unit that names both the
 * assignment and info->path (the file it came from). */
2117 static void invalid_env(const char *p
, void *userdata
) {
2118 InvalidEnvInfo
*info
= userdata
;
2120 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2123 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2124 char **i
, **r
= NULL
;
2129 STRV_FOREACH(i
, c
->environment_files
) {
2132 bool ignore
= false;
2134 _cleanup_globfree_ glob_t pglob
= {};
2144 if (!path_is_absolute(fn
)) {
2152 /* Filename supports globbing, take all matching files */
2154 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2159 return errno
? -errno
: -EINVAL
;
2161 count
= pglob
.gl_pathc
;
2169 for (n
= 0; n
< count
; n
++) {
2170 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2178 /* Log invalid environment variables with filename */
2180 InvalidEnvInfo info
= {
2182 .path
= pglob
.gl_pathv
[n
]
2185 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2193 m
= strv_env_merge(2, r
, p
);
2209 static bool tty_may_match_dev_console(const char *tty
) {
2210 _cleanup_free_
char *active
= NULL
;
2213 if (startswith(tty
, "/dev/"))
2216 /* trivial identity? */
2217 if (streq(tty
, "console"))
2220 console
= resolve_dev_console(&active
);
2221 /* if we could not resolve, assume it may */
2225 /* "tty0" means the active VC, so it may be the same sometimes */
2226 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
/* Returns true only if both of the following hold:
 *   1. the context uses a TTY at all: tty_reset, tty_vhangup or
 *      tty_vt_disallocate is set, or is_terminal_input() /
 *      is_terminal_output() accepts std_input, std_output or std_error;
 *   2. tty_may_match_dev_console() accepts the context's tty_path(). */
2229 bool exec_context_may_touch_console(ExecContext
*ec
) {
2230 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2231 is_terminal_input(ec
->std_input
) ||
2232 is_terminal_output(ec
->std_output
) ||
2233 is_terminal_output(ec
->std_error
)) &&
2234 tty_may_match_dev_console(tty_path(ec
));
2237 static void strv_fprintf(FILE *f
, char **l
) {
2243 fprintf(f
, " %s", *g
);
2246 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2253 prefix
= strempty(prefix
);
2257 "%sWorkingDirectory: %s\n"
2258 "%sRootDirectory: %s\n"
2259 "%sNonBlocking: %s\n"
2260 "%sPrivateTmp: %s\n"
2261 "%sPrivateNetwork: %s\n"
2262 "%sPrivateDevices: %s\n"
2263 "%sProtectHome: %s\n"
2264 "%sProtectSystem: %s\n"
2265 "%sIgnoreSIGPIPE: %s\n",
2267 prefix
, c
->working_directory
? c
->working_directory
: "/",
2268 prefix
, c
->root_directory
? c
->root_directory
: "/",
2269 prefix
, yes_no(c
->non_blocking
),
2270 prefix
, yes_no(c
->private_tmp
),
2271 prefix
, yes_no(c
->private_network
),
2272 prefix
, yes_no(c
->private_devices
),
2273 prefix
, protect_home_to_string(c
->protect_home
),
2274 prefix
, protect_system_to_string(c
->protect_system
),
2275 prefix
, yes_no(c
->ignore_sigpipe
));
2277 STRV_FOREACH(e
, c
->environment
)
2278 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2280 STRV_FOREACH(e
, c
->environment_files
)
2281 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2288 if (c
->oom_score_adjust_set
)
2290 "%sOOMScoreAdjust: %i\n",
2291 prefix
, c
->oom_score_adjust
);
2293 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2295 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2296 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2298 if (c
->ioprio_set
) {
2299 _cleanup_free_
char *class_str
= NULL
;
2301 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2303 "%sIOSchedulingClass: %s\n"
2304 "%sIOPriority: %i\n",
2305 prefix
, strna(class_str
),
2306 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2309 if (c
->cpu_sched_set
) {
2310 _cleanup_free_
char *policy_str
= NULL
;
2312 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2314 "%sCPUSchedulingPolicy: %s\n"
2315 "%sCPUSchedulingPriority: %i\n"
2316 "%sCPUSchedulingResetOnFork: %s\n",
2317 prefix
, strna(policy_str
),
2318 prefix
, c
->cpu_sched_priority
,
2319 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2323 fprintf(f
, "%sCPUAffinity:", prefix
);
2324 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2325 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2326 fprintf(f
, " %u", i
);
2330 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2331 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2334 "%sStandardInput: %s\n"
2335 "%sStandardOutput: %s\n"
2336 "%sStandardError: %s\n",
2337 prefix
, exec_input_to_string(c
->std_input
),
2338 prefix
, exec_output_to_string(c
->std_output
),
2339 prefix
, exec_output_to_string(c
->std_error
));
2345 "%sTTYVHangup: %s\n"
2346 "%sTTYVTDisallocate: %s\n",
2347 prefix
, c
->tty_path
,
2348 prefix
, yes_no(c
->tty_reset
),
2349 prefix
, yes_no(c
->tty_vhangup
),
2350 prefix
, yes_no(c
->tty_vt_disallocate
));
2352 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2353 c
->std_output
== EXEC_OUTPUT_KMSG
||
2354 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2355 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2356 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2357 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2358 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2359 c
->std_error
== EXEC_OUTPUT_KMSG
||
2360 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2361 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2362 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2363 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2365 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2367 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2368 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2371 "%sSyslogFacility: %s\n"
2372 "%sSyslogLevel: %s\n",
2373 prefix
, strna(fac_str
),
2374 prefix
, strna(lvl_str
));
2377 if (c
->capabilities
) {
2378 _cleanup_cap_free_charp_
char *t
;
2380 t
= cap_to_text(c
->capabilities
, NULL
);
2382 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
/* Print the secure bits that are set as a list after the "Secure Bits:"
 * label. Each flag name carries its own leading space, so names never run
 * together; a bit that is not set contributes an empty string. */
2386 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2388 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2389 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2390 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2391 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2392 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2393 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? " noroot-locked" : "");
2395 if (c
->capability_bounding_set_drop
) {
2397 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2399 for (l
= 0; l
<= cap_last_cap(); l
++)
2400 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2401 fprintf(f
, " %s", strna(capability_to_name(l
)));
2407 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2409 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2411 if (strv_length(c
->supplementary_groups
) > 0) {
2412 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2413 strv_fprintf(f
, c
->supplementary_groups
);
2418 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2420 if (strv_length(c
->read_write_dirs
) > 0) {
2421 fprintf(f
, "%sReadWriteDirs:", prefix
);
2422 strv_fprintf(f
, c
->read_write_dirs
);
2426 if (strv_length(c
->read_only_dirs
) > 0) {
2427 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2428 strv_fprintf(f
, c
->read_only_dirs
);
2432 if (strv_length(c
->inaccessible_dirs
) > 0) {
2433 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2434 strv_fprintf(f
, c
->inaccessible_dirs
);
2440 "%sUtmpIdentifier: %s\n",
2441 prefix
, c
->utmp_id
);
2443 if (c
->selinux_context
)
2445 "%sSELinuxContext: %s%s\n",
2446 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2448 if (c
->personality
!= PERSONALITY_INVALID
)
2450 "%sPersonality: %s\n",
2451 prefix
, strna(personality_to_string(c
->personality
)));
2453 if (c
->syscall_filter
) {
2461 "%sSystemCallFilter: ",
2464 if (!c
->syscall_whitelist
)
2468 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2469 _cleanup_free_
char *name
= NULL
;
2476 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2477 fputs(strna(name
), f
);
2484 if (c
->syscall_archs
) {
2491 "%sSystemCallArchitectures:",
2495 SET_FOREACH(id
, c
->syscall_archs
, j
)
2496 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2501 if (c
->syscall_errno
!= 0)
2503 "%sSystemCallErrorNumber: %s\n",
2504 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2506 if (c
->apparmor_profile
)
2508 "%sAppArmorProfile: %s%s\n",
2509 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2512 bool exec_context_maintains_privileges(ExecContext
*c
) {
2515 /* Returns true if the process forked off would run under
2516 * an unchanged UID or as root. */
2521 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2527 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2532 dual_timestamp_get(&s
->start_timestamp
);
2535 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2538 if (s
->pid
&& s
->pid
!= pid
)
2542 dual_timestamp_get(&s
->exit_timestamp
);
2548 if (context
->utmp_id
)
2549 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2551 exec_context_tty_reset(context
);
2555 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2556 char buf
[FORMAT_TIMESTAMP_MAX
];
2564 prefix
= strempty(prefix
);
2567 "%sPID: "PID_FMT
"\n",
2570 if (s
->start_timestamp
.realtime
> 0)
2572 "%sStart Timestamp: %s\n",
2573 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2575 if (s
->exit_timestamp
.realtime
> 0)
2577 "%sExit Timestamp: %s\n"
2579 "%sExit Status: %i\n",
2580 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2581 prefix
, sigchld_code_to_string(s
->code
),
2585 char *exec_command_line(char **argv
) {
2593 STRV_FOREACH(a
, argv
)
2596 if (!(n
= new(char, k
)))
2600 STRV_FOREACH(a
, argv
) {
2607 if (strpbrk(*a
, WHITESPACE
)) {
2618 /* FIXME: this doesn't really handle arguments that have
2619 * spaces and ticks in them */
2624 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2625 _cleanup_free_
char *cmd
= NULL
;
2626 const char *prefix2
;
2631 prefix
= strempty(prefix
);
2632 prefix2
= strjoina(prefix
, "\t");
2634 cmd
= exec_command_line(c
->argv
);
2636 "%sCommand Line: %s\n",
2637 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2639 exec_status_dump(&c
->exec_status
, f
, prefix2
);
/* Dump every command of the list that starts at c to f by calling
 * exec_command_dump() on each entry. prefix is first passed through
 * strempty() and the result is handed to every exec_command_dump() call. */
2642 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2645 prefix
= strempty(prefix
);
/* The parameter c doubles as the iteration cursor. */
2647 LIST_FOREACH(command
, c
, c
)
2648 exec_command_dump(c
, f
, prefix
);
2651 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2658 /* It's kind of important that we keep the order here */
2659 LIST_FIND_TAIL(command
, *l
, end
);
2660 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2665 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2673 l
= strv_new_ap(path
, ap
);
2694 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2695 _cleanup_strv_free_
char **l
= NULL
;
2703 l
= strv_new_ap(path
, ap
);
2709 r
= strv_extend_strv(&c
->argv
, l
);
2717 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2722 *rt
= new0(ExecRuntime
, 1);
2727 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2732 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2742 if (!c
->private_network
&& !c
->private_tmp
)
2745 r
= exec_runtime_allocate(rt
);
2749 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2750 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2754 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2755 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2763 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2765 assert(r
->n_ref
> 0);
2771 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2776 assert(r
->n_ref
> 0);
2783 free(r
->var_tmp_dir
);
2784 safe_close_pair(r
->netns_storage_socket
);
2790 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2799 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2801 if (rt
->var_tmp_dir
)
2802 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2804 if (rt
->netns_storage_socket
[0] >= 0) {
2807 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2811 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2814 if (rt
->netns_storage_socket
[1] >= 0) {
2817 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2821 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2827 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2834 if (streq(key
, "tmp-dir")) {
2837 r
= exec_runtime_allocate(rt
);
2841 copy
= strdup(value
);
2845 free((*rt
)->tmp_dir
);
2846 (*rt
)->tmp_dir
= copy
;
2848 } else if (streq(key
, "var-tmp-dir")) {
2851 r
= exec_runtime_allocate(rt
);
2855 copy
= strdup(value
);
2859 free((*rt
)->var_tmp_dir
);
2860 (*rt
)->var_tmp_dir
= copy
;
2862 } else if (streq(key
, "netns-socket-0")) {
2865 r
= exec_runtime_allocate(rt
);
2869 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2870 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2872 safe_close((*rt
)->netns_storage_socket
[0]);
2873 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2875 } else if (streq(key
, "netns-socket-1")) {
2878 r
= exec_runtime_allocate(rt
);
2882 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2883 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2885 safe_close((*rt
)->netns_storage_socket
[1]);
2886 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
/* Job function that exec_runtime_destroy() hands to asynchronous_job()
 * together with a directory path.
 *
 * p is the path string. It is bound to a _cleanup_free_ variable, so this
 * function presumably takes ownership and releases it on return -- confirm
 * against the macro definition. */
2894 static void *remove_tmpdir_thread(void *p
) {
2895 _cleanup_free_
char *path
= p
;
/* Result deliberately discarded: removal is best-effort. */
2897 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2901 void exec_runtime_destroy(ExecRuntime
*rt
) {
2907 /* If there are multiple users of this, let's leave the stuff around */
2912 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2914 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2916 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2923 if (rt
->var_tmp_dir
) {
2924 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2926 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2928 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2929 free(rt
->var_tmp_dir
);
2932 rt
->var_tmp_dir
= NULL
;
2935 safe_close_pair(rt
->netns_storage_socket
);
/* Name of each ExecInput value, indexed by the enumerator and sized by
 * _EXEC_INPUT_MAX. The table is consumed by
 * DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput) below, which presumably
 * generates the exec_input_to_string() used by exec_context_dump() --
 * confirm against the macro definition. */
2938 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2939 [EXEC_INPUT_NULL
] = "null",
2940 [EXEC_INPUT_TTY
] = "tty",
2941 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2942 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2943 [EXEC_INPUT_SOCKET
] = "socket"
2946 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* Name of each ExecOutput value, indexed by the enumerator and sized by
 * _EXEC_OUTPUT_MAX. The "+console" names belong to the *_AND_CONSOLE
 * enumerators. The table is consumed by
 * DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput) below, which
 * presumably generates the exec_output_to_string() used by
 * exec_context_dump() -- confirm against the macro definition. */
2948 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2949 [EXEC_OUTPUT_INHERIT
] = "inherit",
2950 [EXEC_OUTPUT_NULL
] = "null",
2951 [EXEC_OUTPUT_TTY
] = "tty",
2952 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2953 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2954 [EXEC_OUTPUT_KMSG
] = "kmsg",
2955 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2956 [EXEC_OUTPUT_JOURNAL
] = "journal",
2957 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2958 [EXEC_OUTPUT_SOCKET
] = "socket"
2961 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
/* Name of each ExecUtmpMode value, indexed by the enumerator and sized by
 * _EXEC_UTMP_MODE_MAX. The table is consumed by
 * DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode) below. */
2963 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
2964 [EXEC_UTMP_INIT
] = "init",
2965 [EXEC_UTMP_LOGIN
] = "login",
2966 [EXEC_UTMP_USER
] = "user",
2969 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);