1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
56 #include "capability.h"
59 #include "sd-messages.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
84 #include "apparmor-util.h"
88 #include "seccomp-util.h"
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94 /* This assumes there is a 'tty' group */
97 #define SNDBUF_SIZE (8*1024*1024)
99 static int shift_fds(int fds
[], unsigned n_fds
) {
100 int start
, restart_from
;
105 /* Modifies the fds array! (sorts it) */
115 for (i
= start
; i
< (int) n_fds
; i
++) {
118 /* Already at right index? */
122 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
128 /* Hmm, the fd we wanted isn't free? Then
129 * let's remember that and try again from here */
130 if (nfd
!= i
+3 && restart_from
< 0)
134 if (restart_from
< 0)
137 start
= restart_from
;
143 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
154 for (i
= 0; i
< n_fds
; i
++) {
156 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
159 /* We unconditionally drop FD_CLOEXEC from the fds,
160 * since after all we want to pass these fds to our
163 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
170 _pure_
static const char *tty_path(const ExecContext
*context
) {
173 if (context
->tty_path
)
174 return context
->tty_path
;
176 return "/dev/console";
179 static void exec_context_tty_reset(const ExecContext
*context
) {
182 if (context
->tty_vhangup
)
183 terminal_vhangup(tty_path(context
));
185 if (context
->tty_reset
)
186 reset_terminal(tty_path(context
));
188 if (context
->tty_vt_disallocate
&& context
->tty_path
)
189 vt_disallocate(context
->tty_path
);
192 static bool is_terminal_output(ExecOutput o
) {
194 o
== EXEC_OUTPUT_TTY
||
195 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
196 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
197 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
200 static int open_null_as(int flags
, int nfd
) {
205 fd
= open("/dev/null", flags
|O_NOCTTY
);
210 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
218 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
219 union sockaddr_union sa
= {
220 .un
.sun_family
= AF_UNIX
,
221 .un
.sun_path
= "/run/systemd/journal/stdout",
223 uid_t olduid
= UID_INVALID
;
224 gid_t oldgid
= GID_INVALID
;
227 if (gid
!= GID_INVALID
) {
235 if (uid
!= UID_INVALID
) {
245 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
249 /* If we fail to restore the uid or gid, things will likely
250 fail later on. This should only happen if an LSM interferes. */
252 if (uid
!= UID_INVALID
)
253 (void) seteuid(olduid
);
256 if (gid
!= GID_INVALID
)
257 (void) setegid(oldgid
);
262 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
266 assert(output
< _EXEC_OUTPUT_MAX
);
270 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
274 r
= connect_journal_socket(fd
, uid
, gid
);
278 if (shutdown(fd
, SHUT_RD
) < 0) {
283 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
293 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
295 context
->syslog_priority
,
296 !!context
->syslog_level_prefix
,
297 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
298 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
299 is_terminal_output(output
));
302 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
309 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
315 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
319 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
327 static bool is_terminal_input(ExecInput i
) {
329 i
== EXEC_INPUT_TTY
||
330 i
== EXEC_INPUT_TTY_FORCE
||
331 i
== EXEC_INPUT_TTY_FAIL
;
334 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
336 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
337 return EXEC_INPUT_NULL
;
339 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
340 return EXEC_INPUT_NULL
;
345 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
347 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
348 return EXEC_OUTPUT_INHERIT
;
353 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
358 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
362 case EXEC_INPUT_NULL
:
363 return open_null_as(O_RDONLY
, STDIN_FILENO
);
366 case EXEC_INPUT_TTY_FORCE
:
367 case EXEC_INPUT_TTY_FAIL
: {
370 fd
= acquire_terminal(tty_path(context
),
371 i
== EXEC_INPUT_TTY_FAIL
,
372 i
== EXEC_INPUT_TTY_FORCE
,
378 if (fd
!= STDIN_FILENO
) {
379 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
387 case EXEC_INPUT_SOCKET
:
388 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
391 assert_not_reached("Unknown input type");
395 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
404 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
405 o
= fixup_output(context
->std_output
, socket_fd
);
407 if (fileno
== STDERR_FILENO
) {
409 e
= fixup_output(context
->std_error
, socket_fd
);
411 /* This expects the input and output are already set up */
413 /* Don't change the stderr file descriptor if we inherit all
414 * the way and are not on a tty */
415 if (e
== EXEC_OUTPUT_INHERIT
&&
416 o
== EXEC_OUTPUT_INHERIT
&&
417 i
== EXEC_INPUT_NULL
&&
418 !is_terminal_input(context
->std_input
) &&
422 /* Duplicate from stdout if possible */
423 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
424 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
428 } else if (o
== EXEC_OUTPUT_INHERIT
) {
429 /* If input got downgraded, inherit the original value */
430 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
431 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
433 /* If the input is connected to anything that's not a /dev/null, inherit that... */
434 if (i
!= EXEC_INPUT_NULL
)
435 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
437 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441 /* We need to open /dev/null here anew, to get the right access mode. */
442 return open_null_as(O_WRONLY
, fileno
);
447 case EXEC_OUTPUT_NULL
:
448 return open_null_as(O_WRONLY
, fileno
);
450 case EXEC_OUTPUT_TTY
:
451 if (is_terminal_input(i
))
452 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
454 /* We don't reset the terminal if this is just about output */
455 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
457 case EXEC_OUTPUT_SYSLOG
:
458 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
459 case EXEC_OUTPUT_KMSG
:
460 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
461 case EXEC_OUTPUT_JOURNAL
:
462 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
463 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
465 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
466 r
= open_null_as(O_WRONLY
, fileno
);
470 case EXEC_OUTPUT_SOCKET
:
471 assert(socket_fd
>= 0);
472 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
475 assert_not_reached("Unknown error type");
479 static int chown_terminal(int fd
, uid_t uid
) {
484 /* This might fail. What matters are the results. */
485 (void) fchown(fd
, uid
, -1);
486 (void) fchmod(fd
, TTY_MODE
);
488 if (fstat(fd
, &st
) < 0)
491 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
497 static int setup_confirm_stdio(int *_saved_stdin
,
498 int *_saved_stdout
) {
499 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
501 assert(_saved_stdin
);
502 assert(_saved_stdout
);
504 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
508 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
509 if (saved_stdout
< 0) {
514 fd
= acquire_terminal(
519 DEFAULT_CONFIRM_USEC
);
525 r
= chown_terminal(fd
, getuid());
529 if (dup2(fd
, STDIN_FILENO
) < 0) {
534 if (dup2(fd
, STDOUT_FILENO
) < 0) {
542 *_saved_stdin
= saved_stdin
;
543 *_saved_stdout
= saved_stdout
;
548 safe_close(saved_stdout
);
549 safe_close(saved_stdin
);
555 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
556 _cleanup_close_
int fd
= -1;
561 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
565 va_start(ap
, format
);
566 vdprintf(fd
, format
, ap
);
572 static int restore_confirm_stdio(int *saved_stdin
,
578 assert(saved_stdout
);
582 if (*saved_stdin
>= 0)
583 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
586 if (*saved_stdout
>= 0)
587 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
590 safe_close(*saved_stdin
);
591 safe_close(*saved_stdout
);
596 static int ask_for_confirmation(char *response
, char **argv
) {
597 int saved_stdout
= -1, saved_stdin
= -1, r
;
598 _cleanup_free_
char *line
= NULL
;
600 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
604 line
= exec_command_line(argv
);
608 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
610 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
615 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
616 bool keep_groups
= false;
621 /* Lookup and set GID and supplementary group list. Here too
622 * we avoid NSS lookups for gid=0. */
624 if (context
->group
|| username
) {
626 if (context
->group
) {
627 const char *g
= context
->group
;
629 if ((r
= get_group_creds(&g
, &gid
)) < 0)
633 /* First step, initialize groups from /etc/group */
634 if (username
&& gid
!= 0) {
635 if (initgroups(username
, gid
) < 0)
641 /* Second step, set our gids */
642 if (setresgid(gid
, gid
, gid
) < 0)
646 if (context
->supplementary_groups
) {
651 /* Final step, initialize any manually set supplementary groups */
652 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
654 if (!(gids
= new(gid_t
, ngroups_max
)))
658 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
665 STRV_FOREACH(i
, context
->supplementary_groups
) {
668 if (k
>= ngroups_max
) {
674 r
= get_group_creds(&g
, gids
+k
);
683 if (setgroups(k
, gids
) < 0) {
694 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
697 /* Sets (but doesn't look up) the uid and makes sure we keep
698 * capabilities while doing so. */
700 if (context
->capabilities
) {
701 _cleanup_cap_free_ cap_t d
= NULL
;
702 static const cap_value_t bits
[] = {
703 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
711 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
713 if (prctl(PR_GET_SECUREBITS
) != sb
)
714 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
721 d
= cap_dup(context
->capabilities
);
725 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
726 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
729 if (cap_set_proc(d
) < 0)
733 /* Third step: actually set the uids */
734 if (setresuid(uid
, uid
, uid
) < 0)
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might have got
739 corrupted due to the setresuid() so we need to clean them up
740 later. This is done outside of this call. */
747 static int null_conv(
749 const struct pam_message
**msg
,
750 struct pam_response
**resp
,
753 /* We don't support conversations */
758 static int setup_pam(
764 int fds
[], unsigned n_fds
) {
766 static const struct pam_conv conv
= {
771 pam_handle_t
*handle
= NULL
;
773 int pam_code
= PAM_SUCCESS
;
776 bool close_session
= false;
777 pid_t pam_pid
= 0, parent_pid
;
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_SET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
791 if (log_get_max_level() < LOG_DEBUG
)
794 pam_code
= pam_start(name
, user
, &conv
, &handle
);
795 if (pam_code
!= PAM_SUCCESS
) {
801 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
802 if (pam_code
!= PAM_SUCCESS
)
806 pam_code
= pam_acct_mgmt(handle
, flags
);
807 if (pam_code
!= PAM_SUCCESS
)
810 pam_code
= pam_open_session(handle
, flags
);
811 if (pam_code
!= PAM_SUCCESS
)
814 close_session
= true;
816 e
= pam_getenvlist(handle
);
818 pam_code
= PAM_BUF_ERR
;
822 /* Block SIGTERM, so that we know that it won't get lost in
824 if (sigemptyset(&ss
) < 0 ||
825 sigaddset(&ss
, SIGTERM
) < 0 ||
826 sigprocmask(SIG_BLOCK
, &ss
, &old_ss
) < 0)
829 parent_pid
= getpid();
839 /* The child's job is to reset the PAM session on
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds
, n_fds
);
851 /* Drop privileges - we don't need any to pam_close_session
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid
, uid
, uid
) < 0)
856 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
866 /* Check if our parent process might already have
868 if (getppid() == parent_pid
) {
870 if (sigwait(&ss
, &sig
) < 0) {
877 assert(sig
== SIGTERM
);
882 /* If our parent died we'll end the session */
883 if (getppid() != parent_pid
) {
884 pam_code
= pam_close_session(handle
, flags
);
885 if (pam_code
!= PAM_SUCCESS
)
892 pam_end(handle
, pam_code
| flags
);
896 /* If the child was forked off successfully it will do all the
897 * cleanups, so forget about the handle here. */
900 /* Unblock SIGTERM again in the parent */
901 if (sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) < 0)
904 /* We close the log explicitly here, since the PAM modules
905 * might have opened it, but we don't want this fd around. */
914 if (pam_code
!= PAM_SUCCESS
) {
915 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
916 err
= -EPERM
; /* PAM errors do not map to errno */
918 log_error_errno(errno
, "PAM failed: %m");
924 pam_code
= pam_close_session(handle
, flags
);
926 pam_end(handle
, pam_code
| flags
);
934 kill(pam_pid
, SIGTERM
);
935 kill(pam_pid
, SIGCONT
);
942 static void rename_process_from_path(const char *path
) {
943 char process_name
[11];
947 /* This resulting string must fit in 10 chars (i.e. the length
948 * of "/sbin/init") to look pretty in /bin/ps */
952 rename_process("(...)");
958 /* The end of the process name is usually more
959 * interesting, since the first bit might just be
965 process_name
[0] = '(';
966 memcpy(process_name
+1, p
, l
);
967 process_name
[1+l
] = ')';
968 process_name
[1+l
+1] = 0;
970 rename_process(process_name
);
975 static int apply_seccomp(const ExecContext
*c
) {
976 uint32_t negative_action
, action
;
977 scmp_filter_ctx
*seccomp
;
984 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
986 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
990 if (c
->syscall_archs
) {
992 SET_FOREACH(id
, c
->syscall_archs
, i
) {
993 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1001 r
= seccomp_add_secondary_archs(seccomp
);
1006 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1007 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1008 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1013 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1017 r
= seccomp_load(seccomp
);
1020 seccomp_release(seccomp
);
1024 static int apply_address_families(const ExecContext
*c
) {
1025 scmp_filter_ctx
*seccomp
;
1031 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1035 r
= seccomp_add_secondary_archs(seccomp
);
1039 if (c
->address_families_whitelist
) {
1040 int af
, first
= 0, last
= 0;
1043 /* If this is a whitelist, we first block the address
1044 * families that are out of range and then everything
1045 * that is not in the set. First, we find the lowest
1046 * and highest address family in the set. */
1048 SET_FOREACH(afp
, c
->address_families
, i
) {
1049 af
= PTR_TO_INT(afp
);
1051 if (af
<= 0 || af
>= af_max())
1054 if (first
== 0 || af
< first
)
1057 if (last
== 0 || af
> last
)
1061 assert((first
== 0) == (last
== 0));
1065 /* No entries in the valid range, block everything */
1066 r
= seccomp_rule_add(
1068 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1076 /* Block everything below the first entry */
1077 r
= seccomp_rule_add(
1079 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1082 SCMP_A0(SCMP_CMP_LT
, first
));
1086 /* Block everything above the last entry */
1087 r
= seccomp_rule_add(
1089 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1092 SCMP_A0(SCMP_CMP_GT
, last
));
1096 /* Block everything between the first and last
1098 for (af
= 1; af
< af_max(); af
++) {
1100 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1103 r
= seccomp_rule_add(
1105 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1108 SCMP_A0(SCMP_CMP_EQ
, af
));
1117 /* If this is a blacklist, then generate one rule for
1118 * each address family that are then combined in OR
1121 SET_FOREACH(af
, c
->address_families
, i
) {
1123 r
= seccomp_rule_add(
1125 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1128 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1134 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1138 r
= seccomp_load(seccomp
);
1141 seccomp_release(seccomp
);
1147 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1151 safe_close(idle_pipe
[1]);
1152 safe_close(idle_pipe
[2]);
1154 if (idle_pipe
[0] >= 0) {
1157 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1159 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1160 /* Signal systemd that we are bored and want to continue. */
1161 r
= write(idle_pipe
[3], "x", 1);
1163 /* Wait for systemd to react to the signal above. */
1164 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1167 safe_close(idle_pipe
[0]);
1171 safe_close(idle_pipe
[3]);
1174 static int build_environment(
1175 const ExecContext
*c
,
1177 usec_t watchdog_usec
,
1179 const char *username
,
1183 _cleanup_strv_free_
char **our_env
= NULL
;
1190 our_env
= new0(char*, 10);
1195 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1197 our_env
[n_env
++] = x
;
1199 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1201 our_env
[n_env
++] = x
;
1204 if (watchdog_usec
> 0) {
1205 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1207 our_env
[n_env
++] = x
;
1209 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1211 our_env
[n_env
++] = x
;
1215 x
= strappend("HOME=", home
);
1218 our_env
[n_env
++] = x
;
1222 x
= strappend("LOGNAME=", username
);
1225 our_env
[n_env
++] = x
;
1227 x
= strappend("USER=", username
);
1230 our_env
[n_env
++] = x
;
1234 x
= strappend("SHELL=", shell
);
1237 our_env
[n_env
++] = x
;
1240 if (is_terminal_input(c
->std_input
) ||
1241 c
->std_output
== EXEC_OUTPUT_TTY
||
1242 c
->std_error
== EXEC_OUTPUT_TTY
||
1245 x
= strdup(default_term_for_tty(tty_path(c
)));
1248 our_env
[n_env
++] = x
;
1251 our_env
[n_env
++] = NULL
;
1252 assert(n_env
<= 10);
1260 static bool exec_needs_mount_namespace(
1261 const ExecContext
*context
,
1262 const ExecParameters
*params
,
1263 ExecRuntime
*runtime
) {
1268 if (!strv_isempty(context
->read_write_dirs
) ||
1269 !strv_isempty(context
->read_only_dirs
) ||
1270 !strv_isempty(context
->inaccessible_dirs
))
1273 if (context
->mount_flags
!= 0)
1276 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1279 if (params
->bus_endpoint_path
)
1282 if (context
->private_devices
||
1283 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1284 context
->protect_home
!= PROTECT_HOME_NO
)
1290 static int exec_child(
1292 ExecCommand
*command
,
1293 const ExecContext
*context
,
1294 const ExecParameters
*params
,
1295 ExecRuntime
*runtime
,
1298 int *fds
, unsigned n_fds
,
1302 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1303 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1304 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1305 unsigned n_dont_close
= 0;
1306 int dont_close
[n_fds
+ 4];
1307 uid_t uid
= UID_INVALID
;
1308 gid_t gid
= GID_INVALID
;
1310 bool needs_mount_namespace
;
1316 assert(exit_status
);
1318 rename_process_from_path(command
->path
);
1320 /* We reset exactly these signals, since they are the
1321 * only ones we set to SIG_IGN in the main daemon. All
1322 * others we leave untouched because we set them to
1323 * SIG_DFL or a valid handler initially, both of which
1324 * will be demoted to SIG_DFL. */
1325 default_signals(SIGNALS_CRASH_HANDLER
,
1326 SIGNALS_IGNORE
, -1);
1328 if (context
->ignore_sigpipe
)
1329 ignore_signals(SIGPIPE
, -1);
1331 r
= reset_signal_mask();
1333 *exit_status
= EXIT_SIGNAL_MASK
;
1337 if (params
->idle_pipe
)
1338 do_idle_pipe_dance(params
->idle_pipe
);
1340 /* Close sockets very early to make sure we don't
1341 * block init reexecution because it cannot bind its
1347 dont_close
[n_dont_close
++] = socket_fd
;
1349 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1350 n_dont_close
+= n_fds
;
1352 if (params
->bus_endpoint_fd
>= 0)
1353 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1355 if (runtime
->netns_storage_socket
[0] >= 0)
1356 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1357 if (runtime
->netns_storage_socket
[1] >= 0)
1358 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1361 r
= close_all_fds(dont_close
, n_dont_close
);
1363 *exit_status
= EXIT_FDS
;
1367 if (!context
->same_pgrp
)
1369 *exit_status
= EXIT_SETSID
;
1373 exec_context_tty_reset(context
);
1375 if (params
->confirm_spawn
) {
1378 r
= ask_for_confirmation(&response
, argv
);
1379 if (r
== -ETIMEDOUT
)
1380 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1382 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1383 else if (response
== 's') {
1384 write_confirm_message("Skipping execution.\n");
1385 *exit_status
= EXIT_CONFIRM
;
1387 } else if (response
== 'n') {
1388 write_confirm_message("Failing execution.\n");
1394 if (context
->user
) {
1395 username
= context
->user
;
1396 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1398 *exit_status
= EXIT_USER
;
1403 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1404 * must make sure to drop O_NONBLOCK */
1406 fd_nonblock(socket_fd
, false);
1408 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1410 *exit_status
= EXIT_STDIN
;
1414 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1416 *exit_status
= EXIT_STDOUT
;
1420 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1422 *exit_status
= EXIT_STDERR
;
1426 if (params
->cgroup_path
) {
1427 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1429 *exit_status
= EXIT_CGROUP
;
1434 if (context
->oom_score_adjust_set
) {
1435 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1437 /* When we can't make this change due to EPERM, then
1438 * let's silently skip over it. User namespaces
1439 * prohibit write access to this file, and we
1440 * shouldn't trip up over that. */
1442 sprintf(t
, "%i", context
->oom_score_adjust
);
1443 r
= write_string_file("/proc/self/oom_score_adj", t
);
1444 if (r
== -EPERM
|| r
== -EACCES
) {
1446 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1449 *exit_status
= EXIT_OOM_ADJUST
;
1454 if (context
->nice_set
)
1455 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1456 *exit_status
= EXIT_NICE
;
1460 if (context
->cpu_sched_set
) {
1461 struct sched_param param
= {
1462 .sched_priority
= context
->cpu_sched_priority
,
1465 r
= sched_setscheduler(0,
1466 context
->cpu_sched_policy
|
1467 (context
->cpu_sched_reset_on_fork
?
1468 SCHED_RESET_ON_FORK
: 0),
1471 *exit_status
= EXIT_SETSCHEDULER
;
1476 if (context
->cpuset
)
1477 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1478 *exit_status
= EXIT_CPUAFFINITY
;
1482 if (context
->ioprio_set
)
1483 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1484 *exit_status
= EXIT_IOPRIO
;
1488 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1489 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1490 *exit_status
= EXIT_TIMERSLACK
;
1494 if (context
->personality
!= PERSONALITY_INVALID
)
1495 if (personality(context
->personality
) < 0) {
1496 *exit_status
= EXIT_PERSONALITY
;
1500 if (context
->utmp_id
)
1501 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1503 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1504 r
= chown_terminal(STDIN_FILENO
, uid
);
1506 *exit_status
= EXIT_STDIN
;
1512 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1513 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1515 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1517 *exit_status
= EXIT_BUS_ENDPOINT
;
1523 /* If delegation is enabled we'll pass ownership of the cgroup
1524 * (but only in systemd's own controller hierarchy!) to the
1525 * user of the new process. */
1526 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1527 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1529 *exit_status
= EXIT_CGROUP
;
1534 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1536 *exit_status
= EXIT_CGROUP
;
1541 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1544 STRV_FOREACH(rt
, context
->runtime_directory
) {
1545 _cleanup_free_
char *p
;
1547 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1549 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1553 r
= mkdir_safe_label(p
, context
->runtime_directory_mode
, uid
, gid
);
1555 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1561 if (params
->apply_permissions
) {
1562 r
= enforce_groups(context
, username
, gid
);
1564 *exit_status
= EXIT_GROUP
;
1569 umask(context
->umask
);
1572 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1573 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1575 *exit_status
= EXIT_PAM
;
1581 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1582 r
= setup_netns(runtime
->netns_storage_socket
);
1584 *exit_status
= EXIT_NETWORK
;
1589 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1591 if (needs_mount_namespace
) {
1592 char *tmp
= NULL
, *var
= NULL
;
1594 /* The runtime struct only contains the parent
1595 * of the private /tmp, which is
1596 * non-accessible to world users. Inside of it
1597 * there's a /tmp that is sticky, and that's
1598 * the one we want to use here. */
1600 if (context
->private_tmp
&& runtime
) {
1601 if (runtime
->tmp_dir
)
1602 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1603 if (runtime
->var_tmp_dir
)
1604 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1607 r
= setup_namespace(
1608 params
->apply_chroot
? context
->root_directory
: NULL
,
1609 context
->read_write_dirs
,
1610 context
->read_only_dirs
,
1611 context
->inaccessible_dirs
,
1614 params
->bus_endpoint_path
,
1615 context
->private_devices
,
1616 context
->protect_home
,
1617 context
->protect_system
,
1618 context
->mount_flags
);
1620 /* If we couldn't set up the namespace this is
1621 * probably due to a missing capability. In this case,
1622 * silently proceed. */
1623 if (r
== -EPERM
|| r
== -EACCES
) {
1625 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1628 *exit_status
= EXIT_NAMESPACE
;
1633 if (params
->apply_chroot
) {
1634 if (!needs_mount_namespace
&& context
->root_directory
)
1635 if (chroot(context
->root_directory
) < 0) {
1636 *exit_status
= EXIT_CHROOT
;
1640 if (chdir(context
->working_directory
?: "/") < 0 &&
1641 !context
->working_directory_missing_ok
) {
1642 *exit_status
= EXIT_CHDIR
;
1646 _cleanup_free_
char *d
= NULL
;
1648 if (asprintf(&d
, "%s/%s",
1649 context
->root_directory
?: "",
1650 context
->working_directory
?: "") < 0) {
1651 *exit_status
= EXIT_MEMORY
;
1656 !context
->working_directory_missing_ok
) {
1657 *exit_status
= EXIT_CHDIR
;
1663 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1664 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1666 *exit_status
= EXIT_SELINUX_CONTEXT
;
1672 /* We repeat the fd closing here, to make sure that
1673 * nothing is leaked from the PAM modules. Note that
1674 * we are more aggressive this time, since we no longer
1675 * need socket_fd and the netns fds. The custom
1676 * endpoint fd was needed to upload the policy and can
1677 * now be closed as well. */
1678 r
= close_all_fds(fds
, n_fds
);
1680 r
= shift_fds(fds
, n_fds
);
1682 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1684 *exit_status
= EXIT_FDS
;
1688 if (params
->apply_permissions
) {
1690 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1691 if (!context
->rlimit
[i
])
1694 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1695 *exit_status
= EXIT_LIMITS
;
1700 if (context
->capability_bounding_set_drop
) {
1701 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1703 *exit_status
= EXIT_CAPABILITIES
;
1709 if (context
->smack_process_label
) {
1710 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1712 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1718 if (context
->user
) {
1719 r
= enforce_user(context
, uid
);
1721 *exit_status
= EXIT_USER
;
1726 /* PR_GET_SECUREBITS is not privileged, while
1727 * PR_SET_SECUREBITS is. So to suppress
1728 * potential EPERMs we'll try not to call
1729 * PR_SET_SECUREBITS unless necessary. */
1730 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1731 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1732 *exit_status
= EXIT_SECUREBITS
;
1736 if (context
->capabilities
)
1737 if (cap_set_proc(context
->capabilities
) < 0) {
1738 *exit_status
= EXIT_CAPABILITIES
;
1742 if (context
->no_new_privileges
)
1743 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1744 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1749 if (context
->address_families_whitelist
||
1750 !set_isempty(context
->address_families
)) {
1751 r
= apply_address_families(context
);
1753 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1758 if (context
->syscall_whitelist
||
1759 !set_isempty(context
->syscall_filter
) ||
1760 !set_isempty(context
->syscall_archs
)) {
1761 r
= apply_seccomp(context
);
1763 *exit_status
= EXIT_SECCOMP
;
1770 if (mac_selinux_use()) {
1771 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1774 r
= setexeccon(exec_context
);
1776 *exit_status
= EXIT_SELINUX_CONTEXT
;
1783 #ifdef HAVE_APPARMOR
1784 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1785 r
= aa_change_onexec(context
->apparmor_profile
);
1786 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1787 *exit_status
= EXIT_APPARMOR_PROFILE
;
1794 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1796 *exit_status
= EXIT_MEMORY
;
1800 final_env
= strv_env_merge(5,
1801 params
->environment
,
1803 context
->environment
,
1808 *exit_status
= EXIT_MEMORY
;
1812 final_argv
= replace_env_argv(argv
, final_env
);
1814 *exit_status
= EXIT_MEMORY
;
1818 final_env
= strv_env_clean(final_env
);
1820 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1821 _cleanup_free_
char *line
;
1823 line
= exec_command_line(final_argv
);
1826 log_struct(LOG_DEBUG
,
1828 "EXECUTABLE=%s", command
->path
,
1829 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1835 execve(command
->path
, final_argv
, final_env
);
1836 *exit_status
= EXIT_EXEC
;
1840 int exec_spawn(Unit
*unit
,
1841 ExecCommand
*command
,
1842 const ExecContext
*context
,
1843 const ExecParameters
*params
,
1844 ExecRuntime
*runtime
,
1847 _cleanup_strv_free_
char **files_env
= NULL
;
1848 int *fds
= NULL
; unsigned n_fds
= 0;
1849 _cleanup_free_
char *line
= NULL
;
1859 assert(params
->fds
|| params
->n_fds
<= 0);
1861 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1862 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1863 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1865 if (params
->n_fds
!= 1) {
1866 log_unit_error(unit
, "Got more than one socket.");
1870 socket_fd
= params
->fds
[0];
1874 n_fds
= params
->n_fds
;
1877 r
= exec_context_load_environment(unit
, context
, &files_env
);
1879 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
1881 argv
= params
->argv
?: command
->argv
;
1882 line
= exec_command_line(argv
);
1886 log_struct(LOG_DEBUG
,
1888 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1889 "EXECUTABLE=%s", command
->path
,
1893 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1898 r
= exec_child(unit
,
1910 log_struct_errno(LOG_ERR
, r
,
1911 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1913 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1914 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1916 "EXECUTABLE=%s", command
->path
,
1923 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1925 /* We add the new process to the cgroup both in the child (so
1926 * that we can be sure that no user code is ever executed
1927 * outside of the cgroup) and in the parent (so that we can be
1928 * sure that when we kill the cgroup the process will be
1930 if (params
->cgroup_path
)
1931 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1933 exec_status_start(&command
->exec_status
, pid
);
1939 void exec_context_init(ExecContext
*c
) {
1943 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1944 c
->cpu_sched_policy
= SCHED_OTHER
;
1945 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1946 c
->syslog_level_prefix
= true;
1947 c
->ignore_sigpipe
= true;
1948 c
->timer_slack_nsec
= NSEC_INFINITY
;
1949 c
->personality
= PERSONALITY_INVALID
;
1950 c
->runtime_directory_mode
= 0755;
1953 void exec_context_done(ExecContext
*c
) {
1958 strv_free(c
->environment
);
1959 c
->environment
= NULL
;
1961 strv_free(c
->environment_files
);
1962 c
->environment_files
= NULL
;
1964 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1966 c
->rlimit
[l
] = NULL
;
1969 free(c
->working_directory
);
1970 c
->working_directory
= NULL
;
1971 free(c
->root_directory
);
1972 c
->root_directory
= NULL
;
1977 free(c
->syslog_identifier
);
1978 c
->syslog_identifier
= NULL
;
1986 strv_free(c
->supplementary_groups
);
1987 c
->supplementary_groups
= NULL
;
1992 if (c
->capabilities
) {
1993 cap_free(c
->capabilities
);
1994 c
->capabilities
= NULL
;
1997 strv_free(c
->read_only_dirs
);
1998 c
->read_only_dirs
= NULL
;
2000 strv_free(c
->read_write_dirs
);
2001 c
->read_write_dirs
= NULL
;
2003 strv_free(c
->inaccessible_dirs
);
2004 c
->inaccessible_dirs
= NULL
;
2007 CPU_FREE(c
->cpuset
);
2012 free(c
->selinux_context
);
2013 c
->selinux_context
= NULL
;
2015 free(c
->apparmor_profile
);
2016 c
->apparmor_profile
= NULL
;
2018 set_free(c
->syscall_filter
);
2019 c
->syscall_filter
= NULL
;
2021 set_free(c
->syscall_archs
);
2022 c
->syscall_archs
= NULL
;
2024 set_free(c
->address_families
);
2025 c
->address_families
= NULL
;
2027 strv_free(c
->runtime_directory
);
2028 c
->runtime_directory
= NULL
;
2030 bus_endpoint_free(c
->bus_endpoint
);
2031 c
->bus_endpoint
= NULL
;
2034 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2039 if (!runtime_prefix
)
2042 STRV_FOREACH(i
, c
->runtime_directory
) {
2043 _cleanup_free_
char *p
;
2045 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2049 /* We execute this synchronously, since we need to be
2050 * sure this is gone when we start the service
2052 (void) rm_rf(p
, REMOVE_ROOT
);
2058 void exec_command_done(ExecCommand
*c
) {
2068 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2071 for (i
= 0; i
< n
; i
++)
2072 exec_command_done(c
+i
);
2075 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2079 LIST_REMOVE(command
, c
, i
);
2080 exec_command_done(i
);
2087 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2090 for (i
= 0; i
< n
; i
++)
2091 c
[i
] = exec_command_free_list(c
[i
]);
2094 typedef struct InvalidEnvInfo
{
2099 static void invalid_env(const char *p
, void *userdata
) {
2100 InvalidEnvInfo
*info
= userdata
;
2102 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2105 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2106 char **i
, **r
= NULL
;
2111 STRV_FOREACH(i
, c
->environment_files
) {
2114 bool ignore
= false;
2116 _cleanup_globfree_ glob_t pglob
= {};
2126 if (!path_is_absolute(fn
)) {
2134 /* Filename supports globbing, take all matching files */
2136 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2141 return errno
? -errno
: -EINVAL
;
2143 count
= pglob
.gl_pathc
;
2151 for (n
= 0; n
< count
; n
++) {
2152 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2160 /* Log invalid environment variables with filename */
2162 InvalidEnvInfo info
= {
2164 .path
= pglob
.gl_pathv
[n
]
2167 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2175 m
= strv_env_merge(2, r
, p
);
2191 static bool tty_may_match_dev_console(const char *tty
) {
2192 _cleanup_free_
char *active
= NULL
;
2195 if (startswith(tty
, "/dev/"))
2198 /* trivial identity? */
2199 if (streq(tty
, "console"))
2202 console
= resolve_dev_console(&active
);
2203 /* if we could not resolve, assume it may */
2207 /* "tty0" means the active VC, so it may be the same sometimes */
2208 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2211 bool exec_context_may_touch_console(ExecContext
*ec
) {
2212 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2213 is_terminal_input(ec
->std_input
) ||
2214 is_terminal_output(ec
->std_output
) ||
2215 is_terminal_output(ec
->std_error
)) &&
2216 tty_may_match_dev_console(tty_path(ec
));
2219 static void strv_fprintf(FILE *f
, char **l
) {
2225 fprintf(f
, " %s", *g
);
2228 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2235 prefix
= strempty(prefix
);
2239 "%sWorkingDirectory: %s\n"
2240 "%sRootDirectory: %s\n"
2241 "%sNonBlocking: %s\n"
2242 "%sPrivateTmp: %s\n"
2243 "%sPrivateNetwork: %s\n"
2244 "%sPrivateDevices: %s\n"
2245 "%sProtectHome: %s\n"
2246 "%sProtectSystem: %s\n"
2247 "%sIgnoreSIGPIPE: %s\n",
2249 prefix
, c
->working_directory
? c
->working_directory
: "/",
2250 prefix
, c
->root_directory
? c
->root_directory
: "/",
2251 prefix
, yes_no(c
->non_blocking
),
2252 prefix
, yes_no(c
->private_tmp
),
2253 prefix
, yes_no(c
->private_network
),
2254 prefix
, yes_no(c
->private_devices
),
2255 prefix
, protect_home_to_string(c
->protect_home
),
2256 prefix
, protect_system_to_string(c
->protect_system
),
2257 prefix
, yes_no(c
->ignore_sigpipe
));
2259 STRV_FOREACH(e
, c
->environment
)
2260 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2262 STRV_FOREACH(e
, c
->environment_files
)
2263 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2270 if (c
->oom_score_adjust_set
)
2272 "%sOOMScoreAdjust: %i\n",
2273 prefix
, c
->oom_score_adjust
);
2275 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2277 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2278 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2280 if (c
->ioprio_set
) {
2281 _cleanup_free_
char *class_str
= NULL
;
2283 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2285 "%sIOSchedulingClass: %s\n"
2286 "%sIOPriority: %i\n",
2287 prefix
, strna(class_str
),
2288 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2291 if (c
->cpu_sched_set
) {
2292 _cleanup_free_
char *policy_str
= NULL
;
2294 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2296 "%sCPUSchedulingPolicy: %s\n"
2297 "%sCPUSchedulingPriority: %i\n"
2298 "%sCPUSchedulingResetOnFork: %s\n",
2299 prefix
, strna(policy_str
),
2300 prefix
, c
->cpu_sched_priority
,
2301 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2305 fprintf(f
, "%sCPUAffinity:", prefix
);
2306 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2307 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2308 fprintf(f
, " %u", i
);
2312 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2313 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2316 "%sStandardInput: %s\n"
2317 "%sStandardOutput: %s\n"
2318 "%sStandardError: %s\n",
2319 prefix
, exec_input_to_string(c
->std_input
),
2320 prefix
, exec_output_to_string(c
->std_output
),
2321 prefix
, exec_output_to_string(c
->std_error
));
2327 "%sTTYVHangup: %s\n"
2328 "%sTTYVTDisallocate: %s\n",
2329 prefix
, c
->tty_path
,
2330 prefix
, yes_no(c
->tty_reset
),
2331 prefix
, yes_no(c
->tty_vhangup
),
2332 prefix
, yes_no(c
->tty_vt_disallocate
));
2334 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2335 c
->std_output
== EXEC_OUTPUT_KMSG
||
2336 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2337 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2338 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2339 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2340 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2341 c
->std_error
== EXEC_OUTPUT_KMSG
||
2342 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2343 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2344 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2345 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2347 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2349 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2350 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2353 "%sSyslogFacility: %s\n"
2354 "%sSyslogLevel: %s\n",
2355 prefix
, strna(fac_str
),
2356 prefix
, strna(lvl_str
));
2359 if (c
->capabilities
) {
2360 _cleanup_cap_free_charp_
char *t
;
2362 t
= cap_to_text(c
->capabilities
, NULL
);
2364 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
2368 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2370 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2371 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2372 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2373 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2374 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
/* Each flag label starts with a space: the labels are concatenated directly after "Secure Bits:". */
2375 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? " noroot-locked" : "");
2377 if (c
->capability_bounding_set_drop
) {
2379 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2381 for (l
= 0; l
<= cap_last_cap(); l
++)
2382 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2383 fprintf(f
, " %s", strna(capability_to_name(l
)));
2389 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2391 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2393 if (strv_length(c
->supplementary_groups
) > 0) {
2394 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2395 strv_fprintf(f
, c
->supplementary_groups
);
2400 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2402 if (strv_length(c
->read_write_dirs
) > 0) {
2403 fprintf(f
, "%sReadWriteDirs:", prefix
);
2404 strv_fprintf(f
, c
->read_write_dirs
);
2408 if (strv_length(c
->read_only_dirs
) > 0) {
2409 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2410 strv_fprintf(f
, c
->read_only_dirs
);
2414 if (strv_length(c
->inaccessible_dirs
) > 0) {
2415 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2416 strv_fprintf(f
, c
->inaccessible_dirs
);
2422 "%sUtmpIdentifier: %s\n",
2423 prefix
, c
->utmp_id
);
2425 if (c
->selinux_context
)
2427 "%sSELinuxContext: %s%s\n",
2428 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2430 if (c
->personality
!= PERSONALITY_INVALID
)
2432 "%sPersonality: %s\n",
2433 prefix
, strna(personality_to_string(c
->personality
)));
2435 if (c
->syscall_filter
) {
2443 "%sSystemCallFilter: ",
2446 if (!c
->syscall_whitelist
)
2450 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2451 _cleanup_free_
char *name
= NULL
;
2458 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2459 fputs(strna(name
), f
);
2466 if (c
->syscall_archs
) {
2473 "%sSystemCallArchitectures:",
2477 SET_FOREACH(id
, c
->syscall_archs
, j
)
2478 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2483 if (c
->syscall_errno
!= 0)
2485 "%sSystemCallErrorNumber: %s\n",
2486 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2488 if (c
->apparmor_profile
)
2490 "%sAppArmorProfile: %s%s\n",
2491 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2494 bool exec_context_maintains_privileges(ExecContext
*c
) {
2497 /* Returns true if the process forked off would run under
2498 * an unchanged UID or as root. */
2503 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2509 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2514 dual_timestamp_get(&s
->start_timestamp
);
2517 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2520 if (s
->pid
&& s
->pid
!= pid
)
2524 dual_timestamp_get(&s
->exit_timestamp
);
2530 if (context
->utmp_id
)
2531 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2533 exec_context_tty_reset(context
);
2537 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2538 char buf
[FORMAT_TIMESTAMP_MAX
];
2546 prefix
= strempty(prefix
);
2549 "%sPID: "PID_FMT
"\n",
2552 if (s
->start_timestamp
.realtime
> 0)
2554 "%sStart Timestamp: %s\n",
2555 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2557 if (s
->exit_timestamp
.realtime
> 0)
2559 "%sExit Timestamp: %s\n"
2561 "%sExit Status: %i\n",
2562 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2563 prefix
, sigchld_code_to_string(s
->code
),
2567 char *exec_command_line(char **argv
) {
2575 STRV_FOREACH(a
, argv
)
2578 if (!(n
= new(char, k
)))
2582 STRV_FOREACH(a
, argv
) {
2589 if (strpbrk(*a
, WHITESPACE
)) {
2600 /* FIXME: this doesn't really handle arguments that have
2601 * spaces and ticks in them */
2606 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2607 _cleanup_free_
char *cmd
= NULL
;
2608 const char *prefix2
;
2613 prefix
= strempty(prefix
);
2614 prefix2
= strjoina(prefix
, "\t");
2616 cmd
= exec_command_line(c
->argv
);
2618 "%sCommand Line: %s\n",
2619 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2621 exec_status_dump(&c
->exec_status
, f
, prefix2
);
2624 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2627 prefix
= strempty(prefix
);
2629 LIST_FOREACH(command
, c
, c
)
2630 exec_command_dump(c
, f
, prefix
);
2633 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2640 /* It's kind of important that we keep the order here */
2641 LIST_FIND_TAIL(command
, *l
, end
);
2642 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2647 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2655 l
= strv_new_ap(path
, ap
);
2676 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2677 _cleanup_strv_free_
char **l
= NULL
;
2685 l
= strv_new_ap(path
, ap
);
2691 r
= strv_extend_strv(&c
->argv
, l
);
2699 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2704 *rt
= new0(ExecRuntime
, 1);
2709 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2714 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2724 if (!c
->private_network
&& !c
->private_tmp
)
2727 r
= exec_runtime_allocate(rt
);
2731 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2732 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2736 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2737 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2745 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2747 assert(r
->n_ref
> 0);
2753 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2758 assert(r
->n_ref
> 0);
2765 free(r
->var_tmp_dir
);
2766 safe_close_pair(r
->netns_storage_socket
);
2772 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2781 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2783 if (rt
->var_tmp_dir
)
2784 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2786 if (rt
->netns_storage_socket
[0] >= 0) {
2789 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2793 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2796 if (rt
->netns_storage_socket
[1] >= 0) {
2799 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2803 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2809 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2816 if (streq(key
, "tmp-dir")) {
2819 r
= exec_runtime_allocate(rt
);
2823 copy
= strdup(value
);
2827 free((*rt
)->tmp_dir
);
2828 (*rt
)->tmp_dir
= copy
;
2830 } else if (streq(key
, "var-tmp-dir")) {
2833 r
= exec_runtime_allocate(rt
);
2837 copy
= strdup(value
);
2841 free((*rt
)->var_tmp_dir
);
2842 (*rt
)->var_tmp_dir
= copy
;
2844 } else if (streq(key
, "netns-socket-0")) {
2847 r
= exec_runtime_allocate(rt
);
2851 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2852 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2854 safe_close((*rt
)->netns_storage_socket
[0]);
2855 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2857 } else if (streq(key
, "netns-socket-1")) {
2860 r
= exec_runtime_allocate(rt
);
2864 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2865 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2867 safe_close((*rt
)->netns_storage_socket
[1]);
2868 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
2876 static void *remove_tmpdir_thread(void *p
) {
2877 _cleanup_free_
char *path
= p
;
2879 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2883 void exec_runtime_destroy(ExecRuntime
*rt
) {
2889 /* If there are multiple users of this, let's leave the stuff around */
2894 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2896 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2898 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2905 if (rt
->var_tmp_dir
) {
2906 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2908 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2910 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2911 free(rt
->var_tmp_dir
);
2914 rt
->var_tmp_dir
= NULL
;
2917 safe_close_pair(rt
->netns_storage_socket
);
2920 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2921 [EXEC_INPUT_NULL
] = "null",
2922 [EXEC_INPUT_TTY
] = "tty",
2923 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2924 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2925 [EXEC_INPUT_SOCKET
] = "socket"
2928 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
2930 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2931 [EXEC_OUTPUT_INHERIT
] = "inherit",
2932 [EXEC_OUTPUT_NULL
] = "null",
2933 [EXEC_OUTPUT_TTY
] = "tty",
2934 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2935 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2936 [EXEC_OUTPUT_KMSG
] = "kmsg",
2937 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2938 [EXEC_OUTPUT_JOURNAL
] = "journal",
2939 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2940 [EXEC_OUTPUT_SOCKET
] = "socket"
2943 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);