1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
56 #include "capability.h"
59 #include "sd-messages.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
81 #include "apparmor-util.h"
85 #include "seccomp-util.h"
88 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
89 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
91 /* This assumes there is a 'tty' group */
94 #define SNDBUF_SIZE (8*1024*1024)
96 static int shift_fds(int fds
[], unsigned n_fds
) {
97 int start
, restart_from
;
102 /* Modifies the fds array! (sorts it) */
112 for (i
= start
; i
< (int) n_fds
; i
++) {
115 /* Already at right index? */
119 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
125 /* Hmm, the fd we wanted isn't free? Then
126 * let's remember that and try again from here */
127 if (nfd
!= i
+3 && restart_from
< 0)
131 if (restart_from
< 0)
134 start
= restart_from
;
140 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
149 /* Sets or drops O_NONBLOCK as requested, and always drops FD_CLOEXEC, on each of the fds */
151 for (i
= 0; i
< n_fds
; i
++) {
153 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
156 /* We unconditionally drop FD_CLOEXEC from the fds,
157 * since after all we want to pass these fds to our
160 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
167 _pure_
static const char *tty_path(const ExecContext
*context
) {
170 if (context
->tty_path
)
171 return context
->tty_path
;
173 return "/dev/console";
176 static void exec_context_tty_reset(const ExecContext
*context
) {
179 if (context
->tty_vhangup
)
180 terminal_vhangup(tty_path(context
));
182 if (context
->tty_reset
)
183 reset_terminal(tty_path(context
));
185 if (context
->tty_vt_disallocate
&& context
->tty_path
)
186 vt_disallocate(context
->tty_path
);
189 static bool is_terminal_output(ExecOutput o
) {
191 o
== EXEC_OUTPUT_TTY
||
192 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
193 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
194 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
197 static int open_null_as(int flags
, int nfd
) {
202 fd
= open("/dev/null", flags
|O_NOCTTY
);
207 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
215 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
216 union sockaddr_union sa
= {
217 .un
.sun_family
= AF_UNIX
,
218 .un
.sun_path
= "/run/systemd/journal/stdout",
220 uid_t olduid
= UID_INVALID
;
221 gid_t oldgid
= GID_INVALID
;
224 if (gid
!= GID_INVALID
) {
232 if (uid
!= UID_INVALID
) {
242 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
246 /* If we fail to restore the uid or gid, things will likely
247 fail later on. This should only happen if an LSM interferes. */
249 if (uid
!= UID_INVALID
)
250 (void) seteuid(olduid
);
253 if (gid
!= GID_INVALID
)
254 (void) setegid(oldgid
);
259 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
263 assert(output
< _EXEC_OUTPUT_MAX
);
267 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
271 r
= connect_journal_socket(fd
, uid
, gid
);
275 if (shutdown(fd
, SHUT_RD
) < 0) {
280 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
290 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
292 context
->syslog_priority
,
293 !!context
->syslog_level_prefix
,
294 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
295 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
296 is_terminal_output(output
));
299 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
306 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
312 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
316 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
324 static bool is_terminal_input(ExecInput i
) {
326 i
== EXEC_INPUT_TTY
||
327 i
== EXEC_INPUT_TTY_FORCE
||
328 i
== EXEC_INPUT_TTY_FAIL
;
331 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
333 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
334 return EXEC_INPUT_NULL
;
336 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
337 return EXEC_INPUT_NULL
;
342 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
344 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
345 return EXEC_OUTPUT_INHERIT
;
350 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
355 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
359 case EXEC_INPUT_NULL
:
360 return open_null_as(O_RDONLY
, STDIN_FILENO
);
363 case EXEC_INPUT_TTY_FORCE
:
364 case EXEC_INPUT_TTY_FAIL
: {
367 fd
= acquire_terminal(tty_path(context
),
368 i
== EXEC_INPUT_TTY_FAIL
,
369 i
== EXEC_INPUT_TTY_FORCE
,
375 if (fd
!= STDIN_FILENO
) {
376 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
384 case EXEC_INPUT_SOCKET
:
385 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
388 assert_not_reached("Unknown input type");
392 static int setup_output(const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, const char *unit_id
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
400 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
401 o
= fixup_output(context
->std_output
, socket_fd
);
403 if (fileno
== STDERR_FILENO
) {
405 e
= fixup_output(context
->std_error
, socket_fd
);
407 /* This expects the input and output are already set up */
409 /* Don't change the stderr file descriptor if we inherit all
410 * the way and are not on a tty */
411 if (e
== EXEC_OUTPUT_INHERIT
&&
412 o
== EXEC_OUTPUT_INHERIT
&&
413 i
== EXEC_INPUT_NULL
&&
414 !is_terminal_input(context
->std_input
) &&
418 /* Duplicate from stdout if possible */
419 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
420 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
424 } else if (o
== EXEC_OUTPUT_INHERIT
) {
425 /* If input got downgraded, inherit the original value */
426 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
427 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
429 /* If the input is connected to anything that's not a /dev/null, inherit that... */
430 if (i
!= EXEC_INPUT_NULL
)
431 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
433 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
437 /* We need to open /dev/null here anew, to get the right access mode. */
438 return open_null_as(O_WRONLY
, fileno
);
443 case EXEC_OUTPUT_NULL
:
444 return open_null_as(O_WRONLY
, fileno
);
446 case EXEC_OUTPUT_TTY
:
447 if (is_terminal_input(i
))
448 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
450 /* We don't reset the terminal if this is just about output */
451 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
453 case EXEC_OUTPUT_SYSLOG
:
454 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
455 case EXEC_OUTPUT_KMSG
:
456 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
457 case EXEC_OUTPUT_JOURNAL
:
458 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
459 r
= connect_logger_as(context
, o
, ident
, unit_id
, fileno
, uid
, gid
);
461 log_unit_struct(unit_id
,
463 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
464 fileno
== STDOUT_FILENO
? "stdout" : "stderr",
465 unit_id
, strerror(-r
)),
468 r
= open_null_as(O_WRONLY
, fileno
);
472 case EXEC_OUTPUT_SOCKET
:
473 assert(socket_fd
>= 0);
474 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
477 assert_not_reached("Unknown error type");
481 static int chown_terminal(int fd
, uid_t uid
) {
486 /* This might fail. What matters are the results. */
487 (void) fchown(fd
, uid
, -1);
488 (void) fchmod(fd
, TTY_MODE
);
490 if (fstat(fd
, &st
) < 0)
493 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
499 static int setup_confirm_stdio(int *_saved_stdin
,
500 int *_saved_stdout
) {
501 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
503 assert(_saved_stdin
);
504 assert(_saved_stdout
);
506 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
510 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
511 if (saved_stdout
< 0) {
516 fd
= acquire_terminal(
521 DEFAULT_CONFIRM_USEC
);
527 r
= chown_terminal(fd
, getuid());
531 if (dup2(fd
, STDIN_FILENO
) < 0) {
536 if (dup2(fd
, STDOUT_FILENO
) < 0) {
544 *_saved_stdin
= saved_stdin
;
545 *_saved_stdout
= saved_stdout
;
550 safe_close(saved_stdout
);
551 safe_close(saved_stdin
);
557 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
558 _cleanup_close_
int fd
= -1;
563 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
567 va_start(ap
, format
);
568 vdprintf(fd
, format
, ap
);
574 static int restore_confirm_stdio(int *saved_stdin
,
580 assert(saved_stdout
);
584 if (*saved_stdin
>= 0)
585 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
588 if (*saved_stdout
>= 0)
589 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
592 safe_close(*saved_stdin
);
593 safe_close(*saved_stdout
);
598 static int ask_for_confirmation(char *response
, char **argv
) {
599 int saved_stdout
= -1, saved_stdin
= -1, r
;
600 _cleanup_free_
char *line
= NULL
;
602 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
606 line
= exec_command_line(argv
);
610 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
612 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
617 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
618 bool keep_groups
= false;
623 /* Lookup and set GID and supplementary group list. Here too
624 * we avoid NSS lookups for gid=0. */
626 if (context
->group
|| username
) {
628 if (context
->group
) {
629 const char *g
= context
->group
;
631 if ((r
= get_group_creds(&g
, &gid
)) < 0)
635 /* First step, initialize groups from /etc/group */
636 if (username
&& gid
!= 0) {
637 if (initgroups(username
, gid
) < 0)
643 /* Second step, set our gids */
644 if (setresgid(gid
, gid
, gid
) < 0)
648 if (context
->supplementary_groups
) {
653 /* Final step, initialize any manually set supplementary groups */
654 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
656 if (!(gids
= new(gid_t
, ngroups_max
)))
660 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
667 STRV_FOREACH(i
, context
->supplementary_groups
) {
670 if (k
>= ngroups_max
) {
676 r
= get_group_creds(&g
, gids
+k
);
685 if (setgroups(k
, gids
) < 0) {
696 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
699 /* Sets (but doesn't look up) the uid and makes sure we keep the
700 * capabilities while doing so. */
702 if (context
->capabilities
) {
703 _cleanup_cap_free_ cap_t d
= NULL
;
704 static const cap_value_t bits
[] = {
705 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
706 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
709 /* First step: If we need to keep capabilities but
710 * drop privileges we need to make sure we keep our
711 * caps, while we drop privileges. */
713 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
715 if (prctl(PR_GET_SECUREBITS
) != sb
)
716 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
720 /* Second step: set the capabilities. This will reduce
721 * the capabilities to the minimum we need. */
723 d
= cap_dup(context
->capabilities
);
727 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
728 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
731 if (cap_set_proc(d
) < 0)
735 /* Third step: actually set the uids */
736 if (setresuid(uid
, uid
, uid
) < 0)
739 /* At this point we should have all necessary capabilities but
740 are otherwise a normal user. However, the caps might have been
741 corrupted by the setresuid(), so we need to clean them up
742 later. This is done outside of this call. */
749 static int null_conv(
751 const struct pam_message
**msg
,
752 struct pam_response
**resp
,
755 /* We don't support conversations */
760 static int setup_pam(
766 int fds
[], unsigned n_fds
) {
768 static const struct pam_conv conv
= {
773 pam_handle_t
*handle
= NULL
;
775 int pam_code
= PAM_SUCCESS
;
778 bool close_session
= false;
779 pid_t pam_pid
= 0, parent_pid
;
786 /* We set up PAM in the parent process, then fork. The child
787 * will then stay around until killed via PR_SET_PDEATHSIG or
788 * systemd via the cgroup logic. It will then remove the PAM
789 * session again. The parent process will exec() the actual
790 * daemon. We do things this way to ensure that the main PID
791 * of the daemon is the one we initially fork()ed. */
793 if (log_get_max_level() < LOG_DEBUG
)
796 pam_code
= pam_start(name
, user
, &conv
, &handle
);
797 if (pam_code
!= PAM_SUCCESS
) {
803 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
804 if (pam_code
!= PAM_SUCCESS
)
808 pam_code
= pam_acct_mgmt(handle
, flags
);
809 if (pam_code
!= PAM_SUCCESS
)
812 pam_code
= pam_open_session(handle
, flags
);
813 if (pam_code
!= PAM_SUCCESS
)
816 close_session
= true;
818 e
= pam_getenvlist(handle
);
820 pam_code
= PAM_BUF_ERR
;
824 /* Block SIGTERM, so that we know that it won't get lost in
826 if (sigemptyset(&ss
) < 0 ||
827 sigaddset(&ss
, SIGTERM
) < 0 ||
828 sigprocmask(SIG_BLOCK
, &ss
, &old_ss
) < 0)
831 parent_pid
= getpid();
841 /* The child's job is to reset the PAM session on
844 /* This string must fit in 10 chars (i.e. the length
845 * of "/sbin/init"), to look pretty in /bin/ps */
846 rename_process("(sd-pam)");
848 /* Make sure we don't keep open the passed fds in this
849 child. We assume that otherwise only those fds are
850 open here that have been opened by PAM. */
851 close_many(fds
, n_fds
);
853 /* Drop privileges - we don't need any to pam_close_session
854 * and this will make PR_SET_PDEATHSIG work in most cases.
855 * If this fails, ignore the error - but expect sd-pam threads
856 * to fail to exit normally */
857 if (setresuid(uid
, uid
, uid
) < 0)
858 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
860 /* Wait until our parent died. This will only work if
861 * the above setresuid() succeeds, otherwise the kernel
862 * will not allow unprivileged parents to kill their privileged
863 * children this way. We rely on the control groups kill logic
864 * to do the rest for us. */
865 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
868 /* Check if our parent process might already have
870 if (getppid() == parent_pid
) {
872 if (sigwait(&ss
, &sig
) < 0) {
879 assert(sig
== SIGTERM
);
884 /* If our parent died we'll end the session */
885 if (getppid() != parent_pid
) {
886 pam_code
= pam_close_session(handle
, flags
);
887 if (pam_code
!= PAM_SUCCESS
)
894 pam_end(handle
, pam_code
| flags
);
898 /* If the child was forked off successfully it will do all the
899 * cleanups, so forget about the handle here. */
902 /* Unblock SIGTERM again in the parent */
903 if (sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) < 0)
906 /* We close the log explicitly here, since the PAM modules
907 * might have opened it, but we don't want this fd around. */
916 if (pam_code
!= PAM_SUCCESS
) {
917 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
918 err
= -EPERM
; /* PAM errors do not map to errno */
920 log_error_errno(errno
, "PAM failed: %m");
926 pam_code
= pam_close_session(handle
, flags
);
928 pam_end(handle
, pam_code
| flags
);
936 kill(pam_pid
, SIGTERM
);
937 kill(pam_pid
, SIGCONT
);
944 static void rename_process_from_path(const char *path
) {
945 char process_name
[11];
949 /* This resulting string must fit in 10 chars (i.e. the length
950 * of "/sbin/init") to look pretty in /bin/ps */
954 rename_process("(...)");
960 /* The end of the process name is usually more
961 * interesting, since the first bit might just be
967 process_name
[0] = '(';
968 memcpy(process_name
+1, p
, l
);
969 process_name
[1+l
] = ')';
970 process_name
[1+l
+1] = 0;
972 rename_process(process_name
);
977 static int apply_seccomp(const ExecContext
*c
) {
978 uint32_t negative_action
, action
;
979 scmp_filter_ctx
*seccomp
;
986 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
988 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
992 if (c
->syscall_archs
) {
994 SET_FOREACH(id
, c
->syscall_archs
, i
) {
995 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1003 r
= seccomp_add_secondary_archs(seccomp
);
1008 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1009 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1010 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1015 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1019 r
= seccomp_load(seccomp
);
1022 seccomp_release(seccomp
);
1026 static int apply_address_families(const ExecContext
*c
) {
1027 scmp_filter_ctx
*seccomp
;
1033 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1037 r
= seccomp_add_secondary_archs(seccomp
);
1041 if (c
->address_families_whitelist
) {
1042 int af
, first
= 0, last
= 0;
1045 /* If this is a whitelist, we first block the address
1046 * families that are out of range and then everything
1047 * that is not in the set. First, we find the lowest
1048 * and highest address family in the set. */
1050 SET_FOREACH(afp
, c
->address_families
, i
) {
1051 af
= PTR_TO_INT(afp
);
1053 if (af
<= 0 || af
>= af_max())
1056 if (first
== 0 || af
< first
)
1059 if (last
== 0 || af
> last
)
1063 assert((first
== 0) == (last
== 0));
1067 /* No entries in the valid range, block everything */
1068 r
= seccomp_rule_add(
1070 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1078 /* Block everything below the first entry */
1079 r
= seccomp_rule_add(
1081 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1084 SCMP_A0(SCMP_CMP_LT
, first
));
1088 /* Block everything above the last entry */
1089 r
= seccomp_rule_add(
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1094 SCMP_A0(SCMP_CMP_GT
, last
));
1098 /* Block everything between the first and last
1100 for (af
= 1; af
< af_max(); af
++) {
1102 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1105 r
= seccomp_rule_add(
1107 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1110 SCMP_A0(SCMP_CMP_EQ
, af
));
1119 /* If this is a blacklist, then generate one rule for
1120 * each address family that are then combined in OR
1123 SET_FOREACH(af
, c
->address_families
, i
) {
1125 r
= seccomp_rule_add(
1127 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1130 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1136 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1140 r
= seccomp_load(seccomp
);
1143 seccomp_release(seccomp
);
1149 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1153 safe_close(idle_pipe
[1]);
1154 safe_close(idle_pipe
[2]);
1156 if (idle_pipe
[0] >= 0) {
1159 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1161 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1162 /* Signal systemd that we are bored and want to continue. */
1163 r
= write(idle_pipe
[3], "x", 1);
1165 /* Wait for systemd to react to the signal above. */
1166 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1169 safe_close(idle_pipe
[0]);
1173 safe_close(idle_pipe
[3]);
1176 static int build_environment(
1177 const ExecContext
*c
,
1179 usec_t watchdog_usec
,
1181 const char *username
,
1185 _cleanup_strv_free_
char **our_env
= NULL
;
1192 our_env
= new0(char*, 10);
1197 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1199 our_env
[n_env
++] = x
;
1201 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1203 our_env
[n_env
++] = x
;
1206 if (watchdog_usec
> 0) {
1207 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1209 our_env
[n_env
++] = x
;
1211 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1213 our_env
[n_env
++] = x
;
1217 x
= strappend("HOME=", home
);
1220 our_env
[n_env
++] = x
;
1224 x
= strappend("LOGNAME=", username
);
1227 our_env
[n_env
++] = x
;
1229 x
= strappend("USER=", username
);
1232 our_env
[n_env
++] = x
;
1236 x
= strappend("SHELL=", shell
);
1239 our_env
[n_env
++] = x
;
1242 if (is_terminal_input(c
->std_input
) ||
1243 c
->std_output
== EXEC_OUTPUT_TTY
||
1244 c
->std_error
== EXEC_OUTPUT_TTY
||
1247 x
= strdup(default_term_for_tty(tty_path(c
)));
1250 our_env
[n_env
++] = x
;
1253 our_env
[n_env
++] = NULL
;
1254 assert(n_env
<= 10);
1262 static int exec_child(
1263 ExecCommand
*command
,
1264 const ExecContext
*context
,
1265 const ExecParameters
*params
,
1266 ExecRuntime
*runtime
,
1269 int *fds
, unsigned n_fds
,
1273 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1274 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1275 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1276 unsigned n_dont_close
= 0;
1277 int dont_close
[n_fds
+ 4];
1278 uid_t uid
= UID_INVALID
;
1279 gid_t gid
= GID_INVALID
;
1285 assert(exit_status
);
1287 rename_process_from_path(command
->path
);
1289 /* We reset exactly these signals, since they are the
1290 * only ones we set to SIG_IGN in the main daemon. All
1291 * others we leave untouched because we set them to
1292 * SIG_DFL or a valid handler initially, both of which
1293 * will be demoted to SIG_DFL. */
1294 default_signals(SIGNALS_CRASH_HANDLER
,
1295 SIGNALS_IGNORE
, -1);
1297 if (context
->ignore_sigpipe
)
1298 ignore_signals(SIGPIPE
, -1);
1300 r
= reset_signal_mask();
1302 *exit_status
= EXIT_SIGNAL_MASK
;
1306 if (params
->idle_pipe
)
1307 do_idle_pipe_dance(params
->idle_pipe
);
1309 /* Close sockets very early to make sure we don't
1310 * block init reexecution because it cannot bind its
1316 dont_close
[n_dont_close
++] = socket_fd
;
1318 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1319 n_dont_close
+= n_fds
;
1321 if (params
->bus_endpoint_fd
>= 0)
1322 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1324 if (runtime
->netns_storage_socket
[0] >= 0)
1325 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1326 if (runtime
->netns_storage_socket
[1] >= 0)
1327 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1330 r
= close_all_fds(dont_close
, n_dont_close
);
1332 *exit_status
= EXIT_FDS
;
1336 if (!context
->same_pgrp
)
1338 *exit_status
= EXIT_SETSID
;
1342 exec_context_tty_reset(context
);
1344 if (params
->confirm_spawn
) {
1347 r
= ask_for_confirmation(&response
, argv
);
1348 if (r
== -ETIMEDOUT
)
1349 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1351 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1352 else if (response
== 's') {
1353 write_confirm_message("Skipping execution.\n");
1354 *exit_status
= EXIT_CONFIRM
;
1356 } else if (response
== 'n') {
1357 write_confirm_message("Failing execution.\n");
1363 if (context
->user
) {
1364 username
= context
->user
;
1365 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1367 *exit_status
= EXIT_USER
;
1372 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1373 * must be sure to drop O_NONBLOCK */
1375 fd_nonblock(socket_fd
, false);
1377 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1379 *exit_status
= EXIT_STDIN
;
1383 r
= setup_output(context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->unit_id
, params
->apply_tty_stdin
, uid
, gid
);
1385 *exit_status
= EXIT_STDOUT
;
1389 r
= setup_output(context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->unit_id
, params
->apply_tty_stdin
, uid
, gid
);
1391 *exit_status
= EXIT_STDERR
;
1395 if (params
->cgroup_path
) {
1396 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1398 *exit_status
= EXIT_CGROUP
;
1403 if (context
->oom_score_adjust_set
) {
1404 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1406 /* When we can't make this change due to EPERM, then
1407 * let's silently skip over it. User namespaces
1408 * prohibit write access to this file, and we
1409 * shouldn't trip up over that. */
1411 sprintf(t
, "%i", context
->oom_score_adjust
);
1412 r
= write_string_file("/proc/self/oom_score_adj", t
);
1413 if (r
== -EPERM
|| r
== -EACCES
) {
1415 log_unit_debug_errno(params
->unit_id
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1418 *exit_status
= EXIT_OOM_ADJUST
;
1423 if (context
->nice_set
)
1424 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1425 *exit_status
= EXIT_NICE
;
1429 if (context
->cpu_sched_set
) {
1430 struct sched_param param
= {
1431 .sched_priority
= context
->cpu_sched_priority
,
1434 r
= sched_setscheduler(0,
1435 context
->cpu_sched_policy
|
1436 (context
->cpu_sched_reset_on_fork
?
1437 SCHED_RESET_ON_FORK
: 0),
1440 *exit_status
= EXIT_SETSCHEDULER
;
1445 if (context
->cpuset
)
1446 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1447 *exit_status
= EXIT_CPUAFFINITY
;
1451 if (context
->ioprio_set
)
1452 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1453 *exit_status
= EXIT_IOPRIO
;
1457 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1458 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1459 *exit_status
= EXIT_TIMERSLACK
;
1463 if (context
->personality
!= 0xffffffffUL
)
1464 if (personality(context
->personality
) < 0) {
1465 *exit_status
= EXIT_PERSONALITY
;
1469 if (context
->utmp_id
)
1470 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1472 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1473 r
= chown_terminal(STDIN_FILENO
, uid
);
1475 *exit_status
= EXIT_STDIN
;
1481 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1482 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1484 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1486 *exit_status
= EXIT_BUS_ENDPOINT
;
1492 /* If delegation is enabled we'll pass ownership of the cgroup
1493 * (but only in systemd's own controller hierarchy!) to the
1494 * user of the new process. */
1495 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1496 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1498 *exit_status
= EXIT_CGROUP
;
1503 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1505 *exit_status
= EXIT_CGROUP
;
1510 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1513 STRV_FOREACH(rt
, context
->runtime_directory
) {
1514 _cleanup_free_
char *p
;
1516 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1518 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1522 r
= mkdir_safe_label(p
, context
->runtime_directory_mode
, uid
, gid
);
1524 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1530 if (params
->apply_permissions
) {
1531 r
= enforce_groups(context
, username
, gid
);
1533 *exit_status
= EXIT_GROUP
;
1538 umask(context
->umask
);
1541 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1542 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1544 *exit_status
= EXIT_PAM
;
1550 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1551 r
= setup_netns(runtime
->netns_storage_socket
);
1553 *exit_status
= EXIT_NETWORK
;
1558 if (!strv_isempty(context
->read_write_dirs
) ||
1559 !strv_isempty(context
->read_only_dirs
) ||
1560 !strv_isempty(context
->inaccessible_dirs
) ||
1561 context
->mount_flags
!= 0 ||
1562 (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
)) ||
1563 params
->bus_endpoint_path
||
1564 context
->private_devices
||
1565 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1566 context
->protect_home
!= PROTECT_HOME_NO
) {
1568 char *tmp
= NULL
, *var
= NULL
;
1570 /* The runtime struct only contains the parent
1571 * of the private /tmp, which is
1572 * non-accessible to world users. Inside of it
1573 * there's a /tmp that is sticky, and that's
1574 * the one we want to use here. */
1576 if (context
->private_tmp
&& runtime
) {
1577 if (runtime
->tmp_dir
)
1578 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1579 if (runtime
->var_tmp_dir
)
1580 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1583 r
= setup_namespace(
1584 context
->read_write_dirs
,
1585 context
->read_only_dirs
,
1586 context
->inaccessible_dirs
,
1589 params
->bus_endpoint_path
,
1590 context
->private_devices
,
1591 context
->protect_home
,
1592 context
->protect_system
,
1593 context
->mount_flags
);
1595 /* If we couldn't set up the namespace this is
1596 * probably due to a missing capability. In this case,
1597 * silently proceed. */
1598 if (r
== -EPERM
|| r
== -EACCES
) {
1600 log_unit_debug_errno(params
->unit_id
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1603 *exit_status
= EXIT_NAMESPACE
;
1608 if (params
->apply_chroot
) {
1609 if (context
->root_directory
)
1610 if (chroot(context
->root_directory
) < 0) {
1611 *exit_status
= EXIT_CHROOT
;
1615 if (chdir(context
->working_directory
?: "/") < 0 &&
1616 !context
->working_directory_missing_ok
) {
1617 *exit_status
= EXIT_CHDIR
;
1621 _cleanup_free_
char *d
= NULL
;
1623 if (asprintf(&d
, "%s/%s",
1624 context
->root_directory
?: "",
1625 context
->working_directory
?: "") < 0) {
1626 *exit_status
= EXIT_MEMORY
;
1631 !context
->working_directory_missing_ok
) {
1632 *exit_status
= EXIT_CHDIR
;
1638 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1639 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1641 *exit_status
= EXIT_SELINUX_CONTEXT
;
1647 /* We repeat the fd closing here, to make sure that
1648 * nothing is leaked from the PAM modules. Note that
1649 * we are more aggressive this time since we no longer need
1650 * socket_fd and the netns fds. The custom
1651 * endpoint fd was needed to upload the policy and can
1652 * now be closed as well. */
1653 r
= close_all_fds(fds
, n_fds
);
1655 r
= shift_fds(fds
, n_fds
);
1657 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1659 *exit_status
= EXIT_FDS
;
1663 if (params
->apply_permissions
) {
1665 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1666 if (!context
->rlimit
[i
])
1669 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1670 *exit_status
= EXIT_LIMITS
;
1675 if (context
->capability_bounding_set_drop
) {
1676 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1678 *exit_status
= EXIT_CAPABILITIES
;
1684 if (context
->smack_process_label
) {
1685 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1687 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1693 if (context
->user
) {
1694 r
= enforce_user(context
, uid
);
1696 *exit_status
= EXIT_USER
;
1701 /* PR_GET_SECUREBITS is not privileged, while
1702 * PR_SET_SECUREBITS is. So to suppress
1703 * potential EPERMs we'll try not to call
1704 * PR_SET_SECUREBITS unless necessary. */
1705 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1706 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1707 *exit_status
= EXIT_SECUREBITS
;
1711 if (context
->capabilities
)
1712 if (cap_set_proc(context
->capabilities
) < 0) {
1713 *exit_status
= EXIT_CAPABILITIES
;
1717 if (context
->no_new_privileges
)
1718 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1719 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1724 if (context
->address_families_whitelist
||
1725 !set_isempty(context
->address_families
)) {
1726 r
= apply_address_families(context
);
1728 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1733 if (context
->syscall_whitelist
||
1734 !set_isempty(context
->syscall_filter
) ||
1735 !set_isempty(context
->syscall_archs
)) {
1736 r
= apply_seccomp(context
);
1738 *exit_status
= EXIT_SECCOMP
;
1745 if (mac_selinux_use()) {
1746 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1749 r
= setexeccon(exec_context
);
1751 *exit_status
= EXIT_SELINUX_CONTEXT
;
1758 #ifdef HAVE_APPARMOR
1759 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1760 r
= aa_change_onexec(context
->apparmor_profile
);
1761 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1762 *exit_status
= EXIT_APPARMOR_PROFILE
;
1769 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1771 *exit_status
= EXIT_MEMORY
;
1775 final_env
= strv_env_merge(5,
1776 params
->environment
,
1778 context
->environment
,
1783 *exit_status
= EXIT_MEMORY
;
1787 final_argv
= replace_env_argv(argv
, final_env
);
1789 *exit_status
= EXIT_MEMORY
;
1793 final_env
= strv_env_clean(final_env
);
1795 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1796 _cleanup_free_
char *line
;
1798 line
= exec_command_line(final_argv
);
1801 log_unit_struct(params
->unit_id
,
1803 "EXECUTABLE=%s", command
->path
,
1804 LOG_MESSAGE("Executing: %s", line
),
1809 execve(command
->path
, final_argv
, final_env
);
1810 *exit_status
= EXIT_EXEC
;
1814 int exec_spawn(ExecCommand
*command
,
1815 const ExecContext
*context
,
1816 const ExecParameters
*params
,
1817 ExecRuntime
*runtime
,
1820 _cleanup_strv_free_
char **files_env
= NULL
;
1821 int *fds
= NULL
; unsigned n_fds
= 0;
1822 _cleanup_free_
char *line
= NULL
;
1831 assert(params
->fds
|| params
->n_fds
<= 0);
1833 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1834 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1835 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1837 if (params
->n_fds
!= 1) {
1838 log_unit_error(params
->unit_id
, "Got more than one socket.");
1842 socket_fd
= params
->fds
[0];
1846 n_fds
= params
->n_fds
;
1849 r
= exec_context_load_environment(context
, params
->unit_id
, &files_env
);
1851 return log_unit_error_errno(params
->unit_id
, r
, "Failed to load environment files: %m");
1853 argv
= params
->argv
?: command
->argv
;
1854 line
= exec_command_line(argv
);
1858 log_unit_struct(params
->unit_id
,
1860 "EXECUTABLE=%s", command
->path
,
1861 LOG_MESSAGE("About to execute: %s", line
),
1865 return log_unit_error_errno(params
->unit_id
, r
, "Failed to fork: %m");
1870 r
= exec_child(command
,
1881 log_unit_struct(params
->unit_id
,
1883 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1884 "EXECUTABLE=%s", command
->path
,
1885 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1886 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1887 command
->path
, strerror(-r
)),
1895 log_unit_debug(params
->unit_id
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1897 /* We add the new process to the cgroup both in the child (so
1898 * that we can be sure that no user code is ever executed
1899 * outside of the cgroup) and in the parent (so that we can be
1900 * sure that when we kill the cgroup the process will be
1902 if (params
->cgroup_path
)
1903 cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1905 exec_status_start(&command
->exec_status
, pid
);
1911 void exec_context_init(ExecContext
*c
) {
1915 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1916 c
->cpu_sched_policy
= SCHED_OTHER
;
1917 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1918 c
->syslog_level_prefix
= true;
1919 c
->ignore_sigpipe
= true;
1920 c
->timer_slack_nsec
= NSEC_INFINITY
;
1921 c
->personality
= 0xffffffffUL
;
1922 c
->runtime_directory_mode
= 0755;
1925 void exec_context_done(ExecContext
*c
) {
1930 strv_free(c
->environment
);
1931 c
->environment
= NULL
;
1933 strv_free(c
->environment_files
);
1934 c
->environment_files
= NULL
;
1936 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1938 c
->rlimit
[l
] = NULL
;
1941 free(c
->working_directory
);
1942 c
->working_directory
= NULL
;
1943 free(c
->root_directory
);
1944 c
->root_directory
= NULL
;
1949 free(c
->syslog_identifier
);
1950 c
->syslog_identifier
= NULL
;
1958 strv_free(c
->supplementary_groups
);
1959 c
->supplementary_groups
= NULL
;
1964 if (c
->capabilities
) {
1965 cap_free(c
->capabilities
);
1966 c
->capabilities
= NULL
;
1969 strv_free(c
->read_only_dirs
);
1970 c
->read_only_dirs
= NULL
;
1972 strv_free(c
->read_write_dirs
);
1973 c
->read_write_dirs
= NULL
;
1975 strv_free(c
->inaccessible_dirs
);
1976 c
->inaccessible_dirs
= NULL
;
1979 CPU_FREE(c
->cpuset
);
1984 free(c
->selinux_context
);
1985 c
->selinux_context
= NULL
;
1987 free(c
->apparmor_profile
);
1988 c
->apparmor_profile
= NULL
;
1990 set_free(c
->syscall_filter
);
1991 c
->syscall_filter
= NULL
;
1993 set_free(c
->syscall_archs
);
1994 c
->syscall_archs
= NULL
;
1996 set_free(c
->address_families
);
1997 c
->address_families
= NULL
;
1999 strv_free(c
->runtime_directory
);
2000 c
->runtime_directory
= NULL
;
2002 bus_endpoint_free(c
->bus_endpoint
);
2003 c
->bus_endpoint
= NULL
;
2006 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2011 if (!runtime_prefix
)
2014 STRV_FOREACH(i
, c
->runtime_directory
) {
2015 _cleanup_free_
char *p
;
2017 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2021 /* We execute this synchronously, since we need to be
2022 * sure this is gone when we start the service
2024 (void) rm_rf(p
, REMOVE_ROOT
);
2030 void exec_command_done(ExecCommand
*c
) {
2040 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2043 for (i
= 0; i
< n
; i
++)
2044 exec_command_done(c
+i
);
2047 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2051 LIST_REMOVE(command
, c
, i
);
2052 exec_command_done(i
);
2059 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2062 for (i
= 0; i
< n
; i
++)
2063 c
[i
] = exec_command_free_list(c
[i
]);
2066 typedef struct InvalidEnvInfo
{
2067 const char *unit_id
;
2071 static void invalid_env(const char *p
, void *userdata
) {
2072 InvalidEnvInfo
*info
= userdata
;
2074 log_unit_error(info
->unit_id
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2077 int exec_context_load_environment(const ExecContext
*c
, const char *unit_id
, char ***l
) {
2078 char **i
, **r
= NULL
;
2083 STRV_FOREACH(i
, c
->environment_files
) {
2086 bool ignore
= false;
2088 _cleanup_globfree_ glob_t pglob
= {};
2098 if (!path_is_absolute(fn
)) {
2106 /* Filename supports globbing, take all matching files */
2108 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2113 return errno
? -errno
: -EINVAL
;
2115 count
= pglob
.gl_pathc
;
2123 for (n
= 0; n
< count
; n
++) {
2124 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2132 /* Log invalid environment variables with filename */
2134 InvalidEnvInfo info
= {
2136 .path
= pglob
.gl_pathv
[n
]
2139 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2147 m
= strv_env_merge(2, r
, p
);
2163 static bool tty_may_match_dev_console(const char *tty
) {
2164 _cleanup_free_
char *active
= NULL
;
2167 if (startswith(tty
, "/dev/"))
2170 /* trivial identity? */
2171 if (streq(tty
, "console"))
2174 console
= resolve_dev_console(&active
);
2175 /* if we could not resolve, assume it may */
2179 /* "tty0" means the active VC, so it may be the same sometimes */
2180 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2183 bool exec_context_may_touch_console(ExecContext
*ec
) {
2184 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2185 is_terminal_input(ec
->std_input
) ||
2186 is_terminal_output(ec
->std_output
) ||
2187 is_terminal_output(ec
->std_error
)) &&
2188 tty_may_match_dev_console(tty_path(ec
));
2191 static void strv_fprintf(FILE *f
, char **l
) {
2197 fprintf(f
, " %s", *g
);
2200 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2207 prefix
= strempty(prefix
);
2211 "%sWorkingDirectory: %s\n"
2212 "%sRootDirectory: %s\n"
2213 "%sNonBlocking: %s\n"
2214 "%sPrivateTmp: %s\n"
2215 "%sPrivateNetwork: %s\n"
2216 "%sPrivateDevices: %s\n"
2217 "%sProtectHome: %s\n"
2218 "%sProtectSystem: %s\n"
2219 "%sIgnoreSIGPIPE: %s\n",
2221 prefix
, c
->working_directory
? c
->working_directory
: "/",
2222 prefix
, c
->root_directory
? c
->root_directory
: "/",
2223 prefix
, yes_no(c
->non_blocking
),
2224 prefix
, yes_no(c
->private_tmp
),
2225 prefix
, yes_no(c
->private_network
),
2226 prefix
, yes_no(c
->private_devices
),
2227 prefix
, protect_home_to_string(c
->protect_home
),
2228 prefix
, protect_system_to_string(c
->protect_system
),
2229 prefix
, yes_no(c
->ignore_sigpipe
));
2231 STRV_FOREACH(e
, c
->environment
)
2232 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2234 STRV_FOREACH(e
, c
->environment_files
)
2235 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2242 if (c
->oom_score_adjust_set
)
2244 "%sOOMScoreAdjust: %i\n",
2245 prefix
, c
->oom_score_adjust
);
2247 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2249 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2250 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2252 if (c
->ioprio_set
) {
2253 _cleanup_free_
char *class_str
= NULL
;
2255 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2257 "%sIOSchedulingClass: %s\n"
2258 "%sIOPriority: %i\n",
2259 prefix
, strna(class_str
),
2260 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2263 if (c
->cpu_sched_set
) {
2264 _cleanup_free_
char *policy_str
= NULL
;
2266 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2268 "%sCPUSchedulingPolicy: %s\n"
2269 "%sCPUSchedulingPriority: %i\n"
2270 "%sCPUSchedulingResetOnFork: %s\n",
2271 prefix
, strna(policy_str
),
2272 prefix
, c
->cpu_sched_priority
,
2273 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2277 fprintf(f
, "%sCPUAffinity:", prefix
);
2278 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2279 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2280 fprintf(f
, " %u", i
);
2284 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2285 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2288 "%sStandardInput: %s\n"
2289 "%sStandardOutput: %s\n"
2290 "%sStandardError: %s\n",
2291 prefix
, exec_input_to_string(c
->std_input
),
2292 prefix
, exec_output_to_string(c
->std_output
),
2293 prefix
, exec_output_to_string(c
->std_error
));
2299 "%sTTYVHangup: %s\n"
2300 "%sTTYVTDisallocate: %s\n",
2301 prefix
, c
->tty_path
,
2302 prefix
, yes_no(c
->tty_reset
),
2303 prefix
, yes_no(c
->tty_vhangup
),
2304 prefix
, yes_no(c
->tty_vt_disallocate
));
2306 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2307 c
->std_output
== EXEC_OUTPUT_KMSG
||
2308 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2309 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2310 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2311 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2312 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2313 c
->std_error
== EXEC_OUTPUT_KMSG
||
2314 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2315 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2316 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2317 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2319 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2321 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2322 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2325 "%sSyslogFacility: %s\n"
2326 "%sSyslogLevel: %s\n",
2327 prefix
, strna(fac_str
),
2328 prefix
, strna(lvl_str
));
2331 if (c
->capabilities
) {
2332 _cleanup_cap_free_charp_
char *t
;
2334 t
= cap_to_text(c
->capabilities
, NULL
);
2336 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
2340 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2342 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2343 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2344 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2345 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2346 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2347 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
2349 if (c
->capability_bounding_set_drop
) {
2351 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2353 for (l
= 0; l
<= cap_last_cap(); l
++)
2354 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2355 fprintf(f
, " %s", strna(capability_to_name(l
)));
2361 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2363 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2365 if (strv_length(c
->supplementary_groups
) > 0) {
2366 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2367 strv_fprintf(f
, c
->supplementary_groups
);
2372 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2374 if (strv_length(c
->read_write_dirs
) > 0) {
2375 fprintf(f
, "%sReadWriteDirs:", prefix
);
2376 strv_fprintf(f
, c
->read_write_dirs
);
2380 if (strv_length(c
->read_only_dirs
) > 0) {
2381 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2382 strv_fprintf(f
, c
->read_only_dirs
);
2386 if (strv_length(c
->inaccessible_dirs
) > 0) {
2387 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2388 strv_fprintf(f
, c
->inaccessible_dirs
);
2394 "%sUtmpIdentifier: %s\n",
2395 prefix
, c
->utmp_id
);
2397 if (c
->selinux_context
)
2399 "%sSELinuxContext: %s%s\n",
2400 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2402 if (c
->personality
!= 0xffffffffUL
)
2404 "%sPersonality: %s\n",
2405 prefix
, strna(personality_to_string(c
->personality
)));
2407 if (c
->syscall_filter
) {
2415 "%sSystemCallFilter: ",
2418 if (!c
->syscall_whitelist
)
2422 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2423 _cleanup_free_
char *name
= NULL
;
2430 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2431 fputs(strna(name
), f
);
2438 if (c
->syscall_archs
) {
2445 "%sSystemCallArchitectures:",
2449 SET_FOREACH(id
, c
->syscall_archs
, j
)
2450 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2455 if (c
->syscall_errno
!= 0)
2457 "%sSystemCallErrorNumber: %s\n",
2458 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2460 if (c
->apparmor_profile
)
2462 "%sAppArmorProfile: %s%s\n",
2463 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2466 bool exec_context_maintains_privileges(ExecContext
*c
) {
2469 /* Returns true if the process forked off would run under
2470 * an unchanged UID or as root. */
2475 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2481 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2486 dual_timestamp_get(&s
->start_timestamp
);
2489 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2492 if (s
->pid
&& s
->pid
!= pid
)
2496 dual_timestamp_get(&s
->exit_timestamp
);
2502 if (context
->utmp_id
)
2503 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2505 exec_context_tty_reset(context
);
2509 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2510 char buf
[FORMAT_TIMESTAMP_MAX
];
2518 prefix
= strempty(prefix
);
2521 "%sPID: "PID_FMT
"\n",
2524 if (s
->start_timestamp
.realtime
> 0)
2526 "%sStart Timestamp: %s\n",
2527 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2529 if (s
->exit_timestamp
.realtime
> 0)
2531 "%sExit Timestamp: %s\n"
2533 "%sExit Status: %i\n",
2534 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2535 prefix
, sigchld_code_to_string(s
->code
),
2539 char *exec_command_line(char **argv
) {
2547 STRV_FOREACH(a
, argv
)
2550 if (!(n
= new(char, k
)))
2554 STRV_FOREACH(a
, argv
) {
2561 if (strpbrk(*a
, WHITESPACE
)) {
2572 /* FIXME: this doesn't really handle arguments that have
2573 * spaces and ticks in them */
2578 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2579 _cleanup_free_
char *cmd
= NULL
;
2580 const char *prefix2
;
2585 prefix
= strempty(prefix
);
2586 prefix2
= strjoina(prefix
, "\t");
2588 cmd
= exec_command_line(c
->argv
);
2590 "%sCommand Line: %s\n",
2591 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2593 exec_status_dump(&c
->exec_status
, f
, prefix2
);
2596 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2599 prefix
= strempty(prefix
);
2601 LIST_FOREACH(command
, c
, c
)
2602 exec_command_dump(c
, f
, prefix
);
2605 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2612 /* It's kind of important that we keep the order here */
2613 LIST_FIND_TAIL(command
, *l
, end
);
2614 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2619 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2627 l
= strv_new_ap(path
, ap
);
2648 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2649 _cleanup_strv_free_
char **l
= NULL
;
2657 l
= strv_new_ap(path
, ap
);
2663 r
= strv_extend_strv(&c
->argv
, l
);
2671 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2676 *rt
= new0(ExecRuntime
, 1);
2681 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2686 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2696 if (!c
->private_network
&& !c
->private_tmp
)
2699 r
= exec_runtime_allocate(rt
);
2703 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2704 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2708 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2709 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2717 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2719 assert(r
->n_ref
> 0);
2725 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2730 assert(r
->n_ref
> 0);
2733 if (r
->n_ref
<= 0) {
2735 free(r
->var_tmp_dir
);
2736 safe_close_pair(r
->netns_storage_socket
);
2743 int exec_runtime_serialize(ExecRuntime
*rt
, Unit
*u
, FILE *f
, FDSet
*fds
) {
2752 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2754 if (rt
->var_tmp_dir
)
2755 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2757 if (rt
->netns_storage_socket
[0] >= 0) {
2760 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2764 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2767 if (rt
->netns_storage_socket
[1] >= 0) {
2770 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2774 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2780 int exec_runtime_deserialize_item(ExecRuntime
**rt
, Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
2787 if (streq(key
, "tmp-dir")) {
2790 r
= exec_runtime_allocate(rt
);
2794 copy
= strdup(value
);
2798 free((*rt
)->tmp_dir
);
2799 (*rt
)->tmp_dir
= copy
;
2801 } else if (streq(key
, "var-tmp-dir")) {
2804 r
= exec_runtime_allocate(rt
);
2808 copy
= strdup(value
);
2812 free((*rt
)->var_tmp_dir
);
2813 (*rt
)->var_tmp_dir
= copy
;
2815 } else if (streq(key
, "netns-socket-0")) {
2818 r
= exec_runtime_allocate(rt
);
2822 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2823 log_unit_debug(u
->id
, "Failed to parse netns socket value %s", value
);
2825 safe_close((*rt
)->netns_storage_socket
[0]);
2826 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2828 } else if (streq(key
, "netns-socket-1")) {
2831 r
= exec_runtime_allocate(rt
);
2835 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2836 log_unit_debug(u
->id
, "Failed to parse netns socket value %s", value
);
2838 safe_close((*rt
)->netns_storage_socket
[1]);
2839 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
2847 static void *remove_tmpdir_thread(void *p
) {
2848 _cleanup_free_
char *path
= p
;
2850 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2854 void exec_runtime_destroy(ExecRuntime
*rt
) {
2860 /* If there are multiple users of this, let's leave the stuff around */
2865 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2867 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2869 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2876 if (rt
->var_tmp_dir
) {
2877 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2879 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2881 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2882 free(rt
->var_tmp_dir
);
2885 rt
->var_tmp_dir
= NULL
;
2888 safe_close_pair(rt
->netns_storage_socket
);
2891 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2892 [EXEC_INPUT_NULL
] = "null",
2893 [EXEC_INPUT_TTY
] = "tty",
2894 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2895 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2896 [EXEC_INPUT_SOCKET
] = "socket"
2899 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
2901 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2902 [EXEC_OUTPUT_INHERIT
] = "inherit",
2903 [EXEC_OUTPUT_NULL
] = "null",
2904 [EXEC_OUTPUT_TTY
] = "tty",
2905 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2906 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2907 [EXEC_OUTPUT_KMSG
] = "kmsg",
2908 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2909 [EXEC_OUTPUT_JOURNAL
] = "journal",
2910 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2911 [EXEC_OUTPUT_SOCKET
] = "socket"
2914 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);