1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
35 #include <sys/personality.h>
38 #include <security/pam_appl.h>
42 #include <selinux/selinux.h>
50 #include <sys/apparmor.h>
53 #include "sd-messages.h"
57 #include "capability.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
85 #include "apparmor-util.h"
89 #include "seccomp-util.h"
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97 /* This assumes there is a 'tty' group */
100 #define SNDBUF_SIZE (8*1024*1024)
/* Walks fds[] starting at index "start" and, for entries not yet at their
 * target position, duplicates them with fcntl(F_DUPFD) asking for descriptor
 * number index+3 or higher. If the descriptor handed back is not exactly
 * index+3 while restart_from is still negative, the position is noted so that
 * another pass can begin there (start = restart_from). */
102 static int shift_fds(int fds
[], unsigned n_fds
) {
103 int start
, restart_from
;
108 /* Modifies the fds array! (sorts it) */
118 for (i
= start
; i
< (int) n_fds
; i
++) {
121 /* Already at right index? */
125 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
131 /* Hmm, the fd we wanted isn't free? Then
132 * let's remember that and try again from here */
133 if (nfd
!= i
+3 && restart_from
< 0)
137 if (restart_from
< 0)
140 start
= restart_from
;
/* For each of the n_fds descriptors in fds[]: applies the requested
 * non-blocking state via fd_nonblock(fd, nonblock) and then clears
 * FD_CLOEXEC via fd_cloexec(fd, false). */
146 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
155 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
157 for (i
= 0; i
< n_fds
; i
++) {
159 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
162 /* We unconditionally drop FD_CLOEXEC from the fds,
163 * since after all we want to pass these fds to our
166 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * context->tty_path is not set. The returned string is not a copy. */
173 _pure_
static const char *tty_path(const ExecContext
*context
) {
176 if (context
->tty_path
)
177 return context
->tty_path
;
179 return "/dev/console";
/* Applies the TTY clean-up options of the context: terminal_vhangup() when
 * tty_vhangup is set, reset_terminal() when tty_reset is set (both on
 * tty_path(context), i.e. with the "/dev/console" fallback), and
 * vt_disallocate() only when tty_vt_disallocate is set and an explicit
 * context->tty_path exists. */
182 static void exec_context_tty_reset(const ExecContext
*context
) {
185 if (context
->tty_vhangup
)
186 terminal_vhangup(tty_path(context
));
188 if (context
->tty_reset
)
189 reset_terminal(tty_path(context
));
191 if (context
->tty_vt_disallocate
&& context
->tty_path
)
192 vt_disallocate(context
->tty_path
);
/* Tests whether the output mode o is EXEC_OUTPUT_TTY or one of the
 * *_AND_CONSOLE modes (syslog, kmsg, journal). */
195 static bool is_terminal_output(ExecOutput o
) {
197 o
== EXEC_OUTPUT_TTY
||
198 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
199 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
200 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and
 * duplicates it onto descriptor nfd. r becomes nfd on success, or -errno
 * when dup2() fails. */
203 static int open_null_as(int flags
, int nfd
) {
208 fd
= open("/dev/null", flags
|O_NOCTTY
);
213 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
/* Connects fd to the AF_UNIX socket /run/systemd/journal/stdout. When uid
 * and/or gid are valid (not UID_INVALID/GID_INVALID) extra steps run before
 * the connect() and the previous effective IDs (olduid/oldgid) are restored
 * with seteuid()/setegid() afterwards, ignoring failures.
 * NOTE(review): the bodies of the two pre-connect branches are not visible
 * here; presumably they switch the effective gid/uid - confirm. */
221 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
222 union sockaddr_union sa
= {
223 .un
.sun_family
= AF_UNIX
,
224 .un
.sun_path
= "/run/systemd/journal/stdout",
226 uid_t olduid
= UID_INVALID
;
227 gid_t oldgid
= GID_INVALID
;
230 if (gid
!= GID_INVALID
) {
238 if (uid
!= UID_INVALID
) {
248 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
252 /* If we fail to restore the uid or gid, things will likely
253 fail later on. This should only happen if an LSM interferes. */
255 if (uid
!= UID_INVALID
)
256 (void) seteuid(olduid
);
259 if (gid
!= GID_INVALID
)
260 (void) setegid(oldgid
);
/* Creates an AF_UNIX stream socket, connects it through
 * connect_journal_socket(), shuts down its read side, enlarges the send
 * buffer to SNDBUF_SIZE and duplicates it onto nfd (r is nfd, or -errno if
 * dup2() fails).
 * NOTE(review): the identifier/priority/level-prefix/syslog/kmsg/console
 * values in the middle are arguments of a call whose head is not visible
 * here - presumably the stream header sent to the journal; confirm. */
265 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
269 assert(output
< _EXEC_OUTPUT_MAX
);
273 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
277 r
= connect_journal_socket(fd
, uid
, gid
);
281 if (shutdown(fd
, SHUT_RD
) < 0) {
286 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
296 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
298 context
->syslog_priority
,
299 !!context
->syslog_level_prefix
,
300 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
301 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
302 is_terminal_output(output
));
305 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
/* Opens the terminal at path with open_terminal(path, mode | O_NOCTTY) and
 * duplicates it onto descriptor nfd. r becomes nfd on success, or -errno
 * when dup2() fails. */
312 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
318 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
322 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
/* Tests whether the input mode i is one of the TTY modes: EXEC_INPUT_TTY,
 * EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
330 static bool is_terminal_input(ExecInput i
) {
332 i
== EXEC_INPUT_TTY
||
333 i
== EXEC_INPUT_TTY_FORCE
||
334 i
== EXEC_INPUT_TTY_FAIL
;
/* Downgrades the configured stdin mode to EXEC_INPUT_NULL when it cannot be
 * honoured: a terminal input while apply_tty_stdin is false, or
 * EXEC_INPUT_SOCKET without a valid socket_fd. */
337 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
339 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
340 return EXEC_INPUT_NULL
;
342 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
343 return EXEC_INPUT_NULL
;
/* Downgrades EXEC_OUTPUT_SOCKET to EXEC_OUTPUT_INHERIT when no valid
 * socket_fd is available. */
348 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
350 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
351 return EXEC_OUTPUT_INHERIT
;
/* Sets up STDIN_FILENO according to the fixed-up input mode (fixup_input()):
 *   EXEC_INPUT_NULL            - /dev/null opened read-only
 *   EXEC_INPUT_TTY_FORCE/_FAIL - terminal obtained with acquire_terminal()
 *                                on tty_path(context), then dup2()ed onto
 *                                stdin unless it already is stdin
 *   EXEC_INPUT_SOCKET          - socket_fd dup2()ed onto stdin
 * The dup2() paths yield STDIN_FILENO on success or -errno on failure. */
356 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
361 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
365 case EXEC_INPUT_NULL
:
366 return open_null_as(O_RDONLY
, STDIN_FILENO
);
369 case EXEC_INPUT_TTY_FORCE
:
370 case EXEC_INPUT_TTY_FAIL
: {
373 fd
= acquire_terminal(tty_path(context
),
374 i
== EXEC_INPUT_TTY_FAIL
,
375 i
== EXEC_INPUT_TTY_FORCE
,
381 if (fd
!= STDIN_FILENO
) {
382 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
390 case EXEC_INPUT_SOCKET
:
391 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
394 assert_not_reached("Unknown input type");
/* Sets up the descriptor given by fileno (stdout or stderr) from the
 * fixed-up input/output modes of the context.
 *   - For STDERR_FILENO the std_error mode is consulted as well; when it
 *     equals the stdout mode or is "inherit", stdout is dup2()ed.
 *   - "inherit": reopen the terminal if the input was downgraded from a TTY
 *     mode, dup2() stdin if the input is not /dev/null, otherwise open
 *     /dev/null write-only.
 *   - EXEC_OUTPUT_NULL: /dev/null; EXEC_OUTPUT_TTY: dup2() stdin if the input
 *     is a terminal, else open tty_path(context) write-only.
 *   - syslog/kmsg/journal modes: connect_logger_as(); the visible fallback
 *     logs an error and opens /dev/null instead.
 *   - EXEC_OUTPUT_SOCKET: dup2() of socket_fd.
 * The dup2() paths yield fileno on success or -errno on failure. */
398 static int setup_output(Unit
*unit
, const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
407 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
408 o
= fixup_output(context
->std_output
, socket_fd
);
410 if (fileno
== STDERR_FILENO
) {
412 e
= fixup_output(context
->std_error
, socket_fd
);
414 /* This expects the input and output are already set up */
416 /* Don't change the stderr file descriptor if we inherit all
417 * the way and are not on a tty */
418 if (e
== EXEC_OUTPUT_INHERIT
&&
419 o
== EXEC_OUTPUT_INHERIT
&&
420 i
== EXEC_INPUT_NULL
&&
421 !is_terminal_input(context
->std_input
) &&
425 /* Duplicate from stdout if possible */
426 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
427 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
431 } else if (o
== EXEC_OUTPUT_INHERIT
) {
432 /* If input got downgraded, inherit the original value */
433 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
434 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
436 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437 if (i
!= EXEC_INPUT_NULL
)
438 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
440 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
444 /* We need to open /dev/null here anew, to get the right access mode. */
445 return open_null_as(O_WRONLY
, fileno
);
450 case EXEC_OUTPUT_NULL
:
451 return open_null_as(O_WRONLY
, fileno
);
453 case EXEC_OUTPUT_TTY
:
454 if (is_terminal_input(i
))
455 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
457 /* We don't reset the terminal if this is just about output */
458 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
460 case EXEC_OUTPUT_SYSLOG
:
461 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
462 case EXEC_OUTPUT_KMSG
:
463 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
464 case EXEC_OUTPUT_JOURNAL
:
465 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
466 r
= connect_logger_as(context
, o
, ident
, unit
->id
, fileno
, uid
, gid
);
468 log_unit_error_errno(unit
, r
, "Failed to connect %s to the journal socket, ignoring: %m", fileno
== STDOUT_FILENO
? "stdout" : "stderr");
469 r
= open_null_as(O_WRONLY
, fileno
);
473 case EXEC_OUTPUT_SOCKET
:
474 assert(socket_fd
>= 0);
475 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
478 assert_not_reached("Unknown error type");
/* Tries to hand the terminal fd over to uid: fchown() to uid (group left
 * unchanged, -1) and fchmod() to TTY_MODE, both best-effort. The outcome is
 * then verified with fstat(): the owner must be uid and the permission bits
 * (mode & 0777) must equal TTY_MODE. */
482 static int chown_terminal(int fd
, uid_t uid
) {
487 /* This might fail. What matters are the results. */
488 (void) fchown(fd
, uid
, -1);
489 (void) fchmod(fd
, TTY_MODE
);
491 if (fstat(fd
, &st
) < 0)
494 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
/* Temporarily redirects stdin/stdout to a terminal for the confirmation
 * prompt: the current stdin and stdout are duplicated to descriptors >= 3,
 * a terminal is acquired (timeout DEFAULT_CONFIRM_USEC), chowned to the
 * current uid and dup2()ed onto stdin and stdout. The saved duplicates are
 * handed back through _saved_stdin/_saved_stdout; the safe_close() calls at
 * the end release them again (presumably on the failure path - confirm). */
500 static int setup_confirm_stdio(int *_saved_stdin
,
501 int *_saved_stdout
) {
502 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
504 assert(_saved_stdin
);
505 assert(_saved_stdout
);
507 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
511 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
512 if (saved_stdout
< 0) {
517 fd
= acquire_terminal(
522 DEFAULT_CONFIRM_USEC
);
528 r
= chown_terminal(fd
, getuid());
532 if (dup2(fd
, STDIN_FILENO
) < 0) {
537 if (dup2(fd
, STDOUT_FILENO
) < 0) {
545 *_saved_stdin
= saved_stdin
;
546 *_saved_stdout
= saved_stdout
;
551 safe_close(saved_stdout
);
552 safe_close(saved_stdin
);
/* printf-style helper: opens /dev/console write-only (O_NOCTTY|O_CLOEXEC)
 * and writes the formatted message to it with vdprintf(). The descriptor is
 * declared _cleanup_close_. */
558 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
559 _cleanup_close_
int fd
= -1;
564 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
568 va_start(ap
, format
);
569 vdprintf(fd
, format
, ap
);
/* Undoes setup_confirm_stdio(): each saved descriptor that is valid (>= 0)
 * is dup2()ed back onto STDIN_FILENO/STDOUT_FILENO, then both saved
 * descriptors are closed with safe_close(). */
575 static int restore_confirm_stdio(int *saved_stdin
,
581 assert(saved_stdout
);
585 if (*saved_stdin
>= 0)
586 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
589 if (*saved_stdout
>= 0)
590 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
593 safe_close(*saved_stdin
);
594 safe_close(*saved_stdout
);
/* Asks on the terminal whether the command in argv shall be executed:
 * switches stdio with setup_confirm_stdio(), renders argv with
 * exec_command_line(), prompts through ask_char() accepting 'y', 'n' or 's'
 * (stored in *response) and restores stdio afterwards. */
599 static int ask_for_confirmation(char *response
, char **argv
) {
600 int saved_stdout
= -1, saved_stdin
= -1, r
;
601 _cleanup_free_
char *line
= NULL
;
603 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
607 line
= exec_command_line(argv
);
611 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
613 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
/* Applies the group credentials for the process. When a group or username
 * is given: context->group (if set) is resolved with get_group_creds(),
 * initgroups() is run for username when gid is non-zero, and the
 * real/effective/saved gid are set with setresgid(). When
 * context->supplementary_groups is set, each entry is resolved and appended
 * to the list obtained from getgroups() (bounded by _SC_NGROUPS_MAX) and the
 * result is installed with setgroups(). */
618 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
619 bool keep_groups
= false;
624 /* Lookup and set GID and supplementary group list. Here too
625 * we avoid NSS lookups for gid=0. */
627 if (context
->group
|| username
) {
629 if (context
->group
) {
630 const char *g
= context
->group
;
632 if ((r
= get_group_creds(&g
, &gid
)) < 0)
636 /* First step, initialize groups from /etc/group */
637 if (username
&& gid
!= 0) {
638 if (initgroups(username
, gid
) < 0)
644 /* Second step, set our gids */
645 if (setresgid(gid
, gid
, gid
) < 0)
649 if (context
->supplementary_groups
) {
654 /* Final step, initialize any manually set supplementary groups */
655 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
657 if (!(gids
= new(gid_t
, ngroups_max
)))
661 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
668 STRV_FOREACH(i
, context
->supplementary_groups
) {
671 if (k
>= ngroups_max
) {
677 r
= get_group_creds(&g
, gids
+k
);
686 if (setgroups(k
, gids
) < 0) {
/* Switches the process to uid. When context->capabilities is set, the
 * secure bits are first extended by SECURE_KEEP_CAPS (PR_SET_SECUREBITS is
 * only issued when the current value differs), and a copy of the configured
 * capability set, extended by CAP_SETUID and CAP_SETPCAP in the effective
 * and permitted sets, is applied with cap_set_proc(). The uid change itself
 * is a setresuid(uid, uid, uid). */
697 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
700 /* Sets (but doesn't lookup) the uid and make sure we keep the
701 * capabilities while doing so. */
703 if (context
->capabilities
) {
704 _cleanup_cap_free_ cap_t d
= NULL
;
705 static const cap_value_t bits
[] = {
706 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
707 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
710 /* First step: If we need to keep capabilities but
711 * drop privileges we need to make sure we keep our
712 * caps, while we drop privileges. */
714 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
716 if (prctl(PR_GET_SECUREBITS
) != sb
)
717 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
721 /* Second step: set the capabilities. This will reduce
722 * the capabilities to the minimum we need. */
724 d
= cap_dup(context
->capabilities
);
728 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
729 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
732 if (cap_set_proc(d
) < 0)
736 /* Third step: actually set the uids */
737 if (setresuid(uid
, uid
, uid
) < 0)
740 /* At this point we should have all necessary capabilities but
741 are otherwise a normal user. However, the caps might have got
742 corrupted due to the setresuid() so we need clean them up
743 later. This is done outside of this call. */
/* PAM conversation callback taking the message and response arrays; as the
 * comment below states, conversations are not supported. */
750 static int null_conv(
752 const struct pam_message
**msg
,
753 struct pam_response
**resp
,
756 /* We don't support conversations */
/* Opens a PAM session for the process: pam_start(), PAM_TTY item,
 * pam_acct_mgmt(), pam_open_session(), then pam_getenvlist(). A helper
 * process renamed "(sd-pam)" closes the passed fds, drops to uid, arms
 * PR_SET_PDEATHSIG with SIGTERM, waits for SIGTERM with sigwait() while its
 * parent is still alive, and calls pam_close_session()/pam_end() once the
 * parent is gone. SIGTERM is blocked around this and the previous mask is
 * restored in the parent. On PAM failure the error is logged (PAM codes are
 * reported as -EPERM), the session/handle are torn down and the helper, if
 * any, is sent SIGTERM followed by SIGCONT. */
761 static int setup_pam(
767 int fds
[], unsigned n_fds
) {
769 static const struct pam_conv conv
= {
774 pam_handle_t
*handle
= NULL
;
776 int pam_code
= PAM_SUCCESS
;
779 bool close_session
= false;
780 pid_t pam_pid
= 0, parent_pid
;
787 /* We set up PAM in the parent process, then fork. The child
788 * will then stay around until killed via PR_SET_PDEATHSIG or
789 * systemd via the cgroup logic. It will then remove the PAM
790 * session again. The parent process will exec() the actual
791 * daemon. We do things this way to ensure that the main PID
792 * of the daemon is the one we initially fork()ed. */
794 if (log_get_max_level() < LOG_DEBUG
)
797 pam_code
= pam_start(name
, user
, &conv
, &handle
);
798 if (pam_code
!= PAM_SUCCESS
) {
804 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
805 if (pam_code
!= PAM_SUCCESS
)
809 pam_code
= pam_acct_mgmt(handle
, flags
);
810 if (pam_code
!= PAM_SUCCESS
)
813 pam_code
= pam_open_session(handle
, flags
);
814 if (pam_code
!= PAM_SUCCESS
)
817 close_session
= true;
819 e
= pam_getenvlist(handle
);
821 pam_code
= PAM_BUF_ERR
;
825 /* Block SIGTERM, so that we know that it won't get lost in
828 assert_se(sigprocmask_many(SIG_BLOCK
, &old_ss
, SIGTERM
, -1) >= 0);
830 parent_pid
= getpid();
840 /* The child's job is to reset the PAM session on
843 /* This string must fit in 10 chars (i.e. the length
844 * of "/sbin/init"), to look pretty in /bin/ps */
845 rename_process("(sd-pam)");
847 /* Make sure we don't keep open the passed fds in this
848 child. We assume that otherwise only those fds are
849 open here that have been opened by PAM. */
850 close_many(fds
, n_fds
);
852 /* Drop privileges - we don't need any to pam_close_session
853 * and this will make PR_SET_PDEATHSIG work in most cases.
854 * If this fails, ignore the error - but expect sd-pam threads
855 * to fail to exit normally */
/* NOTE(review): setresuid() reports its failure through errno, and no
 * assignment to r is visible before this point; passing r to
 * log_error_errno() looks unintended (errno seems meant) - confirm. */
856 if (setresuid(uid
, uid
, uid
) < 0)
857 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
859 (void) ignore_signals(SIGPIPE
, -1);
861 /* Wait until our parent died. This will only work if
862 * the above setresuid() succeeds, otherwise the kernel
863 * will not allow unprivileged parents kill their privileged
864 * children this way. We rely on the control groups kill logic
865 * to do the rest for us. */
866 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
869 /* Check if our parent process might already have
871 if (getppid() == parent_pid
) {
874 assert_se(sigemptyset(&ss
) >= 0);
875 assert_se(sigaddset(&ss
, SIGTERM
) >= 0);
878 if (sigwait(&ss
, &sig
) < 0) {
885 assert(sig
== SIGTERM
);
890 /* If our parent died we'll end the session */
891 if (getppid() != parent_pid
) {
892 pam_code
= pam_close_session(handle
, flags
);
893 if (pam_code
!= PAM_SUCCESS
)
900 pam_end(handle
, pam_code
| flags
);
904 /* If the child was forked off successfully it will do all the
905 * cleanups, so forget about the handle here. */
908 /* Unblock SIGTERM again in the parent */
909 assert_se(sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) >= 0);
911 /* We close the log explicitly here, since the PAM modules
912 * might have opened it, but we don't want this fd around. */
921 if (pam_code
!= PAM_SUCCESS
) {
922 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
923 err
= -EPERM
; /* PAM errors do not map to errno */
925 log_error_errno(errno
, "PAM failed: %m");
931 pam_code
= pam_close_session(handle
, flags
);
933 pam_end(handle
, pam_code
| flags
);
941 kill(pam_pid
, SIGTERM
);
942 kill(pam_pid
, SIGCONT
);
/* Renames the current process after the given path: the name is wrapped in
 * parentheses - '(' + l bytes taken from p + ')' - inside an 11-byte buffer
 * and passed to rename_process(); a literal "(...)" is used as fallback.
 * NOTE(review): how p and l are derived from path is not visible here; per
 * the comments the tail of the name is preferred - confirm. */
949 static void rename_process_from_path(const char *path
) {
950 char process_name
[11];
954 /* This resulting string must fit in 10 chars (i.e. the length
955 * of "/sbin/init") to look pretty in /bin/ps */
959 rename_process("(...)");
965 /* The end of the process name is usually more
966 * interesting, since the first bit might just be
972 process_name
[0] = '(';
973 memcpy(process_name
+1, p
, l
);
974 process_name
[1+l
] = ')';
975 process_name
[1+l
+1] = 0;
977 rename_process(process_name
);
/* Builds and loads the system call filter of the context with libseccomp.
 * The "negative" action is SCMP_ACT_KILL, or SCMP_ACT_ERRNO(syscall_errno)
 * when an errno is configured. In whitelist mode the filter defaults to the
 * negative action and listed syscalls are allowed; otherwise the default is
 * allow and listed syscalls get the negative action. Architectures come
 * from c->syscall_archs (stored offset by one) and/or
 * seccomp_add_secondary_archs(). SCMP_FLTATR_CTL_NNP is set to 0 before
 * seccomp_load(); the filter context is released at the end. */
982 static int apply_seccomp(const ExecContext
*c
) {
983 uint32_t negative_action
, action
;
984 scmp_filter_ctx
*seccomp
;
991 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
993 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
997 if (c
->syscall_archs
) {
999 SET_FOREACH(id
, c
->syscall_archs
, i
) {
1000 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1008 r
= seccomp_add_secondary_archs(seccomp
);
1013 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1014 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1015 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1020 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1024 r
= seccomp_load(seccomp
);
1027 seccomp_release(seccomp
);
/* Builds and loads a seccomp filter (default action: allow) whose rules
 * answer with EPROTONOSUPPORT depending on the first syscall argument
 * (SCMP_A0), i.e. the address family.
 *   - Whitelist mode: the lowest and highest valid family in the set are
 *     determined; everything below the first, above the last, and every
 *     family in between that is not in the set is blocked. With no valid
 *     entry, everything is blocked.
 *   - Blacklist mode: one equality rule per family in the set.
 * SCMP_FLTATR_CTL_NNP is set to 0 before seccomp_load(); the filter context
 * is released at the end. */
1031 static int apply_address_families(const ExecContext
*c
) {
1032 scmp_filter_ctx
*seccomp
;
1038 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1042 r
= seccomp_add_secondary_archs(seccomp
);
1046 if (c
->address_families_whitelist
) {
1047 int af
, first
= 0, last
= 0;
1050 /* If this is a whitelist, we first block the address
1051 * families that are out of range and then everything
1052 * that is not in the set. First, we find the lowest
1053 * and highest address family in the set. */
1055 SET_FOREACH(afp
, c
->address_families
, i
) {
1056 af
= PTR_TO_INT(afp
);
1058 if (af
<= 0 || af
>= af_max())
1061 if (first
== 0 || af
< first
)
1064 if (last
== 0 || af
> last
)
1068 assert((first
== 0) == (last
== 0));
1072 /* No entries in the valid range, block everything */
1073 r
= seccomp_rule_add(
1075 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1083 /* Block everything below the first entry */
1084 r
= seccomp_rule_add(
1086 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1089 SCMP_A0(SCMP_CMP_LT
, first
));
1093 /* Block everything above the last entry */
1094 r
= seccomp_rule_add(
1096 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1099 SCMP_A0(SCMP_CMP_GT
, last
));
1103 /* Block everything between the first and last
1105 for (af
= 1; af
< af_max(); af
++) {
1107 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1110 r
= seccomp_rule_add(
1112 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1115 SCMP_A0(SCMP_CMP_EQ
, af
));
1124 /* If this is a blacklist, then generate one rule for
1125 * each address family that are then combined in OR
1128 SET_FOREACH(af
, c
->address_families
, i
) {
1130 r
= seccomp_rule_add(
1132 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1135 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1141 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1145 r
= seccomp_load(seccomp
);
1148 seccomp_release(seccomp
);
/* Idle-pipe handshake on the four descriptors in idle_pipe[]: ends 1 and 2
 * are closed right away. If end 0 is valid, POLLHUP is awaited on it for up
 * to IDLE_TIMEOUT_USEC; on timeout (and with end 3 valid) a single "x" is
 * written to end 3 and POLLHUP is awaited for another IDLE_TIMEOUT2_USEC.
 * Ends 0 and 3 are closed afterwards. */
1154 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1158 safe_close(idle_pipe
[1]);
1159 safe_close(idle_pipe
[2]);
1161 if (idle_pipe
[0] >= 0) {
1164 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1166 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1167 /* Signal systemd that we are bored and want to continue. */
1168 r
= write(idle_pipe
[3], "x", 1);
1170 /* Wait for systemd to react to the signal above. */
1171 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1174 safe_close(idle_pipe
[0]);
1178 safe_close(idle_pipe
[3]);
/* Assembles the environment block systemd itself contributes. A zeroed
 * array of 10 string slots is allocated and filled, in order, with
 * LISTEN_PID/LISTEN_FDS, WATCHDOG_PID/WATCHDOG_USEC (when watchdog_usec > 0),
 * HOME, LOGNAME, USER, SHELL and - for TTY-connected stdio - an entry built
 * from default_term_for_tty(tty_path(c)); a NULL terminator follows. The
 * final assert checks that no more than the 10 allocated slots were used.
 * NOTE(review): the guards around most entries are not visible here. */
1181 static int build_environment(
1182 const ExecContext
*c
,
1184 usec_t watchdog_usec
,
1186 const char *username
,
1190 _cleanup_strv_free_
char **our_env
= NULL
;
1197 our_env
= new0(char*, 10);
1202 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1204 our_env
[n_env
++] = x
;
1206 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1208 our_env
[n_env
++] = x
;
1211 if (watchdog_usec
> 0) {
1212 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1214 our_env
[n_env
++] = x
;
1216 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1218 our_env
[n_env
++] = x
;
1222 x
= strappend("HOME=", home
);
1225 our_env
[n_env
++] = x
;
1229 x
= strappend("LOGNAME=", username
);
1232 our_env
[n_env
++] = x
;
1234 x
= strappend("USER=", username
);
1237 our_env
[n_env
++] = x
;
1241 x
= strappend("SHELL=", shell
);
1244 our_env
[n_env
++] = x
;
1247 if (is_terminal_input(c
->std_input
) ||
1248 c
->std_output
== EXEC_OUTPUT_TTY
||
1249 c
->std_error
== EXEC_OUTPUT_TTY
||
1252 x
= strdup(default_term_for_tty(tty_path(c
)));
1255 our_env
[n_env
++] = x
;
1258 our_env
[n_env
++] = NULL
;
1259 assert(n_env
<= 10);
/* Checks the settings that call for a private mount namespace: any
 * read-write/read-only/inaccessible directory list, non-zero mount_flags,
 * private_tmp with a runtime tmp_dir or var_tmp_dir, a bus endpoint path in
 * params, or private_devices/protect_system/protect_home being enabled.
 * NOTE(review): the return statements are not visible here; presumably each
 * matching check yields true - confirm. */
1267 static bool exec_needs_mount_namespace(
1268 const ExecContext
*context
,
1269 const ExecParameters
*params
,
1270 ExecRuntime
*runtime
) {
1275 if (!strv_isempty(context
->read_write_dirs
) ||
1276 !strv_isempty(context
->read_only_dirs
) ||
1277 !strv_isempty(context
->inaccessible_dirs
))
1280 if (context
->mount_flags
!= 0)
1283 if (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
))
1286 if (params
->bus_endpoint_path
)
1289 if (context
->private_devices
||
1290 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1291 context
->protect_home
!= PROTECT_HOME_NO
)
1297 static int exec_child(
1299 ExecCommand
*command
,
1300 const ExecContext
*context
,
1301 const ExecParameters
*params
,
1302 ExecRuntime
*runtime
,
1305 int *fds
, unsigned n_fds
,
1309 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1310 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1311 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1312 unsigned n_dont_close
= 0;
1313 int dont_close
[n_fds
+ 4];
1314 uid_t uid
= UID_INVALID
;
1315 gid_t gid
= GID_INVALID
;
1317 bool needs_mount_namespace
;
1323 assert(exit_status
);
1325 rename_process_from_path(command
->path
);
1327 /* We reset exactly these signals, since they are the
1328 * only ones we set to SIG_IGN in the main daemon. All
1329 * others we leave untouched because we set them to
1330 * SIG_DFL or a valid handler initially, both of which
1331 * will be demoted to SIG_DFL. */
1332 (void) default_signals(SIGNALS_CRASH_HANDLER
,
1333 SIGNALS_IGNORE
, -1);
1335 if (context
->ignore_sigpipe
)
1336 (void) ignore_signals(SIGPIPE
, -1);
1338 r
= reset_signal_mask();
1340 *exit_status
= EXIT_SIGNAL_MASK
;
1344 if (params
->idle_pipe
)
1345 do_idle_pipe_dance(params
->idle_pipe
);
1347 /* Close sockets very early to make sure we don't
1348 * block init reexecution because it cannot bind its
1354 dont_close
[n_dont_close
++] = socket_fd
;
1356 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1357 n_dont_close
+= n_fds
;
1359 if (params
->bus_endpoint_fd
>= 0)
1360 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1362 if (runtime
->netns_storage_socket
[0] >= 0)
1363 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1364 if (runtime
->netns_storage_socket
[1] >= 0)
1365 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1368 r
= close_all_fds(dont_close
, n_dont_close
);
1370 *exit_status
= EXIT_FDS
;
1374 if (!context
->same_pgrp
)
1376 *exit_status
= EXIT_SETSID
;
1380 exec_context_tty_reset(context
);
1382 if (params
->confirm_spawn
) {
1385 r
= ask_for_confirmation(&response
, argv
);
1386 if (r
== -ETIMEDOUT
)
1387 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1389 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1390 else if (response
== 's') {
1391 write_confirm_message("Skipping execution.\n");
1392 *exit_status
= EXIT_CONFIRM
;
1394 } else if (response
== 'n') {
1395 write_confirm_message("Failing execution.\n");
1401 if (context
->user
) {
1402 username
= context
->user
;
1403 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1405 *exit_status
= EXIT_USER
;
1410 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1411 * must make sure to drop O_NONBLOCK */
1413 fd_nonblock(socket_fd
, false);
1415 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1417 *exit_status
= EXIT_STDIN
;
1421 r
= setup_output(unit
, context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1423 *exit_status
= EXIT_STDOUT
;
1427 r
= setup_output(unit
, context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->apply_tty_stdin
, uid
, gid
);
1429 *exit_status
= EXIT_STDERR
;
1433 if (params
->cgroup_path
) {
1434 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1436 *exit_status
= EXIT_CGROUP
;
1441 if (context
->oom_score_adjust_set
) {
1442 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1444 /* When we can't make this change due to EPERM, then
1445 * let's silently skip over it. User namespaces
1446 * prohibit write access to this file, and we
1447 * shouldn't trip up over that. */
1449 sprintf(t
, "%i", context
->oom_score_adjust
);
1450 r
= write_string_file("/proc/self/oom_score_adj", t
, 0);
1451 if (r
== -EPERM
|| r
== -EACCES
) {
1453 log_unit_debug_errno(unit
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1456 *exit_status
= EXIT_OOM_ADJUST
;
1461 if (context
->nice_set
)
1462 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1463 *exit_status
= EXIT_NICE
;
1467 if (context
->cpu_sched_set
) {
1468 struct sched_param param
= {
1469 .sched_priority
= context
->cpu_sched_priority
,
1472 r
= sched_setscheduler(0,
1473 context
->cpu_sched_policy
|
1474 (context
->cpu_sched_reset_on_fork
?
1475 SCHED_RESET_ON_FORK
: 0),
1478 *exit_status
= EXIT_SETSCHEDULER
;
1483 if (context
->cpuset
)
1484 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1485 *exit_status
= EXIT_CPUAFFINITY
;
1489 if (context
->ioprio_set
)
1490 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1491 *exit_status
= EXIT_IOPRIO
;
1495 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1496 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1497 *exit_status
= EXIT_TIMERSLACK
;
1501 if (context
->personality
!= PERSONALITY_INVALID
)
1502 if (personality(context
->personality
) < 0) {
1503 *exit_status
= EXIT_PERSONALITY
;
1507 if (context
->utmp_id
)
1508 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
,
1509 context
->utmp_mode
== EXEC_UTMP_INIT
? INIT_PROCESS
:
1510 context
->utmp_mode
== EXEC_UTMP_LOGIN
? LOGIN_PROCESS
:
1512 username
? "root" : context
->user
);
1514 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1515 r
= chown_terminal(STDIN_FILENO
, uid
);
1517 *exit_status
= EXIT_STDIN
;
1522 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1523 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1525 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1527 *exit_status
= EXIT_BUS_ENDPOINT
;
1532 /* If delegation is enabled we'll pass ownership of the cgroup
1533 * (but only in systemd's own controller hierarchy!) to the
1534 * user of the new process. */
1535 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1536 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1538 *exit_status
= EXIT_CGROUP
;
1543 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1545 *exit_status
= EXIT_CGROUP
;
1550 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1553 STRV_FOREACH(rt
, context
->runtime_directory
) {
1554 _cleanup_free_
char *p
;
1556 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1558 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1562 r
= mkdir_p_label(p
, context
->runtime_directory_mode
);
1564 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1568 r
= chmod_and_chown(p
, context
->runtime_directory_mode
, uid
, gid
);
1570 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1576 if (params
->apply_permissions
) {
1577 r
= enforce_groups(context
, username
, gid
);
1579 *exit_status
= EXIT_GROUP
;
1584 umask(context
->umask
);
1587 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1588 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1590 *exit_status
= EXIT_PAM
;
1596 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1597 r
= setup_netns(runtime
->netns_storage_socket
);
1599 *exit_status
= EXIT_NETWORK
;
1604 needs_mount_namespace
= exec_needs_mount_namespace(context
, params
, runtime
);
1606 if (needs_mount_namespace
) {
1607 char *tmp
= NULL
, *var
= NULL
;
1609 /* The runtime struct only contains the parent
1610 * of the private /tmp, which is
1611 * non-accessible to world users. Inside of it
1612 * there's a /tmp that is sticky, and that's
1613 * the one we want to use here. */
1615 if (context
->private_tmp
&& runtime
) {
1616 if (runtime
->tmp_dir
)
1617 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1618 if (runtime
->var_tmp_dir
)
1619 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1622 r
= setup_namespace(
1623 params
->apply_chroot
? context
->root_directory
: NULL
,
1624 context
->read_write_dirs
,
1625 context
->read_only_dirs
,
1626 context
->inaccessible_dirs
,
1629 params
->bus_endpoint_path
,
1630 context
->private_devices
,
1631 context
->protect_home
,
1632 context
->protect_system
,
1633 context
->mount_flags
);
1635 /* If we couldn't set up the namespace this is
1636 * probably due to a missing capability. In this case,
1637 * silently proceed. */
1638 if (r
== -EPERM
|| r
== -EACCES
) {
1640 log_unit_debug_errno(unit
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1643 *exit_status
= EXIT_NAMESPACE
;
1648 if (params
->apply_chroot
) {
1649 if (!needs_mount_namespace
&& context
->root_directory
)
1650 if (chroot(context
->root_directory
) < 0) {
1651 *exit_status
= EXIT_CHROOT
;
1655 if (chdir(context
->working_directory
?: "/") < 0 &&
1656 !context
->working_directory_missing_ok
) {
1657 *exit_status
= EXIT_CHDIR
;
1661 _cleanup_free_
char *d
= NULL
;
1663 if (asprintf(&d
, "%s/%s",
1664 context
->root_directory
?: "",
1665 context
->working_directory
?: "") < 0) {
1666 *exit_status
= EXIT_MEMORY
;
1671 !context
->working_directory_missing_ok
) {
1672 *exit_status
= EXIT_CHDIR
;
1678 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1679 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1681 *exit_status
= EXIT_SELINUX_CONTEXT
;
1687 /* We repeat the fd closing here, to make sure that
1688 * nothing is leaked from the PAM modules. Note that
1689 * we are more aggressive this time since socket_fd
1690 * and the netns fds we don't need anymore. The custom
1691 * endpoint fd was needed to upload the policy and can
1692 * now be closed as well. */
1693 r
= close_all_fds(fds
, n_fds
);
1695 r
= shift_fds(fds
, n_fds
);
1697 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1699 *exit_status
= EXIT_FDS
;
1703 if (params
->apply_permissions
) {
1705 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1706 if (!context
->rlimit
[i
])
1709 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1710 *exit_status
= EXIT_LIMITS
;
1715 if (context
->capability_bounding_set_drop
) {
1716 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1718 *exit_status
= EXIT_CAPABILITIES
;
1724 if (context
->smack_process_label
) {
1725 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1727 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1731 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1733 _cleanup_free_
char *exec_label
= NULL
;
1735 r
= mac_smack_read(command
->path
, SMACK_ATTR_EXEC
, &exec_label
);
1736 if (r
< 0 && r
!= -ENODATA
&& r
!= -EOPNOTSUPP
) {
1737 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1741 r
= mac_smack_apply_pid(0, exec_label
? : SMACK_DEFAULT_PROCESS_LABEL
);
1743 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1750 if (context
->user
) {
1751 r
= enforce_user(context
, uid
);
1753 *exit_status
= EXIT_USER
;
1758 /* PR_GET_SECUREBITS is not privileged, while
1759 * PR_SET_SECUREBITS is. So to suppress
1760 * potential EPERMs we'll try not to call
1761 * PR_SET_SECUREBITS unless necessary. */
1762 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1763 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1764 *exit_status
= EXIT_SECUREBITS
;
1768 if (context
->capabilities
)
1769 if (cap_set_proc(context
->capabilities
) < 0) {
1770 *exit_status
= EXIT_CAPABILITIES
;
1774 if (context
->no_new_privileges
)
1775 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1776 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1781 if (context
->address_families_whitelist
||
1782 !set_isempty(context
->address_families
)) {
1783 r
= apply_address_families(context
);
1785 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1790 if (context
->syscall_whitelist
||
1791 !set_isempty(context
->syscall_filter
) ||
1792 !set_isempty(context
->syscall_archs
)) {
1793 r
= apply_seccomp(context
);
1795 *exit_status
= EXIT_SECCOMP
;
1802 if (mac_selinux_use()) {
1803 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1806 r
= setexeccon(exec_context
);
1808 *exit_status
= EXIT_SELINUX_CONTEXT
;
1815 #ifdef HAVE_APPARMOR
1816 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1817 r
= aa_change_onexec(context
->apparmor_profile
);
1818 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1819 *exit_status
= EXIT_APPARMOR_PROFILE
;
1826 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1828 *exit_status
= EXIT_MEMORY
;
1832 final_env
= strv_env_merge(5,
1833 params
->environment
,
1835 context
->environment
,
1840 *exit_status
= EXIT_MEMORY
;
1844 final_argv
= replace_env_argv(argv
, final_env
);
1846 *exit_status
= EXIT_MEMORY
;
1850 final_env
= strv_env_clean(final_env
);
1852 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1853 _cleanup_free_
char *line
;
1855 line
= exec_command_line(final_argv
);
1858 log_struct(LOG_DEBUG
,
1860 "EXECUTABLE=%s", command
->path
,
1861 LOG_UNIT_MESSAGE(unit
, "Executing: %s", line
),
1867 execve(command
->path
, final_argv
, final_env
);
1868 *exit_status
= EXIT_EXEC
;
1872 int exec_spawn(Unit
*unit
,
1873 ExecCommand
*command
,
1874 const ExecContext
*context
,
1875 const ExecParameters
*params
,
1876 ExecRuntime
*runtime
,
1879 _cleanup_strv_free_
char **files_env
= NULL
;
1880 int *fds
= NULL
; unsigned n_fds
= 0;
1881 _cleanup_free_
char *line
= NULL
;
1891 assert(params
->fds
|| params
->n_fds
<= 0);
1893 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1894 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1895 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1897 if (params
->n_fds
!= 1) {
1898 log_unit_error(unit
, "Got more than one socket.");
1902 socket_fd
= params
->fds
[0];
1906 n_fds
= params
->n_fds
;
1909 r
= exec_context_load_environment(unit
, context
, &files_env
);
1911 return log_unit_error_errno(unit
, r
, "Failed to load environment files: %m");
1913 argv
= params
->argv
?: command
->argv
;
1914 line
= exec_command_line(argv
);
1918 log_struct(LOG_DEBUG
,
1920 LOG_UNIT_MESSAGE(unit
, "About to execute: %s", line
),
1921 "EXECUTABLE=%s", command
->path
,
1925 return log_unit_error_errno(unit
, r
, "Failed to fork: %m");
1930 r
= exec_child(unit
,
1942 log_struct_errno(LOG_ERR
, r
,
1943 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1945 LOG_UNIT_MESSAGE(unit
, "Failed at step %s spawning %s: %m",
1946 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1948 "EXECUTABLE=%s", command
->path
,
1955 log_unit_debug(unit
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1957 /* We add the new process to the cgroup both in the child (so
1958 * that we can be sure that no user code is ever executed
1959 * outside of the cgroup) and in the parent (so that we can be
1960 * sure that when we kill the cgroup the process will be
1962 if (params
->cgroup_path
)
1963 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1965 exec_status_start(&command
->exec_status
, pid
);
1971 void exec_context_init(ExecContext
*c
) {
1975 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1976 c
->cpu_sched_policy
= SCHED_OTHER
;
1977 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1978 c
->syslog_level_prefix
= true;
1979 c
->ignore_sigpipe
= true;
1980 c
->timer_slack_nsec
= NSEC_INFINITY
;
1981 c
->personality
= PERSONALITY_INVALID
;
1982 c
->runtime_directory_mode
= 0755;
1985 void exec_context_done(ExecContext
*c
) {
1990 strv_free(c
->environment
);
1991 c
->environment
= NULL
;
1993 strv_free(c
->environment_files
);
1994 c
->environment_files
= NULL
;
1996 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1997 c
->rlimit
[l
] = mfree(c
->rlimit
[l
]);
2000 c
->working_directory
= mfree(c
->working_directory
);
2001 c
->root_directory
= mfree(c
->root_directory
);
2002 c
->tty_path
= mfree(c
->tty_path
);
2003 c
->syslog_identifier
= mfree(c
->syslog_identifier
);
2004 c
->user
= mfree(c
->user
);
2005 c
->group
= mfree(c
->group
);
2007 strv_free(c
->supplementary_groups
);
2008 c
->supplementary_groups
= NULL
;
2010 c
->pam_name
= mfree(c
->pam_name
);
2012 if (c
->capabilities
) {
2013 cap_free(c
->capabilities
);
2014 c
->capabilities
= NULL
;
2017 strv_free(c
->read_only_dirs
);
2018 c
->read_only_dirs
= NULL
;
2020 strv_free(c
->read_write_dirs
);
2021 c
->read_write_dirs
= NULL
;
2023 strv_free(c
->inaccessible_dirs
);
2024 c
->inaccessible_dirs
= NULL
;
2027 CPU_FREE(c
->cpuset
);
2029 c
->utmp_id
= mfree(c
->utmp_id
);
2030 c
->selinux_context
= mfree(c
->selinux_context
);
2031 c
->apparmor_profile
= mfree(c
->apparmor_profile
);
2033 set_free(c
->syscall_filter
);
2034 c
->syscall_filter
= NULL
;
2036 set_free(c
->syscall_archs
);
2037 c
->syscall_archs
= NULL
;
2039 set_free(c
->address_families
);
2040 c
->address_families
= NULL
;
2042 strv_free(c
->runtime_directory
);
2043 c
->runtime_directory
= NULL
;
2045 bus_endpoint_free(c
->bus_endpoint
);
2046 c
->bus_endpoint
= NULL
;
2049 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2054 if (!runtime_prefix
)
2057 STRV_FOREACH(i
, c
->runtime_directory
) {
2058 _cleanup_free_
char *p
;
2060 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2064 /* We execute this synchronously, since we need to be
2065 * sure this is gone when we start the service
2067 (void) rm_rf(p
, REMOVE_ROOT
);
2073 void exec_command_done(ExecCommand
*c
) {
2076 c
->path
= mfree(c
->path
);
2082 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2085 for (i
= 0; i
< n
; i
++)
2086 exec_command_done(c
+i
);
2089 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2093 LIST_REMOVE(command
, c
, i
);
2094 exec_command_done(i
);
2101 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2104 for (i
= 0; i
< n
; i
++)
2105 c
[i
] = exec_command_free_list(c
[i
]);
2108 typedef struct InvalidEnvInfo
{
2113 static void invalid_env(const char *p
, void *userdata
) {
2114 InvalidEnvInfo
*info
= userdata
;
2116 log_unit_error(info
->unit
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2119 int exec_context_load_environment(Unit
*unit
, const ExecContext
*c
, char ***l
) {
2120 char **i
, **r
= NULL
;
2125 STRV_FOREACH(i
, c
->environment_files
) {
2128 bool ignore
= false;
2130 _cleanup_globfree_ glob_t pglob
= {};
2140 if (!path_is_absolute(fn
)) {
2148 /* Filename supports globbing, take all matching files */
2150 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2155 return errno
? -errno
: -EINVAL
;
2157 count
= pglob
.gl_pathc
;
2165 for (n
= 0; n
< count
; n
++) {
2166 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2174 /* Log invalid environment variables with filename */
2176 InvalidEnvInfo info
= {
2178 .path
= pglob
.gl_pathv
[n
]
2181 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2189 m
= strv_env_merge(2, r
, p
);
2205 static bool tty_may_match_dev_console(const char *tty
) {
2206 _cleanup_free_
char *active
= NULL
;
2209 if (startswith(tty
, "/dev/"))
2212 /* trivial identity? */
2213 if (streq(tty
, "console"))
2216 console
= resolve_dev_console(&active
);
2217 /* if we could not resolve, assume it may */
2221 /* "tty0" means the active VC, so it may be the same sometimes */
2222 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2225 bool exec_context_may_touch_console(ExecContext
*ec
) {
2226 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2227 is_terminal_input(ec
->std_input
) ||
2228 is_terminal_output(ec
->std_output
) ||
2229 is_terminal_output(ec
->std_error
)) &&
2230 tty_may_match_dev_console(tty_path(ec
));
2233 static void strv_fprintf(FILE *f
, char **l
) {
2239 fprintf(f
, " %s", *g
);
2242 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2249 prefix
= strempty(prefix
);
2253 "%sWorkingDirectory: %s\n"
2254 "%sRootDirectory: %s\n"
2255 "%sNonBlocking: %s\n"
2256 "%sPrivateTmp: %s\n"
2257 "%sPrivateNetwork: %s\n"
2258 "%sPrivateDevices: %s\n"
2259 "%sProtectHome: %s\n"
2260 "%sProtectSystem: %s\n"
2261 "%sIgnoreSIGPIPE: %s\n",
2263 prefix
, c
->working_directory
? c
->working_directory
: "/",
2264 prefix
, c
->root_directory
? c
->root_directory
: "/",
2265 prefix
, yes_no(c
->non_blocking
),
2266 prefix
, yes_no(c
->private_tmp
),
2267 prefix
, yes_no(c
->private_network
),
2268 prefix
, yes_no(c
->private_devices
),
2269 prefix
, protect_home_to_string(c
->protect_home
),
2270 prefix
, protect_system_to_string(c
->protect_system
),
2271 prefix
, yes_no(c
->ignore_sigpipe
));
2273 STRV_FOREACH(e
, c
->environment
)
2274 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2276 STRV_FOREACH(e
, c
->environment_files
)
2277 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2284 if (c
->oom_score_adjust_set
)
2286 "%sOOMScoreAdjust: %i\n",
2287 prefix
, c
->oom_score_adjust
);
2289 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2291 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2292 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2294 if (c
->ioprio_set
) {
2295 _cleanup_free_
char *class_str
= NULL
;
2297 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2299 "%sIOSchedulingClass: %s\n"
2300 "%sIOPriority: %i\n",
2301 prefix
, strna(class_str
),
2302 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2305 if (c
->cpu_sched_set
) {
2306 _cleanup_free_
char *policy_str
= NULL
;
2308 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2310 "%sCPUSchedulingPolicy: %s\n"
2311 "%sCPUSchedulingPriority: %i\n"
2312 "%sCPUSchedulingResetOnFork: %s\n",
2313 prefix
, strna(policy_str
),
2314 prefix
, c
->cpu_sched_priority
,
2315 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2319 fprintf(f
, "%sCPUAffinity:", prefix
);
2320 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2321 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2322 fprintf(f
, " %u", i
);
2326 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2327 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2330 "%sStandardInput: %s\n"
2331 "%sStandardOutput: %s\n"
2332 "%sStandardError: %s\n",
2333 prefix
, exec_input_to_string(c
->std_input
),
2334 prefix
, exec_output_to_string(c
->std_output
),
2335 prefix
, exec_output_to_string(c
->std_error
));
2341 "%sTTYVHangup: %s\n"
2342 "%sTTYVTDisallocate: %s\n",
2343 prefix
, c
->tty_path
,
2344 prefix
, yes_no(c
->tty_reset
),
2345 prefix
, yes_no(c
->tty_vhangup
),
2346 prefix
, yes_no(c
->tty_vt_disallocate
));
2348 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2349 c
->std_output
== EXEC_OUTPUT_KMSG
||
2350 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2351 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2352 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2353 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2354 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2355 c
->std_error
== EXEC_OUTPUT_KMSG
||
2356 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2357 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2358 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2359 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2361 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2363 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2364 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2367 "%sSyslogFacility: %s\n"
2368 "%sSyslogLevel: %s\n",
2369 prefix
, strna(fac_str
),
2370 prefix
, strna(lvl_str
));
2373 if (c
->capabilities
) {
2374 _cleanup_cap_free_charp_
char *t
;
2376 t
= cap_to_text(c
->capabilities
, NULL
);
2378 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
2382 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2384 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2385 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2386 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2387 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2388 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2389 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
2391 if (c
->capability_bounding_set_drop
) {
2393 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2395 for (l
= 0; l
<= cap_last_cap(); l
++)
2396 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2397 fprintf(f
, " %s", strna(capability_to_name(l
)));
2403 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2405 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2407 if (strv_length(c
->supplementary_groups
) > 0) {
2408 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2409 strv_fprintf(f
, c
->supplementary_groups
);
2414 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2416 if (strv_length(c
->read_write_dirs
) > 0) {
2417 fprintf(f
, "%sReadWriteDirs:", prefix
);
2418 strv_fprintf(f
, c
->read_write_dirs
);
2422 if (strv_length(c
->read_only_dirs
) > 0) {
2423 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2424 strv_fprintf(f
, c
->read_only_dirs
);
2428 if (strv_length(c
->inaccessible_dirs
) > 0) {
2429 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2430 strv_fprintf(f
, c
->inaccessible_dirs
);
2436 "%sUtmpIdentifier: %s\n",
2437 prefix
, c
->utmp_id
);
2439 if (c
->selinux_context
)
2441 "%sSELinuxContext: %s%s\n",
2442 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2444 if (c
->personality
!= PERSONALITY_INVALID
)
2446 "%sPersonality: %s\n",
2447 prefix
, strna(personality_to_string(c
->personality
)));
2449 if (c
->syscall_filter
) {
2457 "%sSystemCallFilter: ",
2460 if (!c
->syscall_whitelist
)
2464 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2465 _cleanup_free_
char *name
= NULL
;
2472 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2473 fputs(strna(name
), f
);
2480 if (c
->syscall_archs
) {
2487 "%sSystemCallArchitectures:",
2491 SET_FOREACH(id
, c
->syscall_archs
, j
)
2492 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2497 if (c
->syscall_errno
!= 0)
2499 "%sSystemCallErrorNumber: %s\n",
2500 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2502 if (c
->apparmor_profile
)
2504 "%sAppArmorProfile: %s%s\n",
2505 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2508 bool exec_context_maintains_privileges(ExecContext
*c
) {
2511 /* Returns true if the process forked off would run under
2512 * an unchanged UID or as root. */
2517 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2523 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2528 dual_timestamp_get(&s
->start_timestamp
);
2531 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2534 if (s
->pid
&& s
->pid
!= pid
)
2538 dual_timestamp_get(&s
->exit_timestamp
);
2544 if (context
->utmp_id
)
2545 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2547 exec_context_tty_reset(context
);
2551 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2552 char buf
[FORMAT_TIMESTAMP_MAX
];
2560 prefix
= strempty(prefix
);
2563 "%sPID: "PID_FMT
"\n",
2566 if (s
->start_timestamp
.realtime
> 0)
2568 "%sStart Timestamp: %s\n",
2569 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2571 if (s
->exit_timestamp
.realtime
> 0)
2573 "%sExit Timestamp: %s\n"
2575 "%sExit Status: %i\n",
2576 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2577 prefix
, sigchld_code_to_string(s
->code
),
2581 char *exec_command_line(char **argv
) {
2589 STRV_FOREACH(a
, argv
)
2592 if (!(n
= new(char, k
)))
2596 STRV_FOREACH(a
, argv
) {
2603 if (strpbrk(*a
, WHITESPACE
)) {
2614 /* FIXME: this doesn't really handle arguments that have
2615 * spaces and ticks in them */
2620 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2621 _cleanup_free_
char *cmd
= NULL
;
2622 const char *prefix2
;
2627 prefix
= strempty(prefix
);
2628 prefix2
= strjoina(prefix
, "\t");
2630 cmd
= exec_command_line(c
->argv
);
2632 "%sCommand Line: %s\n",
2633 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2635 exec_status_dump(&c
->exec_status
, f
, prefix2
);
2638 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2641 prefix
= strempty(prefix
);
2643 LIST_FOREACH(command
, c
, c
)
2644 exec_command_dump(c
, f
, prefix
);
2647 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2654 /* It's kind of important, that we keep the order here */
2655 LIST_FIND_TAIL(command
, *l
, end
);
2656 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2661 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2669 l
= strv_new_ap(path
, ap
);
2690 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2691 _cleanup_strv_free_
char **l
= NULL
;
2699 l
= strv_new_ap(path
, ap
);
2705 r
= strv_extend_strv(&c
->argv
, l
);
2713 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2718 *rt
= new0(ExecRuntime
, 1);
2723 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2728 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2738 if (!c
->private_network
&& !c
->private_tmp
)
2741 r
= exec_runtime_allocate(rt
);
2745 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2746 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2750 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2751 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2759 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2761 assert(r
->n_ref
> 0);
2767 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2772 assert(r
->n_ref
> 0);
2779 free(r
->var_tmp_dir
);
2780 safe_close_pair(r
->netns_storage_socket
);
2786 int exec_runtime_serialize(Unit
*u
, ExecRuntime
*rt
, FILE *f
, FDSet
*fds
) {
2795 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2797 if (rt
->var_tmp_dir
)
2798 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2800 if (rt
->netns_storage_socket
[0] >= 0) {
2803 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2807 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2810 if (rt
->netns_storage_socket
[1] >= 0) {
2813 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2817 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2823 int exec_runtime_deserialize_item(Unit
*u
, ExecRuntime
**rt
, const char *key
, const char *value
, FDSet
*fds
) {
2830 if (streq(key
, "tmp-dir")) {
2833 r
= exec_runtime_allocate(rt
);
2837 copy
= strdup(value
);
2841 free((*rt
)->tmp_dir
);
2842 (*rt
)->tmp_dir
= copy
;
2844 } else if (streq(key
, "var-tmp-dir")) {
2847 r
= exec_runtime_allocate(rt
);
2851 copy
= strdup(value
);
2855 free((*rt
)->var_tmp_dir
);
2856 (*rt
)->var_tmp_dir
= copy
;
2858 } else if (streq(key
, "netns-socket-0")) {
2861 r
= exec_runtime_allocate(rt
);
2865 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2866 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2868 safe_close((*rt
)->netns_storage_socket
[0]);
2869 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2871 } else if (streq(key
, "netns-socket-1")) {
2874 r
= exec_runtime_allocate(rt
);
2878 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2879 log_unit_debug(u
, "Failed to parse netns socket value: %s", value
);
2881 safe_close((*rt
)->netns_storage_socket
[1]);
2882 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
2890 static void *remove_tmpdir_thread(void *p
) {
2891 _cleanup_free_
char *path
= p
;
2893 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2897 void exec_runtime_destroy(ExecRuntime
*rt
) {
2903 /* If there are multiple users of this, let's leave the stuff around */
2908 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2910 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2912 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2919 if (rt
->var_tmp_dir
) {
2920 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2922 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2924 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2925 free(rt
->var_tmp_dir
);
2928 rt
->var_tmp_dir
= NULL
;
2931 safe_close_pair(rt
->netns_storage_socket
);
2934 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2935 [EXEC_INPUT_NULL
] = "null",
2936 [EXEC_INPUT_TTY
] = "tty",
2937 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2938 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2939 [EXEC_INPUT_SOCKET
] = "socket"
2942 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
2944 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2945 [EXEC_OUTPUT_INHERIT
] = "inherit",
2946 [EXEC_OUTPUT_NULL
] = "null",
2947 [EXEC_OUTPUT_TTY
] = "tty",
2948 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2949 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2950 [EXEC_OUTPUT_KMSG
] = "kmsg",
2951 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2952 [EXEC_OUTPUT_JOURNAL
] = "journal",
2953 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2954 [EXEC_OUTPUT_SOCKET
] = "socket"
2957 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
2959 static const char* const exec_utmp_mode_table
[_EXEC_UTMP_MODE_MAX
] = {
2960 [EXEC_UTMP_INIT
] = "init",
2961 [EXEC_UTMP_LOGIN
] = "login",
2962 [EXEC_UTMP_USER
] = "user",
2965 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode
, ExecUtmpMode
);