1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
56 #include "capability.h"
59 #include "sd-messages.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
65 #include "utmp-wtmp.h"
67 #include "path-util.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
84 #include "apparmor-util.h"
88 #include "seccomp-util.h"
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94 /* This assumes there is a 'tty' group */
97 #define SNDBUF_SIZE (8*1024*1024)
99 static int shift_fds(int fds
[], unsigned n_fds
) {
100 int start
, restart_from
;
105 /* Modifies the fds array! (sorts it) */
115 for (i
= start
; i
< (int) n_fds
; i
++) {
118 /* Already at right index? */
122 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
128 /* Hmm, the fd we wanted isn't free? Then
129 * let's remember that and try again from here */
130 if (nfd
!= i
+3 && restart_from
< 0)
134 if (restart_from
< 0)
137 start
= restart_from
;
143 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
154 for (i
= 0; i
< n_fds
; i
++) {
156 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
159 /* We unconditionally drop FD_CLOEXEC from the fds,
160 * since after all we want to pass these fds to our
163 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
170 _pure_
static const char *tty_path(const ExecContext
*context
) {
173 if (context
->tty_path
)
174 return context
->tty_path
;
176 return "/dev/console";
179 static void exec_context_tty_reset(const ExecContext
*context
) {
182 if (context
->tty_vhangup
)
183 terminal_vhangup(tty_path(context
));
185 if (context
->tty_reset
)
186 reset_terminal(tty_path(context
));
188 if (context
->tty_vt_disallocate
&& context
->tty_path
)
189 vt_disallocate(context
->tty_path
);
192 static bool is_terminal_output(ExecOutput o
) {
194 o
== EXEC_OUTPUT_TTY
||
195 o
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
196 o
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
197 o
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
;
200 static int open_null_as(int flags
, int nfd
) {
205 fd
= open("/dev/null", flags
|O_NOCTTY
);
210 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
218 static int connect_journal_socket(int fd
, uid_t uid
, gid_t gid
) {
219 union sockaddr_union sa
= {
220 .un
.sun_family
= AF_UNIX
,
221 .un
.sun_path
= "/run/systemd/journal/stdout",
223 uid_t olduid
= UID_INVALID
;
224 gid_t oldgid
= GID_INVALID
;
227 if (gid
!= GID_INVALID
) {
235 if (uid
!= UID_INVALID
) {
245 r
= connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(sa
.un
.sun_path
));
249 /* If we fail to restore the uid or gid, things will likely
250 fail later on. This should only happen if an LSM interferes. */
252 if (uid
!= UID_INVALID
)
253 (void) seteuid(olduid
);
256 if (gid
!= GID_INVALID
)
257 (void) setegid(oldgid
);
262 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, const char *unit_id
, int nfd
, uid_t uid
, gid_t gid
) {
266 assert(output
< _EXEC_OUTPUT_MAX
);
270 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
274 r
= connect_journal_socket(fd
, uid
, gid
);
278 if (shutdown(fd
, SHUT_RD
) < 0) {
283 fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
293 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
295 context
->syslog_priority
,
296 !!context
->syslog_level_prefix
,
297 output
== EXEC_OUTPUT_SYSLOG
|| output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
,
298 output
== EXEC_OUTPUT_KMSG
|| output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
,
299 is_terminal_output(output
));
302 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
309 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
315 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
319 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
327 static bool is_terminal_input(ExecInput i
) {
329 i
== EXEC_INPUT_TTY
||
330 i
== EXEC_INPUT_TTY_FORCE
||
331 i
== EXEC_INPUT_TTY_FAIL
;
334 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
336 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
337 return EXEC_INPUT_NULL
;
339 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
340 return EXEC_INPUT_NULL
;
345 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
347 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
348 return EXEC_OUTPUT_INHERIT
;
353 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
358 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
362 case EXEC_INPUT_NULL
:
363 return open_null_as(O_RDONLY
, STDIN_FILENO
);
366 case EXEC_INPUT_TTY_FORCE
:
367 case EXEC_INPUT_TTY_FAIL
: {
370 fd
= acquire_terminal(tty_path(context
),
371 i
== EXEC_INPUT_TTY_FAIL
,
372 i
== EXEC_INPUT_TTY_FORCE
,
378 if (fd
!= STDIN_FILENO
) {
379 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
387 case EXEC_INPUT_SOCKET
:
388 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
391 assert_not_reached("Unknown input type");
395 static int setup_output(const ExecContext
*context
, int fileno
, int socket_fd
, const char *ident
, const char *unit_id
, bool apply_tty_stdin
, uid_t uid
, gid_t gid
) {
403 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
404 o
= fixup_output(context
->std_output
, socket_fd
);
406 if (fileno
== STDERR_FILENO
) {
408 e
= fixup_output(context
->std_error
, socket_fd
);
410 /* This expects the input and output are already set up */
412 /* Don't change the stderr file descriptor if we inherit all
413 * the way and are not on a tty */
414 if (e
== EXEC_OUTPUT_INHERIT
&&
415 o
== EXEC_OUTPUT_INHERIT
&&
416 i
== EXEC_INPUT_NULL
&&
417 !is_terminal_input(context
->std_input
) &&
421 /* Duplicate from stdout if possible */
422 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
423 return dup2(STDOUT_FILENO
, fileno
) < 0 ? -errno
: fileno
;
427 } else if (o
== EXEC_OUTPUT_INHERIT
) {
428 /* If input got downgraded, inherit the original value */
429 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
430 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
432 /* If the input is connected to anything that's not a /dev/null, inherit that... */
433 if (i
!= EXEC_INPUT_NULL
)
434 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
436 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
440 /* We need to open /dev/null here anew, to get the right access mode. */
441 return open_null_as(O_WRONLY
, fileno
);
446 case EXEC_OUTPUT_NULL
:
447 return open_null_as(O_WRONLY
, fileno
);
449 case EXEC_OUTPUT_TTY
:
450 if (is_terminal_input(i
))
451 return dup2(STDIN_FILENO
, fileno
) < 0 ? -errno
: fileno
;
453 /* We don't reset the terminal if this is just about output */
454 return open_terminal_as(tty_path(context
), O_WRONLY
, fileno
);
456 case EXEC_OUTPUT_SYSLOG
:
457 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
458 case EXEC_OUTPUT_KMSG
:
459 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
460 case EXEC_OUTPUT_JOURNAL
:
461 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE
:
462 r
= connect_logger_as(context
, o
, ident
, unit_id
, fileno
, uid
, gid
);
464 log_unit_struct(unit_id
,
466 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
467 fileno
== STDOUT_FILENO
? "stdout" : "stderr",
468 unit_id
, strerror(-r
)),
471 r
= open_null_as(O_WRONLY
, fileno
);
475 case EXEC_OUTPUT_SOCKET
:
476 assert(socket_fd
>= 0);
477 return dup2(socket_fd
, fileno
) < 0 ? -errno
: fileno
;
480 assert_not_reached("Unknown error type");
484 static int chown_terminal(int fd
, uid_t uid
) {
489 /* This might fail. What matters are the results. */
490 (void) fchown(fd
, uid
, -1);
491 (void) fchmod(fd
, TTY_MODE
);
493 if (fstat(fd
, &st
) < 0)
496 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
502 static int setup_confirm_stdio(int *_saved_stdin
,
503 int *_saved_stdout
) {
504 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
506 assert(_saved_stdin
);
507 assert(_saved_stdout
);
509 saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3);
513 saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3);
514 if (saved_stdout
< 0) {
519 fd
= acquire_terminal(
524 DEFAULT_CONFIRM_USEC
);
530 r
= chown_terminal(fd
, getuid());
534 if (dup2(fd
, STDIN_FILENO
) < 0) {
539 if (dup2(fd
, STDOUT_FILENO
) < 0) {
547 *_saved_stdin
= saved_stdin
;
548 *_saved_stdout
= saved_stdout
;
553 safe_close(saved_stdout
);
554 safe_close(saved_stdin
);
560 _printf_(1, 2) static int write_confirm_message(const char *format
, ...) {
561 _cleanup_close_
int fd
= -1;
566 fd
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
570 va_start(ap
, format
);
571 vdprintf(fd
, format
, ap
);
577 static int restore_confirm_stdio(int *saved_stdin
,
583 assert(saved_stdout
);
587 if (*saved_stdin
>= 0)
588 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
591 if (*saved_stdout
>= 0)
592 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
595 safe_close(*saved_stdin
);
596 safe_close(*saved_stdout
);
601 static int ask_for_confirmation(char *response
, char **argv
) {
602 int saved_stdout
= -1, saved_stdin
= -1, r
;
603 _cleanup_free_
char *line
= NULL
;
605 r
= setup_confirm_stdio(&saved_stdin
, &saved_stdout
);
609 line
= exec_command_line(argv
);
613 r
= ask_char(response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
615 restore_confirm_stdio(&saved_stdin
, &saved_stdout
);
620 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
621 bool keep_groups
= false;
626 /* Lookup and set GID and supplementary group list. Here too
627 * we avoid NSS lookups for gid=0. */
629 if (context
->group
|| username
) {
631 if (context
->group
) {
632 const char *g
= context
->group
;
634 if ((r
= get_group_creds(&g
, &gid
)) < 0)
638 /* First step, initialize groups from /etc/group */
639 if (username
&& gid
!= 0) {
640 if (initgroups(username
, gid
) < 0)
646 /* Second step, set our gids */
647 if (setresgid(gid
, gid
, gid
) < 0)
651 if (context
->supplementary_groups
) {
656 /* Final step, initialize any manually set supplementary groups */
657 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
659 if (!(gids
= new(gid_t
, ngroups_max
)))
663 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
670 STRV_FOREACH(i
, context
->supplementary_groups
) {
673 if (k
>= ngroups_max
) {
679 r
= get_group_creds(&g
, gids
+k
);
688 if (setgroups(k
, gids
) < 0) {
699 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
702 /* Sets (but doesn't look up) the uid and makes sure we keep the
703 * capabilities while doing so. */
705 if (context
->capabilities
) {
706 _cleanup_cap_free_ cap_t d
= NULL
;
707 static const cap_value_t bits
[] = {
708 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
709 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
712 /* First step: If we need to keep capabilities but
713 * drop privileges we need to make sure we keep our
714 * caps, while we drop privileges. */
716 int sb
= context
->secure_bits
| 1<<SECURE_KEEP_CAPS
;
718 if (prctl(PR_GET_SECUREBITS
) != sb
)
719 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
723 /* Second step: set the capabilities. This will reduce
724 * the capabilities to the minimum we need. */
726 d
= cap_dup(context
->capabilities
);
730 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
731 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0)
734 if (cap_set_proc(d
) < 0)
738 /* Third step: actually set the uids */
739 if (setresuid(uid
, uid
, uid
) < 0)
742 /* At this point we should have all necessary capabilities but
743 are otherwise a normal user. However, the caps might have got
744 corrupted due to the setresuid() so we need to clean them up
745 later. This is done outside of this call. */
752 static int null_conv(
754 const struct pam_message
**msg
,
755 struct pam_response
**resp
,
758 /* We don't support conversations */
763 static int setup_pam(
769 int fds
[], unsigned n_fds
) {
771 static const struct pam_conv conv
= {
776 pam_handle_t
*handle
= NULL
;
778 int pam_code
= PAM_SUCCESS
;
781 bool close_session
= false;
782 pid_t pam_pid
= 0, parent_pid
;
789 /* We set up PAM in the parent process, then fork. The child
790 * will then stay around until killed via PR_SET_PDEATHSIG or
791 * systemd via the cgroup logic. It will then remove the PAM
792 * session again. The parent process will exec() the actual
793 * daemon. We do things this way to ensure that the main PID
794 * of the daemon is the one we initially fork()ed. */
796 if (log_get_max_level() < LOG_DEBUG
)
799 pam_code
= pam_start(name
, user
, &conv
, &handle
);
800 if (pam_code
!= PAM_SUCCESS
) {
806 pam_code
= pam_set_item(handle
, PAM_TTY
, tty
);
807 if (pam_code
!= PAM_SUCCESS
)
811 pam_code
= pam_acct_mgmt(handle
, flags
);
812 if (pam_code
!= PAM_SUCCESS
)
815 pam_code
= pam_open_session(handle
, flags
);
816 if (pam_code
!= PAM_SUCCESS
)
819 close_session
= true;
821 e
= pam_getenvlist(handle
);
823 pam_code
= PAM_BUF_ERR
;
827 /* Block SIGTERM, so that we know that it won't get lost in
829 if (sigemptyset(&ss
) < 0 ||
830 sigaddset(&ss
, SIGTERM
) < 0 ||
831 sigprocmask(SIG_BLOCK
, &ss
, &old_ss
) < 0)
834 parent_pid
= getpid();
844 /* The child's job is to reset the PAM session on
847 /* This string must fit in 10 chars (i.e. the length
848 * of "/sbin/init"), to look pretty in /bin/ps */
849 rename_process("(sd-pam)");
851 /* Make sure we don't keep open the passed fds in this
852 child. We assume that otherwise only those fds are
853 open here that have been opened by PAM. */
854 close_many(fds
, n_fds
);
856 /* Drop privileges - we don't need any to pam_close_session
857 * and this will make PR_SET_PDEATHSIG work in most cases.
858 * If this fails, ignore the error - but expect sd-pam threads
859 * to fail to exit normally */
860 if (setresuid(uid
, uid
, uid
) < 0)
861 log_error_errno(r
, "Error: Failed to setresuid() in sd-pam: %m");
863 /* Wait until our parent died. This will only work if
864 * the above setresuid() succeeds, otherwise the kernel
865 * will not allow unprivileged parents to kill their privileged
866 * children this way. We rely on the control groups kill logic
867 * to do the rest for us. */
868 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
871 /* Check if our parent process might already have
873 if (getppid() == parent_pid
) {
875 if (sigwait(&ss
, &sig
) < 0) {
882 assert(sig
== SIGTERM
);
887 /* If our parent died we'll end the session */
888 if (getppid() != parent_pid
) {
889 pam_code
= pam_close_session(handle
, flags
);
890 if (pam_code
!= PAM_SUCCESS
)
897 pam_end(handle
, pam_code
| flags
);
901 /* If the child was forked off successfully it will do all the
902 * cleanups, so forget about the handle here. */
905 /* Unblock SIGTERM again in the parent */
906 if (sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) < 0)
909 /* We close the log explicitly here, since the PAM modules
910 * might have opened it, but we don't want this fd around. */
919 if (pam_code
!= PAM_SUCCESS
) {
920 log_error("PAM failed: %s", pam_strerror(handle
, pam_code
));
921 err
= -EPERM
; /* PAM errors do not map to errno */
923 log_error_errno(errno
, "PAM failed: %m");
929 pam_code
= pam_close_session(handle
, flags
);
931 pam_end(handle
, pam_code
| flags
);
939 kill(pam_pid
, SIGTERM
);
940 kill(pam_pid
, SIGCONT
);
947 static void rename_process_from_path(const char *path
) {
948 char process_name
[11];
952 /* This resulting string must fit in 10 chars (i.e. the length
953 * of "/sbin/init") to look pretty in /bin/ps */
957 rename_process("(...)");
963 /* The end of the process name is usually more
964 * interesting, since the first bit might just be
970 process_name
[0] = '(';
971 memcpy(process_name
+1, p
, l
);
972 process_name
[1+l
] = ')';
973 process_name
[1+l
+1] = 0;
975 rename_process(process_name
);
980 static int apply_seccomp(const ExecContext
*c
) {
981 uint32_t negative_action
, action
;
982 scmp_filter_ctx
*seccomp
;
989 negative_action
= c
->syscall_errno
== 0 ? SCMP_ACT_KILL
: SCMP_ACT_ERRNO(c
->syscall_errno
);
991 seccomp
= seccomp_init(c
->syscall_whitelist
? negative_action
: SCMP_ACT_ALLOW
);
995 if (c
->syscall_archs
) {
997 SET_FOREACH(id
, c
->syscall_archs
, i
) {
998 r
= seccomp_arch_add(seccomp
, PTR_TO_UINT32(id
) - 1);
1006 r
= seccomp_add_secondary_archs(seccomp
);
1011 action
= c
->syscall_whitelist
? SCMP_ACT_ALLOW
: negative_action
;
1012 SET_FOREACH(id
, c
->syscall_filter
, i
) {
1013 r
= seccomp_rule_add(seccomp
, action
, PTR_TO_INT(id
) - 1, 0);
1018 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1022 r
= seccomp_load(seccomp
);
1025 seccomp_release(seccomp
);
1029 static int apply_address_families(const ExecContext
*c
) {
1030 scmp_filter_ctx
*seccomp
;
1036 seccomp
= seccomp_init(SCMP_ACT_ALLOW
);
1040 r
= seccomp_add_secondary_archs(seccomp
);
1044 if (c
->address_families_whitelist
) {
1045 int af
, first
= 0, last
= 0;
1048 /* If this is a whitelist, we first block the address
1049 * families that are out of range and then everything
1050 * that is not in the set. First, we find the lowest
1051 * and highest address family in the set. */
1053 SET_FOREACH(afp
, c
->address_families
, i
) {
1054 af
= PTR_TO_INT(afp
);
1056 if (af
<= 0 || af
>= af_max())
1059 if (first
== 0 || af
< first
)
1062 if (last
== 0 || af
> last
)
1066 assert((first
== 0) == (last
== 0));
1070 /* No entries in the valid range, block everything */
1071 r
= seccomp_rule_add(
1073 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1081 /* Block everything below the first entry */
1082 r
= seccomp_rule_add(
1084 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1087 SCMP_A0(SCMP_CMP_LT
, first
));
1091 /* Block everything above the last entry */
1092 r
= seccomp_rule_add(
1094 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1097 SCMP_A0(SCMP_CMP_GT
, last
));
1101 /* Block everything between the first and last
1103 for (af
= 1; af
< af_max(); af
++) {
1105 if (set_contains(c
->address_families
, INT_TO_PTR(af
)))
1108 r
= seccomp_rule_add(
1110 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1113 SCMP_A0(SCMP_CMP_EQ
, af
));
1122 /* If this is a blacklist, then generate one rule for
1123 * each address family that are then combined in OR
1126 SET_FOREACH(af
, c
->address_families
, i
) {
1128 r
= seccomp_rule_add(
1130 SCMP_ACT_ERRNO(EPROTONOSUPPORT
),
1133 SCMP_A0(SCMP_CMP_EQ
, PTR_TO_INT(af
)));
1139 r
= seccomp_attr_set(seccomp
, SCMP_FLTATR_CTL_NNP
, 0);
1143 r
= seccomp_load(seccomp
);
1146 seccomp_release(seccomp
);
1152 static void do_idle_pipe_dance(int idle_pipe
[4]) {
1156 safe_close(idle_pipe
[1]);
1157 safe_close(idle_pipe
[2]);
1159 if (idle_pipe
[0] >= 0) {
1162 r
= fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT_USEC
);
1164 if (idle_pipe
[3] >= 0 && r
== 0 /* timeout */) {
1165 /* Signal systemd that we are bored and want to continue. */
1166 r
= write(idle_pipe
[3], "x", 1);
1168 /* Wait for systemd to react to the signal above. */
1169 fd_wait_for_event(idle_pipe
[0], POLLHUP
, IDLE_TIMEOUT2_USEC
);
1172 safe_close(idle_pipe
[0]);
1176 safe_close(idle_pipe
[3]);
1179 static int build_environment(
1180 const ExecContext
*c
,
1182 usec_t watchdog_usec
,
1184 const char *username
,
1188 _cleanup_strv_free_
char **our_env
= NULL
;
1195 our_env
= new0(char*, 10);
1200 if (asprintf(&x
, "LISTEN_PID="PID_FMT
, getpid()) < 0)
1202 our_env
[n_env
++] = x
;
1204 if (asprintf(&x
, "LISTEN_FDS=%u", n_fds
) < 0)
1206 our_env
[n_env
++] = x
;
1209 if (watchdog_usec
> 0) {
1210 if (asprintf(&x
, "WATCHDOG_PID="PID_FMT
, getpid()) < 0)
1212 our_env
[n_env
++] = x
;
1214 if (asprintf(&x
, "WATCHDOG_USEC="USEC_FMT
, watchdog_usec
) < 0)
1216 our_env
[n_env
++] = x
;
1220 x
= strappend("HOME=", home
);
1223 our_env
[n_env
++] = x
;
1227 x
= strappend("LOGNAME=", username
);
1230 our_env
[n_env
++] = x
;
1232 x
= strappend("USER=", username
);
1235 our_env
[n_env
++] = x
;
1239 x
= strappend("SHELL=", shell
);
1242 our_env
[n_env
++] = x
;
1245 if (is_terminal_input(c
->std_input
) ||
1246 c
->std_output
== EXEC_OUTPUT_TTY
||
1247 c
->std_error
== EXEC_OUTPUT_TTY
||
1250 x
= strdup(default_term_for_tty(tty_path(c
)));
1253 our_env
[n_env
++] = x
;
1256 our_env
[n_env
++] = NULL
;
1257 assert(n_env
<= 10);
1265 static int exec_child(
1266 ExecCommand
*command
,
1267 const ExecContext
*context
,
1268 const ExecParameters
*params
,
1269 ExecRuntime
*runtime
,
1272 int *fds
, unsigned n_fds
,
1276 _cleanup_strv_free_
char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1277 _cleanup_free_
char *mac_selinux_context_net
= NULL
;
1278 const char *username
= NULL
, *home
= NULL
, *shell
= NULL
;
1279 unsigned n_dont_close
= 0;
1280 int dont_close
[n_fds
+ 4];
1281 uid_t uid
= UID_INVALID
;
1282 gid_t gid
= GID_INVALID
;
1288 assert(exit_status
);
1290 rename_process_from_path(command
->path
);
1292 /* We reset exactly these signals, since they are the
1293 * only ones we set to SIG_IGN in the main daemon. All
1294 * others we leave untouched because we set them to
1295 * SIG_DFL or a valid handler initially, both of which
1296 * will be demoted to SIG_DFL. */
1297 default_signals(SIGNALS_CRASH_HANDLER
,
1298 SIGNALS_IGNORE
, -1);
1300 if (context
->ignore_sigpipe
)
1301 ignore_signals(SIGPIPE
, -1);
1303 r
= reset_signal_mask();
1305 *exit_status
= EXIT_SIGNAL_MASK
;
1309 if (params
->idle_pipe
)
1310 do_idle_pipe_dance(params
->idle_pipe
);
1312 /* Close sockets very early to make sure we don't
1313 * block init reexecution because it cannot bind its
1319 dont_close
[n_dont_close
++] = socket_fd
;
1321 memcpy(dont_close
+ n_dont_close
, fds
, sizeof(int) * n_fds
);
1322 n_dont_close
+= n_fds
;
1324 if (params
->bus_endpoint_fd
>= 0)
1325 dont_close
[n_dont_close
++] = params
->bus_endpoint_fd
;
1327 if (runtime
->netns_storage_socket
[0] >= 0)
1328 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[0];
1329 if (runtime
->netns_storage_socket
[1] >= 0)
1330 dont_close
[n_dont_close
++] = runtime
->netns_storage_socket
[1];
1333 r
= close_all_fds(dont_close
, n_dont_close
);
1335 *exit_status
= EXIT_FDS
;
1339 if (!context
->same_pgrp
)
1341 *exit_status
= EXIT_SETSID
;
1345 exec_context_tty_reset(context
);
1347 if (params
->confirm_spawn
) {
1350 r
= ask_for_confirmation(&response
, argv
);
1351 if (r
== -ETIMEDOUT
)
1352 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1354 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r
));
1355 else if (response
== 's') {
1356 write_confirm_message("Skipping execution.\n");
1357 *exit_status
= EXIT_CONFIRM
;
1359 } else if (response
== 'n') {
1360 write_confirm_message("Failing execution.\n");
1366 if (context
->user
) {
1367 username
= context
->user
;
1368 r
= get_user_creds(&username
, &uid
, &gid
, &home
, &shell
);
1370 *exit_status
= EXIT_USER
;
1375 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1376 * must be sure to drop O_NONBLOCK */
1378 fd_nonblock(socket_fd
, false);
1380 r
= setup_input(context
, socket_fd
, params
->apply_tty_stdin
);
1382 *exit_status
= EXIT_STDIN
;
1386 r
= setup_output(context
, STDOUT_FILENO
, socket_fd
, basename(command
->path
), params
->unit_id
, params
->apply_tty_stdin
, uid
, gid
);
1388 *exit_status
= EXIT_STDOUT
;
1392 r
= setup_output(context
, STDERR_FILENO
, socket_fd
, basename(command
->path
), params
->unit_id
, params
->apply_tty_stdin
, uid
, gid
);
1394 *exit_status
= EXIT_STDERR
;
1398 if (params
->cgroup_path
) {
1399 r
= cg_attach_everywhere(params
->cgroup_supported
, params
->cgroup_path
, 0, NULL
, NULL
);
1401 *exit_status
= EXIT_CGROUP
;
1406 if (context
->oom_score_adjust_set
) {
1407 char t
[DECIMAL_STR_MAX(context
->oom_score_adjust
)];
1409 /* When we can't make this change due to EPERM, then
1410 * let's silently skip over it. User namespaces
1411 * prohibit write access to this file, and we
1412 * shouldn't trip up over that. */
1414 sprintf(t
, "%i", context
->oom_score_adjust
);
1415 r
= write_string_file("/proc/self/oom_score_adj", t
);
1416 if (r
== -EPERM
|| r
== -EACCES
) {
1418 log_unit_debug_errno(params
->unit_id
, r
, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1421 *exit_status
= EXIT_OOM_ADJUST
;
1426 if (context
->nice_set
)
1427 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1428 *exit_status
= EXIT_NICE
;
1432 if (context
->cpu_sched_set
) {
1433 struct sched_param param
= {
1434 .sched_priority
= context
->cpu_sched_priority
,
1437 r
= sched_setscheduler(0,
1438 context
->cpu_sched_policy
|
1439 (context
->cpu_sched_reset_on_fork
?
1440 SCHED_RESET_ON_FORK
: 0),
1443 *exit_status
= EXIT_SETSCHEDULER
;
1448 if (context
->cpuset
)
1449 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1450 *exit_status
= EXIT_CPUAFFINITY
;
1454 if (context
->ioprio_set
)
1455 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1456 *exit_status
= EXIT_IOPRIO
;
1460 if (context
->timer_slack_nsec
!= NSEC_INFINITY
)
1461 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1462 *exit_status
= EXIT_TIMERSLACK
;
1466 if (context
->personality
!= 0xffffffffUL
)
1467 if (personality(context
->personality
) < 0) {
1468 *exit_status
= EXIT_PERSONALITY
;
1472 if (context
->utmp_id
)
1473 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1475 if (context
->user
&& is_terminal_input(context
->std_input
)) {
1476 r
= chown_terminal(STDIN_FILENO
, uid
);
1478 *exit_status
= EXIT_STDIN
;
1484 if (params
->bus_endpoint_fd
>= 0 && context
->bus_endpoint
) {
1485 uid_t ep_uid
= (uid
== UID_INVALID
) ? 0 : uid
;
1487 r
= bus_kernel_set_endpoint_policy(params
->bus_endpoint_fd
, ep_uid
, context
->bus_endpoint
);
1489 *exit_status
= EXIT_BUS_ENDPOINT
;
1495 /* If delegation is enabled we'll pass ownership of the cgroup
1496 * (but only in systemd's own controller hierarchy!) to the
1497 * user of the new process. */
1498 if (params
->cgroup_path
&& context
->user
&& params
->cgroup_delegate
) {
1499 r
= cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0644, uid
, gid
);
1501 *exit_status
= EXIT_CGROUP
;
1506 r
= cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, 0755, uid
, gid
);
1508 *exit_status
= EXIT_CGROUP
;
1513 if (!strv_isempty(context
->runtime_directory
) && params
->runtime_prefix
) {
1516 STRV_FOREACH(rt
, context
->runtime_directory
) {
1517 _cleanup_free_
char *p
;
1519 p
= strjoin(params
->runtime_prefix
, "/", *rt
, NULL
);
1521 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1525 r
= mkdir_safe_label(p
, context
->runtime_directory_mode
, uid
, gid
);
1527 *exit_status
= EXIT_RUNTIME_DIRECTORY
;
1533 if (params
->apply_permissions
) {
1534 r
= enforce_groups(context
, username
, gid
);
1536 *exit_status
= EXIT_GROUP
;
1541 umask(context
->umask
);
1544 if (params
->apply_permissions
&& context
->pam_name
&& username
) {
1545 r
= setup_pam(context
->pam_name
, username
, uid
, context
->tty_path
, &pam_env
, fds
, n_fds
);
1547 *exit_status
= EXIT_PAM
;
1553 if (context
->private_network
&& runtime
&& runtime
->netns_storage_socket
[0] >= 0) {
1554 r
= setup_netns(runtime
->netns_storage_socket
);
1556 *exit_status
= EXIT_NETWORK
;
1561 if (!strv_isempty(context
->read_write_dirs
) ||
1562 !strv_isempty(context
->read_only_dirs
) ||
1563 !strv_isempty(context
->inaccessible_dirs
) ||
1564 context
->mount_flags
!= 0 ||
1565 (context
->private_tmp
&& runtime
&& (runtime
->tmp_dir
|| runtime
->var_tmp_dir
)) ||
1566 params
->bus_endpoint_path
||
1567 context
->private_devices
||
1568 context
->protect_system
!= PROTECT_SYSTEM_NO
||
1569 context
->protect_home
!= PROTECT_HOME_NO
) {
1571 char *tmp
= NULL
, *var
= NULL
;
1573 /* The runtime struct only contains the parent
1574 * of the private /tmp, which is
1575 * non-accessible to world users. Inside of it
1576 * there's a /tmp that is sticky, and that's
1577 * the one we want to use here. */
1579 if (context
->private_tmp
&& runtime
) {
1580 if (runtime
->tmp_dir
)
1581 tmp
= strjoina(runtime
->tmp_dir
, "/tmp");
1582 if (runtime
->var_tmp_dir
)
1583 var
= strjoina(runtime
->var_tmp_dir
, "/tmp");
1586 r
= setup_namespace(
1587 context
->read_write_dirs
,
1588 context
->read_only_dirs
,
1589 context
->inaccessible_dirs
,
1592 params
->bus_endpoint_path
,
1593 context
->private_devices
,
1594 context
->protect_home
,
1595 context
->protect_system
,
1596 context
->mount_flags
);
1598 /* If we couldn't set up the namespace this is
1599 * probably due to a missing capability. In this case,
1600 * silently proceed. */
1601 if (r
== -EPERM
|| r
== -EACCES
) {
1603 log_unit_debug_errno(params
->unit_id
, r
, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1606 *exit_status
= EXIT_NAMESPACE
;
1611 if (params
->apply_chroot
) {
1612 if (context
->root_directory
)
1613 if (chroot(context
->root_directory
) < 0) {
1614 *exit_status
= EXIT_CHROOT
;
1618 if (chdir(context
->working_directory
?: "/") < 0 &&
1619 !context
->working_directory_missing_ok
) {
1620 *exit_status
= EXIT_CHDIR
;
1624 _cleanup_free_
char *d
= NULL
;
1626 if (asprintf(&d
, "%s/%s",
1627 context
->root_directory
?: "",
1628 context
->working_directory
?: "") < 0) {
1629 *exit_status
= EXIT_MEMORY
;
1634 !context
->working_directory_missing_ok
) {
1635 *exit_status
= EXIT_CHDIR
;
1641 if (params
->apply_permissions
&& mac_selinux_use() && params
->selinux_context_net
&& socket_fd
>= 0) {
1642 r
= mac_selinux_get_child_mls_label(socket_fd
, command
->path
, context
->selinux_context
, &mac_selinux_context_net
);
1644 *exit_status
= EXIT_SELINUX_CONTEXT
;
1650 /* We repeat the fd closing here, to make sure that
1651 * nothing is leaked from the PAM modules. Note that
1652 * we are more aggressive this time since socket_fd
1653 * and the netns fds we don't need anymore. The custom
1654 * endpoint fd was needed to upload the policy and can
1655 * now be closed as well. */
1656 r
= close_all_fds(fds
, n_fds
);
1658 r
= shift_fds(fds
, n_fds
);
1660 r
= flags_fds(fds
, n_fds
, context
->non_blocking
);
1662 *exit_status
= EXIT_FDS
;
1666 if (params
->apply_permissions
) {
1668 for (i
= 0; i
< _RLIMIT_MAX
; i
++) {
1669 if (!context
->rlimit
[i
])
1672 if (setrlimit_closest(i
, context
->rlimit
[i
]) < 0) {
1673 *exit_status
= EXIT_LIMITS
;
1678 if (context
->capability_bounding_set_drop
) {
1679 r
= capability_bounding_set_drop(context
->capability_bounding_set_drop
, false);
1681 *exit_status
= EXIT_CAPABILITIES
;
1687 if (context
->smack_process_label
) {
1688 r
= mac_smack_apply_pid(0, context
->smack_process_label
);
1690 *exit_status
= EXIT_SMACK_PROCESS_LABEL
;
1696 if (context
->user
) {
1697 r
= enforce_user(context
, uid
);
1699 *exit_status
= EXIT_USER
;
1704 /* PR_GET_SECUREBITS is not privileged, while
1705 * PR_SET_SECUREBITS is. So to suppress
1706 * potential EPERMs we'll try not to call
1707 * PR_SET_SECUREBITS unless necessary. */
1708 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1709 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1710 *exit_status
= EXIT_SECUREBITS
;
1714 if (context
->capabilities
)
1715 if (cap_set_proc(context
->capabilities
) < 0) {
1716 *exit_status
= EXIT_CAPABILITIES
;
1720 if (context
->no_new_privileges
)
1721 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) < 0) {
1722 *exit_status
= EXIT_NO_NEW_PRIVILEGES
;
1727 if (context
->address_families_whitelist
||
1728 !set_isempty(context
->address_families
)) {
1729 r
= apply_address_families(context
);
1731 *exit_status
= EXIT_ADDRESS_FAMILIES
;
1736 if (context
->syscall_whitelist
||
1737 !set_isempty(context
->syscall_filter
) ||
1738 !set_isempty(context
->syscall_archs
)) {
1739 r
= apply_seccomp(context
);
1741 *exit_status
= EXIT_SECCOMP
;
1748 if (mac_selinux_use()) {
1749 char *exec_context
= mac_selinux_context_net
?: context
->selinux_context
;
1752 r
= setexeccon(exec_context
);
1754 *exit_status
= EXIT_SELINUX_CONTEXT
;
1761 #ifdef HAVE_APPARMOR
1762 if (context
->apparmor_profile
&& mac_apparmor_use()) {
1763 r
= aa_change_onexec(context
->apparmor_profile
);
1764 if (r
< 0 && !context
->apparmor_profile_ignore
) {
1765 *exit_status
= EXIT_APPARMOR_PROFILE
;
1772 r
= build_environment(context
, n_fds
, params
->watchdog_usec
, home
, username
, shell
, &our_env
);
1774 *exit_status
= EXIT_MEMORY
;
1778 final_env
= strv_env_merge(5,
1779 params
->environment
,
1781 context
->environment
,
1786 *exit_status
= EXIT_MEMORY
;
1790 final_argv
= replace_env_argv(argv
, final_env
);
1792 *exit_status
= EXIT_MEMORY
;
1796 final_env
= strv_env_clean(final_env
);
1798 if (_unlikely_(log_get_max_level() >= LOG_DEBUG
)) {
1799 _cleanup_free_
char *line
;
1801 line
= exec_command_line(final_argv
);
1804 log_unit_struct(params
->unit_id
,
1806 "EXECUTABLE=%s", command
->path
,
1807 LOG_MESSAGE("Executing: %s", line
),
1813 execve(command
->path
, final_argv
, final_env
);
1814 *exit_status
= EXIT_EXEC
;
1818 int exec_spawn(ExecCommand
*command
,
1819 const ExecContext
*context
,
1820 const ExecParameters
*params
,
1821 ExecRuntime
*runtime
,
1824 _cleanup_strv_free_
char **files_env
= NULL
;
1825 int *fds
= NULL
; unsigned n_fds
= 0;
1826 _cleanup_free_
char *line
= NULL
;
1835 assert(params
->fds
|| params
->n_fds
<= 0);
1837 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1838 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1839 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1841 if (params
->n_fds
!= 1) {
1842 log_unit_error(params
->unit_id
, "Got more than one socket.");
1846 socket_fd
= params
->fds
[0];
1850 n_fds
= params
->n_fds
;
1853 r
= exec_context_load_environment(context
, params
->unit_id
, &files_env
);
1855 return log_unit_error_errno(params
->unit_id
, r
, "Failed to load environment files: %m");
1857 argv
= params
->argv
?: command
->argv
;
1858 line
= exec_command_line(argv
);
1862 log_unit_struct(params
->unit_id
,
1864 "EXECUTABLE=%s", command
->path
,
1865 LOG_MESSAGE("About to execute: %s", line
),
1869 return log_unit_error_errno(params
->unit_id
, r
, "Failed to fork: %m");
1874 r
= exec_child(command
,
1885 log_unit_struct(params
->unit_id
,
1887 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED
),
1888 "EXECUTABLE=%s", command
->path
,
1889 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1890 exit_status_to_string(exit_status
, EXIT_STATUS_SYSTEMD
),
1891 command
->path
, strerror(-r
)),
1899 log_unit_debug(params
->unit_id
, "Forked %s as "PID_FMT
, command
->path
, pid
);
1901 /* We add the new process to the cgroup both in the child (so
1902 * that we can be sure that no user code is ever executed
1903 * outside of the cgroup) and in the parent (so that we can be
1904 * sure that when we kill the cgroup the process will be killed too). */
1906 if (params
->cgroup_path
)
1907 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER
, params
->cgroup_path
, pid
);
1909 exec_status_start(&command
->exec_status
, pid
);
1915 void exec_context_init(ExecContext
*c
) {
1919 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1920 c
->cpu_sched_policy
= SCHED_OTHER
;
1921 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1922 c
->syslog_level_prefix
= true;
1923 c
->ignore_sigpipe
= true;
1924 c
->timer_slack_nsec
= NSEC_INFINITY
;
1925 c
->personality
= 0xffffffffUL
;
1926 c
->runtime_directory_mode
= 0755;
1929 void exec_context_done(ExecContext
*c
) {
1934 strv_free(c
->environment
);
1935 c
->environment
= NULL
;
1937 strv_free(c
->environment_files
);
1938 c
->environment_files
= NULL
;
1940 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1942 c
->rlimit
[l
] = NULL
;
1945 free(c
->working_directory
);
1946 c
->working_directory
= NULL
;
1947 free(c
->root_directory
);
1948 c
->root_directory
= NULL
;
1953 free(c
->syslog_identifier
);
1954 c
->syslog_identifier
= NULL
;
1962 strv_free(c
->supplementary_groups
);
1963 c
->supplementary_groups
= NULL
;
1968 if (c
->capabilities
) {
1969 cap_free(c
->capabilities
);
1970 c
->capabilities
= NULL
;
1973 strv_free(c
->read_only_dirs
);
1974 c
->read_only_dirs
= NULL
;
1976 strv_free(c
->read_write_dirs
);
1977 c
->read_write_dirs
= NULL
;
1979 strv_free(c
->inaccessible_dirs
);
1980 c
->inaccessible_dirs
= NULL
;
1983 CPU_FREE(c
->cpuset
);
1988 free(c
->selinux_context
);
1989 c
->selinux_context
= NULL
;
1991 free(c
->apparmor_profile
);
1992 c
->apparmor_profile
= NULL
;
1994 set_free(c
->syscall_filter
);
1995 c
->syscall_filter
= NULL
;
1997 set_free(c
->syscall_archs
);
1998 c
->syscall_archs
= NULL
;
2000 set_free(c
->address_families
);
2001 c
->address_families
= NULL
;
2003 strv_free(c
->runtime_directory
);
2004 c
->runtime_directory
= NULL
;
2006 bus_endpoint_free(c
->bus_endpoint
);
2007 c
->bus_endpoint
= NULL
;
2010 int exec_context_destroy_runtime_directory(ExecContext
*c
, const char *runtime_prefix
) {
2015 if (!runtime_prefix
)
2018 STRV_FOREACH(i
, c
->runtime_directory
) {
2019 _cleanup_free_
char *p
;
2021 p
= strjoin(runtime_prefix
, "/", *i
, NULL
);
2025 /* We execute this synchronously, since we need to be
2026 * sure this is gone when we start the service. */
2028 (void) rm_rf(p
, REMOVE_ROOT
);
2034 void exec_command_done(ExecCommand
*c
) {
2044 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
2047 for (i
= 0; i
< n
; i
++)
2048 exec_command_done(c
+i
);
2051 ExecCommand
* exec_command_free_list(ExecCommand
*c
) {
2055 LIST_REMOVE(command
, c
, i
);
2056 exec_command_done(i
);
2063 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
2066 for (i
= 0; i
< n
; i
++)
2067 c
[i
] = exec_command_free_list(c
[i
]);
2070 typedef struct InvalidEnvInfo
{
2071 const char *unit_id
;
2075 static void invalid_env(const char *p
, void *userdata
) {
2076 InvalidEnvInfo
*info
= userdata
;
2078 log_unit_error(info
->unit_id
, "Ignoring invalid environment assignment '%s': %s", p
, info
->path
);
2081 int exec_context_load_environment(const ExecContext
*c
, const char *unit_id
, char ***l
) {
2082 char **i
, **r
= NULL
;
2087 STRV_FOREACH(i
, c
->environment_files
) {
2090 bool ignore
= false;
2092 _cleanup_globfree_ glob_t pglob
= {};
2102 if (!path_is_absolute(fn
)) {
2110 /* Filename supports globbing, take all matching files */
2112 if (glob(fn
, 0, NULL
, &pglob
) != 0) {
2117 return errno
? -errno
: -EINVAL
;
2119 count
= pglob
.gl_pathc
;
2127 for (n
= 0; n
< count
; n
++) {
2128 k
= load_env_file(NULL
, pglob
.gl_pathv
[n
], NULL
, &p
);
2136 /* Log invalid environment variables with filename */
2138 InvalidEnvInfo info
= {
2140 .path
= pglob
.gl_pathv
[n
]
2143 p
= strv_env_clean_with_callback(p
, invalid_env
, &info
);
2151 m
= strv_env_merge(2, r
, p
);
2167 static bool tty_may_match_dev_console(const char *tty
) {
2168 _cleanup_free_
char *active
= NULL
;
2171 if (startswith(tty
, "/dev/"))
2174 /* trivial identity? */
2175 if (streq(tty
, "console"))
2178 console
= resolve_dev_console(&active
);
2179 /* if we could not resolve, assume it may */
2183 /* "tty0" means the active VC, so it may be the same sometimes */
2184 return streq(console
, tty
) || (streq(console
, "tty0") && tty_is_vc(tty
));
2187 bool exec_context_may_touch_console(ExecContext
*ec
) {
2188 return (ec
->tty_reset
|| ec
->tty_vhangup
|| ec
->tty_vt_disallocate
||
2189 is_terminal_input(ec
->std_input
) ||
2190 is_terminal_output(ec
->std_output
) ||
2191 is_terminal_output(ec
->std_error
)) &&
2192 tty_may_match_dev_console(tty_path(ec
));
2195 static void strv_fprintf(FILE *f
, char **l
) {
2201 fprintf(f
, " %s", *g
);
2204 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
2211 prefix
= strempty(prefix
);
2215 "%sWorkingDirectory: %s\n"
2216 "%sRootDirectory: %s\n"
2217 "%sNonBlocking: %s\n"
2218 "%sPrivateTmp: %s\n"
2219 "%sPrivateNetwork: %s\n"
2220 "%sPrivateDevices: %s\n"
2221 "%sProtectHome: %s\n"
2222 "%sProtectSystem: %s\n"
2223 "%sIgnoreSIGPIPE: %s\n",
2225 prefix
, c
->working_directory
? c
->working_directory
: "/",
2226 prefix
, c
->root_directory
? c
->root_directory
: "/",
2227 prefix
, yes_no(c
->non_blocking
),
2228 prefix
, yes_no(c
->private_tmp
),
2229 prefix
, yes_no(c
->private_network
),
2230 prefix
, yes_no(c
->private_devices
),
2231 prefix
, protect_home_to_string(c
->protect_home
),
2232 prefix
, protect_system_to_string(c
->protect_system
),
2233 prefix
, yes_no(c
->ignore_sigpipe
));
2235 STRV_FOREACH(e
, c
->environment
)
2236 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
2238 STRV_FOREACH(e
, c
->environment_files
)
2239 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
2246 if (c
->oom_score_adjust_set
)
2248 "%sOOMScoreAdjust: %i\n",
2249 prefix
, c
->oom_score_adjust
);
2251 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
2253 fprintf(f
, "%s%s: "RLIM_FMT
"\n",
2254 prefix
, rlimit_to_string(i
), c
->rlimit
[i
]->rlim_max
);
2256 if (c
->ioprio_set
) {
2257 _cleanup_free_
char *class_str
= NULL
;
2259 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c
->ioprio
), &class_str
);
2261 "%sIOSchedulingClass: %s\n"
2262 "%sIOPriority: %i\n",
2263 prefix
, strna(class_str
),
2264 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
2267 if (c
->cpu_sched_set
) {
2268 _cleanup_free_
char *policy_str
= NULL
;
2270 sched_policy_to_string_alloc(c
->cpu_sched_policy
, &policy_str
);
2272 "%sCPUSchedulingPolicy: %s\n"
2273 "%sCPUSchedulingPriority: %i\n"
2274 "%sCPUSchedulingResetOnFork: %s\n",
2275 prefix
, strna(policy_str
),
2276 prefix
, c
->cpu_sched_priority
,
2277 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
2281 fprintf(f
, "%sCPUAffinity:", prefix
);
2282 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
2283 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
2284 fprintf(f
, " %u", i
);
2288 if (c
->timer_slack_nsec
!= NSEC_INFINITY
)
2289 fprintf(f
, "%sTimerSlackNSec: "NSEC_FMT
"\n", prefix
, c
->timer_slack_nsec
);
2292 "%sStandardInput: %s\n"
2293 "%sStandardOutput: %s\n"
2294 "%sStandardError: %s\n",
2295 prefix
, exec_input_to_string(c
->std_input
),
2296 prefix
, exec_output_to_string(c
->std_output
),
2297 prefix
, exec_output_to_string(c
->std_error
));
2303 "%sTTYVHangup: %s\n"
2304 "%sTTYVTDisallocate: %s\n",
2305 prefix
, c
->tty_path
,
2306 prefix
, yes_no(c
->tty_reset
),
2307 prefix
, yes_no(c
->tty_vhangup
),
2308 prefix
, yes_no(c
->tty_vt_disallocate
));
2310 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
||
2311 c
->std_output
== EXEC_OUTPUT_KMSG
||
2312 c
->std_output
== EXEC_OUTPUT_JOURNAL
||
2313 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2314 c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2315 c
->std_output
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
||
2316 c
->std_error
== EXEC_OUTPUT_SYSLOG
||
2317 c
->std_error
== EXEC_OUTPUT_KMSG
||
2318 c
->std_error
== EXEC_OUTPUT_JOURNAL
||
2319 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
||
2320 c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
2321 c
->std_error
== EXEC_OUTPUT_JOURNAL_AND_CONSOLE
) {
2323 _cleanup_free_
char *fac_str
= NULL
, *lvl_str
= NULL
;
2325 log_facility_unshifted_to_string_alloc(c
->syslog_priority
>> 3, &fac_str
);
2326 log_level_to_string_alloc(LOG_PRI(c
->syslog_priority
), &lvl_str
);
2329 "%sSyslogFacility: %s\n"
2330 "%sSyslogLevel: %s\n",
2331 prefix
, strna(fac_str
),
2332 prefix
, strna(lvl_str
));
2335 if (c
->capabilities
) {
2336 _cleanup_cap_free_charp_
char *t
;
2338 t
= cap_to_text(c
->capabilities
, NULL
);
2340 fprintf(f
, "%sCapabilities: %s\n", prefix
, t
);
2344 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
2346 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS
) ? " keep-caps" : "",
2347 (c
->secure_bits
& 1<<SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
2348 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
2349 (c
->secure_bits
& 1<<SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
2350 (c
->secure_bits
& 1<<SECURE_NOROOT
) ? " noroot" : "",
2351 (c
->secure_bits
& 1<<SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
2353 if (c
->capability_bounding_set_drop
) {
2355 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
2357 for (l
= 0; l
<= cap_last_cap(); l
++)
2358 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
)))
2359 fprintf(f
, " %s", strna(capability_to_name(l
)));
2365 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
2367 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
2369 if (strv_length(c
->supplementary_groups
) > 0) {
2370 fprintf(f
, "%sSupplementaryGroups:", prefix
);
2371 strv_fprintf(f
, c
->supplementary_groups
);
2376 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
2378 if (strv_length(c
->read_write_dirs
) > 0) {
2379 fprintf(f
, "%sReadWriteDirs:", prefix
);
2380 strv_fprintf(f
, c
->read_write_dirs
);
2384 if (strv_length(c
->read_only_dirs
) > 0) {
2385 fprintf(f
, "%sReadOnlyDirs:", prefix
);
2386 strv_fprintf(f
, c
->read_only_dirs
);
2390 if (strv_length(c
->inaccessible_dirs
) > 0) {
2391 fprintf(f
, "%sInaccessibleDirs:", prefix
);
2392 strv_fprintf(f
, c
->inaccessible_dirs
);
2398 "%sUtmpIdentifier: %s\n",
2399 prefix
, c
->utmp_id
);
2401 if (c
->selinux_context
)
2403 "%sSELinuxContext: %s%s\n",
2404 prefix
, c
->selinux_context_ignore
? "-" : "", c
->selinux_context
);
2406 if (c
->personality
!= 0xffffffffUL
)
2408 "%sPersonality: %s\n",
2409 prefix
, strna(personality_to_string(c
->personality
)));
2411 if (c
->syscall_filter
) {
2419 "%sSystemCallFilter: ",
2422 if (!c
->syscall_whitelist
)
2426 SET_FOREACH(id
, c
->syscall_filter
, j
) {
2427 _cleanup_free_
char *name
= NULL
;
2434 name
= seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE
, PTR_TO_INT(id
) - 1);
2435 fputs(strna(name
), f
);
2442 if (c
->syscall_archs
) {
2449 "%sSystemCallArchitectures:",
2453 SET_FOREACH(id
, c
->syscall_archs
, j
)
2454 fprintf(f
, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id
) - 1)));
2459 if (c
->syscall_errno
!= 0)
2461 "%sSystemCallErrorNumber: %s\n",
2462 prefix
, strna(errno_to_name(c
->syscall_errno
)));
2464 if (c
->apparmor_profile
)
2466 "%sAppArmorProfile: %s%s\n",
2467 prefix
, c
->apparmor_profile_ignore
? "-" : "", c
->apparmor_profile
);
2470 bool exec_context_maintains_privileges(ExecContext
*c
) {
2473 /* Returns true if the process forked off would run under
2474 * an unchanged UID or as root. */
2479 if (streq(c
->user
, "root") || streq(c
->user
, "0"))
2485 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
2490 dual_timestamp_get(&s
->start_timestamp
);
2493 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
2496 if (s
->pid
&& s
->pid
!= pid
)
2500 dual_timestamp_get(&s
->exit_timestamp
);
2506 if (context
->utmp_id
)
2507 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
2509 exec_context_tty_reset(context
);
2513 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
2514 char buf
[FORMAT_TIMESTAMP_MAX
];
2522 prefix
= strempty(prefix
);
2525 "%sPID: "PID_FMT
"\n",
2528 if (s
->start_timestamp
.realtime
> 0)
2530 "%sStart Timestamp: %s\n",
2531 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
2533 if (s
->exit_timestamp
.realtime
> 0)
2535 "%sExit Timestamp: %s\n"
2537 "%sExit Status: %i\n",
2538 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
2539 prefix
, sigchld_code_to_string(s
->code
),
2543 char *exec_command_line(char **argv
) {
2551 STRV_FOREACH(a
, argv
)
2554 if (!(n
= new(char, k
)))
2558 STRV_FOREACH(a
, argv
) {
2565 if (strpbrk(*a
, WHITESPACE
)) {
2576 /* FIXME: this doesn't really handle arguments that have
2577 * spaces and ticks in them */
2582 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2583 _cleanup_free_
char *cmd
= NULL
;
2584 const char *prefix2
;
2589 prefix
= strempty(prefix
);
2590 prefix2
= strjoina(prefix
, "\t");
2592 cmd
= exec_command_line(c
->argv
);
2594 "%sCommand Line: %s\n",
2595 prefix
, cmd
? cmd
: strerror(ENOMEM
));
2597 exec_status_dump(&c
->exec_status
, f
, prefix2
);
2600 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
2603 prefix
= strempty(prefix
);
2605 LIST_FOREACH(command
, c
, c
)
2606 exec_command_dump(c
, f
, prefix
);
2609 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
2616 /* It's kind of important, that we keep the order here */
2617 LIST_FIND_TAIL(command
, *l
, end
);
2618 LIST_INSERT_AFTER(command
, *l
, end
, e
);
2623 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
2631 l
= strv_new_ap(path
, ap
);
2652 int exec_command_append(ExecCommand
*c
, const char *path
, ...) {
2653 _cleanup_strv_free_
char **l
= NULL
;
2661 l
= strv_new_ap(path
, ap
);
2667 r
= strv_extend_strv(&c
->argv
, l
);
2675 static int exec_runtime_allocate(ExecRuntime
**rt
) {
2680 *rt
= new0(ExecRuntime
, 1);
2685 (*rt
)->netns_storage_socket
[0] = (*rt
)->netns_storage_socket
[1] = -1;
2690 int exec_runtime_make(ExecRuntime
**rt
, ExecContext
*c
, const char *id
) {
2700 if (!c
->private_network
&& !c
->private_tmp
)
2703 r
= exec_runtime_allocate(rt
);
2707 if (c
->private_network
&& (*rt
)->netns_storage_socket
[0] < 0) {
2708 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, (*rt
)->netns_storage_socket
) < 0)
2712 if (c
->private_tmp
&& !(*rt
)->tmp_dir
) {
2713 r
= setup_tmp_dirs(id
, &(*rt
)->tmp_dir
, &(*rt
)->var_tmp_dir
);
2721 ExecRuntime
*exec_runtime_ref(ExecRuntime
*r
) {
2723 assert(r
->n_ref
> 0);
2729 ExecRuntime
*exec_runtime_unref(ExecRuntime
*r
) {
2734 assert(r
->n_ref
> 0);
2737 if (r
->n_ref
<= 0) {
2739 free(r
->var_tmp_dir
);
2740 safe_close_pair(r
->netns_storage_socket
);
2747 int exec_runtime_serialize(ExecRuntime
*rt
, Unit
*u
, FILE *f
, FDSet
*fds
) {
2756 unit_serialize_item(u
, f
, "tmp-dir", rt
->tmp_dir
);
2758 if (rt
->var_tmp_dir
)
2759 unit_serialize_item(u
, f
, "var-tmp-dir", rt
->var_tmp_dir
);
2761 if (rt
->netns_storage_socket
[0] >= 0) {
2764 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[0]);
2768 unit_serialize_item_format(u
, f
, "netns-socket-0", "%i", copy
);
2771 if (rt
->netns_storage_socket
[1] >= 0) {
2774 copy
= fdset_put_dup(fds
, rt
->netns_storage_socket
[1]);
2778 unit_serialize_item_format(u
, f
, "netns-socket-1", "%i", copy
);
2784 int exec_runtime_deserialize_item(ExecRuntime
**rt
, Unit
*u
, const char *key
, const char *value
, FDSet
*fds
) {
2791 if (streq(key
, "tmp-dir")) {
2794 r
= exec_runtime_allocate(rt
);
2798 copy
= strdup(value
);
2802 free((*rt
)->tmp_dir
);
2803 (*rt
)->tmp_dir
= copy
;
2805 } else if (streq(key
, "var-tmp-dir")) {
2808 r
= exec_runtime_allocate(rt
);
2812 copy
= strdup(value
);
2816 free((*rt
)->var_tmp_dir
);
2817 (*rt
)->var_tmp_dir
= copy
;
2819 } else if (streq(key
, "netns-socket-0")) {
2822 r
= exec_runtime_allocate(rt
);
2826 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2827 log_unit_debug(u
->id
, "Failed to parse netns socket value %s", value
);
2829 safe_close((*rt
)->netns_storage_socket
[0]);
2830 (*rt
)->netns_storage_socket
[0] = fdset_remove(fds
, fd
);
2832 } else if (streq(key
, "netns-socket-1")) {
2835 r
= exec_runtime_allocate(rt
);
2839 if (safe_atoi(value
, &fd
) < 0 || !fdset_contains(fds
, fd
))
2840 log_unit_debug(u
->id
, "Failed to parse netns socket value %s", value
);
2842 safe_close((*rt
)->netns_storage_socket
[1]);
2843 (*rt
)->netns_storage_socket
[1] = fdset_remove(fds
, fd
);
2851 static void *remove_tmpdir_thread(void *p
) {
2852 _cleanup_free_
char *path
= p
;
2854 (void) rm_rf(path
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
2858 void exec_runtime_destroy(ExecRuntime
*rt
) {
2864 /* If there are multiple users of this, let's leave the stuff around */
2869 log_debug("Spawning thread to nuke %s", rt
->tmp_dir
);
2871 r
= asynchronous_job(remove_tmpdir_thread
, rt
->tmp_dir
);
2873 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->tmp_dir
);
2880 if (rt
->var_tmp_dir
) {
2881 log_debug("Spawning thread to nuke %s", rt
->var_tmp_dir
);
2883 r
= asynchronous_job(remove_tmpdir_thread
, rt
->var_tmp_dir
);
2885 log_warning_errno(r
, "Failed to nuke %s: %m", rt
->var_tmp_dir
);
2886 free(rt
->var_tmp_dir
);
2889 rt
->var_tmp_dir
= NULL
;
2892 safe_close_pair(rt
->netns_storage_socket
);
2895 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2896 [EXEC_INPUT_NULL
] = "null",
2897 [EXEC_INPUT_TTY
] = "tty",
2898 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2899 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2900 [EXEC_INPUT_SOCKET
] = "socket"
2903 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
2905 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2906 [EXEC_OUTPUT_INHERIT
] = "inherit",
2907 [EXEC_OUTPUT_NULL
] = "null",
2908 [EXEC_OUTPUT_TTY
] = "tty",
2909 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2910 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2911 [EXEC_OUTPUT_KMSG
] = "kmsg",
2912 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2913 [EXEC_OUTPUT_JOURNAL
] = "journal",
2914 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE
] = "journal+console",
2915 [EXEC_OUTPUT_SOCKET
] = "socket"
2918 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);