1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <security/pam_appl.h>
51 #include "securebits.h"
53 #include "namespace.h"
55 #include "exit-status.h"
57 #include "utmp-wtmp.h"
59 #include "loopback-setup.h"
61 /* This assumes there is a 'tty' group */
64 static int shift_fds(int fds
[], unsigned n_fds
) {
65 int start
, restart_from
;
70 /* Modifies the fds array! (sorts it) */
80 for (i
= start
; i
< (int) n_fds
; i
++) {
83 /* Already at right index? */
87 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
90 close_nointr_nofail(fds
[i
]);
93 /* Hmm, the fd we wanted isn't free? Then
94 * let's remember that and try again from here */
95 if (nfd
!= i
+3 && restart_from
< 0)
102 start
= restart_from
;
108 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
117 /* Sets or clears O_NONBLOCK as requested and drops FD_CLOEXEC from the fds */
119 for (i
= 0; i
< n_fds
; i
++) {
121 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
124 /* We unconditionally drop FD_CLOEXEC from the fds,
125 * since after all we want to pass these fds to our
128 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
135 static const char *tty_path(const ExecContext
*context
) {
138 if (context
->tty_path
)
139 return context
->tty_path
;
141 return "/dev/console";
144 void exec_context_tty_reset(const ExecContext
*context
) {
147 if (context
->tty_vhangup
)
148 terminal_vhangup(tty_path(context
));
150 if (context
->tty_reset
)
151 reset_terminal(tty_path(context
));
153 if (context
->tty_vt_disallocate
&& context
->tty_path
)
154 vt_disallocate(context
->tty_path
);
157 static int open_null_as(int flags
, int nfd
) {
162 if ((fd
= open("/dev/null", flags
|O_NOCTTY
)) < 0)
166 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
167 close_nointr_nofail(fd
);
174 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, int nfd
) {
178 struct sockaddr_un un
;
182 assert(output
< _EXEC_OUTPUT_MAX
);
186 if ((fd
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0)
190 sa
.sa
.sa_family
= AF_UNIX
;
191 strncpy(sa
.un
.sun_path
, STDOUT_SYSLOG_BRIDGE_SOCKET
, sizeof(sa
.un
.sun_path
));
193 if (connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + sizeof(STDOUT_SYSLOG_BRIDGE_SOCKET
) - 1) < 0) {
194 close_nointr_nofail(fd
);
198 if (shutdown(fd
, SHUT_RD
) < 0) {
199 close_nointr_nofail(fd
);
203 /* We speak a very simple protocol between log server
204 * and client: one line for the log destination (kmsg
205 * or syslog), followed by the priority field,
206 * followed by the process name. Since we replaced
207 * stdin/stderr we simply use stdio to write to
208 * it. Note that we use stderr, to minimize buffer
209 * flushing issues. */
216 output
== EXEC_OUTPUT_KMSG
? "kmsg" :
217 output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
? "kmsg+console" :
218 output
== EXEC_OUTPUT_SYSLOG
? "syslog" :
220 context
->syslog_priority
,
221 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
222 context
->syslog_level_prefix
);
225 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
226 close_nointr_nofail(fd
);
232 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
238 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
242 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
243 close_nointr_nofail(fd
);
250 static bool is_terminal_input(ExecInput i
) {
252 i
== EXEC_INPUT_TTY
||
253 i
== EXEC_INPUT_TTY_FORCE
||
254 i
== EXEC_INPUT_TTY_FAIL
;
257 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
259 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
260 return EXEC_INPUT_NULL
;
262 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
263 return EXEC_INPUT_NULL
;
268 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
270 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
271 return EXEC_OUTPUT_INHERIT
;
276 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
281 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
285 case EXEC_INPUT_NULL
:
286 return open_null_as(O_RDONLY
, STDIN_FILENO
);
289 case EXEC_INPUT_TTY_FORCE
:
290 case EXEC_INPUT_TTY_FAIL
: {
293 if ((fd
= acquire_terminal(
295 i
== EXEC_INPUT_TTY_FAIL
,
296 i
== EXEC_INPUT_TTY_FORCE
,
300 if (fd
!= STDIN_FILENO
) {
301 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
302 close_nointr_nofail(fd
);
309 case EXEC_INPUT_SOCKET
:
310 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
313 assert_not_reached("Unknown input type");
317 static int setup_output(const ExecContext
*context
, int socket_fd
, const char *ident
, bool apply_tty_stdin
) {
324 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
325 o
= fixup_output(context
->std_output
, socket_fd
);
327 /* This expects the input is already set up */
331 case EXEC_OUTPUT_INHERIT
:
333 /* If input got downgraded, inherit the original value */
334 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
335 return open_terminal_as(tty_path(context
), O_WRONLY
, STDOUT_FILENO
);
337 /* If the input is connected to anything that's not a /dev/null, inherit that... */
338 if (i
!= EXEC_INPUT_NULL
)
339 return dup2(STDIN_FILENO
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
341 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
343 return STDOUT_FILENO
;
345 /* We need to open /dev/null here anew, to get the
346 * right access mode. So we fall through */
348 case EXEC_OUTPUT_NULL
:
349 return open_null_as(O_WRONLY
, STDOUT_FILENO
);
351 case EXEC_OUTPUT_TTY
:
352 if (is_terminal_input(i
))
353 return dup2(STDIN_FILENO
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
355 /* We don't reset the terminal if this is just about output */
356 return open_terminal_as(tty_path(context
), O_WRONLY
, STDOUT_FILENO
);
358 case EXEC_OUTPUT_SYSLOG
:
359 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
360 case EXEC_OUTPUT_KMSG
:
361 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
362 return connect_logger_as(context
, o
, ident
, STDOUT_FILENO
);
364 case EXEC_OUTPUT_SOCKET
:
365 assert(socket_fd
>= 0);
366 return dup2(socket_fd
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
369 assert_not_reached("Unknown output type");
373 static int setup_error(const ExecContext
*context
, int socket_fd
, const char *ident
, bool apply_tty_stdin
) {
380 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
381 o
= fixup_output(context
->std_output
, socket_fd
);
382 e
= fixup_output(context
->std_error
, socket_fd
);
384 /* This expects the input and output are already set up */
386 /* Don't change the stderr file descriptor if we inherit all
387 * the way and are not on a tty */
388 if (e
== EXEC_OUTPUT_INHERIT
&&
389 o
== EXEC_OUTPUT_INHERIT
&&
390 i
== EXEC_INPUT_NULL
&&
391 !is_terminal_input(context
->std_input
) &&
393 return STDERR_FILENO
;
395 /* Duplicate from stdout if possible */
396 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
397 return dup2(STDOUT_FILENO
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
401 case EXEC_OUTPUT_NULL
:
402 return open_null_as(O_WRONLY
, STDERR_FILENO
);
404 case EXEC_OUTPUT_TTY
:
405 if (is_terminal_input(i
))
406 return dup2(STDIN_FILENO
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
408 /* We don't reset the terminal if this is just about output */
409 return open_terminal_as(tty_path(context
), O_WRONLY
, STDERR_FILENO
);
411 case EXEC_OUTPUT_SYSLOG
:
412 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
413 case EXEC_OUTPUT_KMSG
:
414 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
415 return connect_logger_as(context
, e
, ident
, STDERR_FILENO
);
417 case EXEC_OUTPUT_SOCKET
:
418 assert(socket_fd
>= 0);
419 return dup2(socket_fd
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
422 assert_not_reached("Unknown error type");
426 static int chown_terminal(int fd
, uid_t uid
) {
431 /* This might fail. What matters are the results. */
432 (void) fchown(fd
, uid
, -1);
433 (void) fchmod(fd
, TTY_MODE
);
435 if (fstat(fd
, &st
) < 0)
438 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
444 static int setup_confirm_stdio(const ExecContext
*context
,
446 int *_saved_stdout
) {
447 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
450 assert(_saved_stdin
);
451 assert(_saved_stdout
);
453 /* This returns positive EXIT_xxx return values instead of
454 * negative errno style values! */
456 if ((saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3)) < 0)
459 if ((saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3)) < 0) {
464 if ((fd
= acquire_terminal(
466 context
->std_input
== EXEC_INPUT_TTY_FAIL
,
467 context
->std_input
== EXEC_INPUT_TTY_FORCE
,
473 if (chown_terminal(fd
, getuid()) < 0) {
478 if (dup2(fd
, STDIN_FILENO
) < 0) {
483 if (dup2(fd
, STDOUT_FILENO
) < 0) {
489 close_nointr_nofail(fd
);
491 *_saved_stdin
= saved_stdin
;
492 *_saved_stdout
= saved_stdout
;
497 if (saved_stdout
>= 0)
498 close_nointr_nofail(saved_stdout
);
500 if (saved_stdin
>= 0)
501 close_nointr_nofail(saved_stdin
);
504 close_nointr_nofail(fd
);
509 static int restore_confirm_stdio(const ExecContext
*context
,
517 assert(*saved_stdin
>= 0);
518 assert(saved_stdout
);
519 assert(*saved_stdout
>= 0);
521 /* This returns positive EXIT_xxx return values instead of
522 * negative errno style values! */
524 if (is_terminal_input(context
->std_input
)) {
526 /* The service wants terminal input. */
530 context
->std_output
== EXEC_OUTPUT_INHERIT
||
531 context
->std_output
== EXEC_OUTPUT_TTY
;
534 /* If the service doesn't want a controlling terminal,
535 * then we need to get rid entirely of what we have
538 if (release_terminal() < 0)
541 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
544 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
547 *keep_stdout
= *keep_stdin
= false;
553 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
554 bool keep_groups
= false;
559 /* Lookup and set GID and supplementary group list. Here too
560 * we avoid NSS lookups for gid=0. */
562 if (context
->group
|| username
) {
564 if (context
->group
) {
565 const char *g
= context
->group
;
567 if ((r
= get_group_creds(&g
, &gid
)) < 0)
571 /* First step, initialize groups from /etc/group */
572 if (username
&& gid
!= 0) {
573 if (initgroups(username
, gid
) < 0)
579 /* Second step, set our gids */
580 if (setresgid(gid
, gid
, gid
) < 0)
584 if (context
->supplementary_groups
) {
589 /* Final step, initialize any manually set supplementary groups */
590 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
592 if (!(gids
= new(gid_t
, ngroups_max
)))
596 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
603 STRV_FOREACH(i
, context
->supplementary_groups
) {
606 if (k
>= ngroups_max
) {
612 r
= get_group_creds(&g
, gids
+k
);
621 if (setgroups(k
, gids
) < 0) {
632 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
636 /* Sets (but doesn't lookup) the uid and make sure we keep the
637 * capabilities while doing so. */
639 if (context
->capabilities
) {
641 static const cap_value_t bits
[] = {
642 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
643 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
646 /* First step: If we need to keep capabilities but
647 * drop privileges we need to make sure we keep our
648 * caps, while we drop privileges. */
650 int sb
= context
->secure_bits
|SECURE_KEEP_CAPS
;
652 if (prctl(PR_GET_SECUREBITS
) != sb
)
653 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
657 /* Second step: set the capabilities. This will reduce
658 * the capabilities to the minimum we need. */
660 if (!(d
= cap_dup(context
->capabilities
)))
663 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
664 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0) {
670 if (cap_set_proc(d
) < 0) {
679 /* Third step: actually set the uids */
680 if (setresuid(uid
, uid
, uid
) < 0)
683 /* At this point we should have all necessary capabilities but
684 are otherwise a normal user. However, the caps might have been
685 corrupted by the setresuid(), so we need to clean them up
686 later. This is done outside of this call. */
693 static int null_conv(
695 const struct pam_message
**msg
,
696 struct pam_response
**resp
,
699 /* We don't support conversations */
704 static int setup_pam(
709 int fds
[], unsigned n_fds
) {
711 static const struct pam_conv conv
= {
716 pam_handle_t
*handle
= NULL
;
718 int pam_code
= PAM_SUCCESS
;
720 bool close_session
= false;
721 pid_t pam_pid
= 0, parent_pid
;
727 /* We set up PAM in the parent process, then fork. The child
728 * will then stay around until killed via PR_SET_PDEATHSIG or
729 * systemd via the cgroup logic. It will then remove the PAM
730 * session again. The parent process will exec() the actual
731 * daemon. We do things this way to ensure that the main PID
732 * of the daemon is the one we initially fork()ed. */
734 if ((pam_code
= pam_start(name
, user
, &conv
, &handle
)) != PAM_SUCCESS
) {
740 if ((pam_code
= pam_set_item(handle
, PAM_TTY
, tty
)) != PAM_SUCCESS
)
743 if ((pam_code
= pam_acct_mgmt(handle
, PAM_SILENT
)) != PAM_SUCCESS
)
746 if ((pam_code
= pam_open_session(handle
, PAM_SILENT
)) != PAM_SUCCESS
)
749 close_session
= true;
751 if ((!(e
= pam_getenvlist(handle
)))) {
752 pam_code
= PAM_BUF_ERR
;
756 /* Block SIGTERM, so that we know that it won't get lost in
758 if (sigemptyset(&ss
) < 0 ||
759 sigaddset(&ss
, SIGTERM
) < 0 ||
760 sigprocmask(SIG_BLOCK
, &ss
, &old_ss
) < 0)
763 parent_pid
= getpid();
765 if ((pam_pid
= fork()) < 0)
772 /* The child's job is to reset the PAM session on
775 /* This string must fit in 10 chars (i.e. the length
776 * of "/sbin/init") */
777 rename_process("sd(PAM)");
779 /* Make sure we don't keep open the passed fds in this
780 child. We assume that otherwise only those fds are
781 open here that have been opened by PAM. */
782 close_many(fds
, n_fds
);
784 /* Wait until our parent died. This will most likely
785 * not work since the kernel does not allow
786 * unprivileged parents to kill their privileged children
787 * this way. We rely on the control groups kill logic
788 * to do the rest for us. */
789 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
792 /* Check if our parent process might already have
794 if (getppid() == parent_pid
) {
796 if (sigwait(&ss
, &sig
) < 0) {
803 assert(sig
== SIGTERM
);
808 /* If our parent died we'll end the session */
809 if (getppid() != parent_pid
)
810 if ((pam_code
= pam_close_session(handle
, PAM_DATA_SILENT
)) != PAM_SUCCESS
)
816 pam_end(handle
, pam_code
| PAM_DATA_SILENT
);
820 /* If the child was forked off successfully it will do all the
821 * cleanups, so forget about the handle here. */
824 /* Unblock SIGTERM again in the parent */
825 if (sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) < 0)
828 /* We close the log explicitly here, since the PAM modules
829 * might have opened it, but we don't want this fd around. */
840 pam_code
= pam_close_session(handle
, PAM_DATA_SILENT
);
842 pam_end(handle
, pam_code
| PAM_DATA_SILENT
);
850 kill(pam_pid
, SIGTERM
);
851 kill(pam_pid
, SIGCONT
);
858 static int do_capability_bounding_set_drop(uint64_t drop
) {
860 cap_t old_cap
= NULL
, new_cap
= NULL
;
864 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
865 * in the effective set (yes, the kernel drops that when
866 * executing init!), so get it back temporarily so that we can
867 * call PR_CAPBSET_DROP. */
869 old_cap
= cap_get_proc();
873 if (cap_get_flag(old_cap
, CAP_SETPCAP
, CAP_EFFECTIVE
, &fv
) < 0) {
879 static const cap_value_t v
= CAP_SETPCAP
;
881 new_cap
= cap_dup(old_cap
);
887 if (cap_set_flag(new_cap
, CAP_EFFECTIVE
, 1, &v
, CAP_SET
) < 0) {
892 if (cap_set_proc(new_cap
) < 0) {
898 for (i
= 0; i
<= cap_last_cap(); i
++)
899 if (drop
& ((uint64_t) 1ULL << (uint64_t) i
)) {
900 if (prctl(PR_CAPBSET_DROP
, i
) < 0) {
913 cap_set_proc(old_cap
);
920 int exec_spawn(ExecCommand
*command
,
922 const ExecContext
*context
,
923 int fds
[], unsigned n_fds
,
925 bool apply_permissions
,
927 bool apply_tty_stdin
,
929 CGroupBonding
*cgroup_bondings
,
930 CGroupAttribute
*cgroup_attributes
,
937 char **files_env
= NULL
;
942 assert(fds
|| n_fds
<= 0);
944 if (context
->std_input
== EXEC_INPUT_SOCKET
||
945 context
->std_output
== EXEC_OUTPUT_SOCKET
||
946 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
958 if ((r
= exec_context_load_environment(context
, &files_env
)) < 0) {
959 log_error("Failed to load environment files: %s", strerror(-r
));
964 argv
= command
->argv
;
966 if (!(line
= exec_command_line(argv
))) {
971 log_debug("About to execute: %s", line
);
974 r
= cgroup_bonding_realize_list(cgroup_bondings
);
978 cgroup_attribute_apply_list(cgroup_attributes
, cgroup_bondings
);
980 if ((pid
= fork()) < 0) {
988 const char *username
= NULL
, *home
= NULL
;
989 uid_t uid
= (uid_t
) -1;
990 gid_t gid
= (gid_t
) -1;
991 char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
993 int saved_stdout
= -1, saved_stdin
= -1;
994 bool keep_stdout
= false, keep_stdin
= false;
998 /* This string must fit in 10 chars (i.e. the length
999 * of "/sbin/init") */
1000 rename_process("sd(EXEC)");
1002 /* We reset exactly these signals, since they are the
1003 * only ones we set to SIG_IGN in the main daemon. All
1004 * others we leave untouched because we set them to
1005 * SIG_DFL or a valid handler initially, both of which
1006 * will be demoted to SIG_DFL. */
1007 default_signals(SIGNALS_CRASH_HANDLER
,
1008 SIGNALS_IGNORE
, -1);
1010 if (sigemptyset(&ss
) < 0 ||
1011 sigprocmask(SIG_SETMASK
, &ss
, NULL
) < 0) {
1012 r
= EXIT_SIGNAL_MASK
;
1016 /* Close sockets very early to make sure we don't
1017 * block init reexecution because it cannot bind its
1019 if (close_all_fds(socket_fd
>= 0 ? &socket_fd
: fds
,
1020 socket_fd
>= 0 ? 1 : n_fds
) < 0) {
1025 if (!context
->same_pgrp
)
1031 if (context
->tcpwrap_name
) {
1033 if (!socket_tcpwrap(socket_fd
, context
->tcpwrap_name
)) {
1038 for (i
= 0; i
< (int) n_fds
; i
++) {
1039 if (!socket_tcpwrap(fds
[i
], context
->tcpwrap_name
)) {
1046 exec_context_tty_reset(context
);
1048 /* We skip the confirmation step if we shall not apply the TTY */
1049 if (confirm_spawn
&&
1050 (!is_terminal_input(context
->std_input
) || apply_tty_stdin
)) {
1053 /* Set up terminal for the question */
1054 if ((r
= setup_confirm_stdio(context
,
1055 &saved_stdin
, &saved_stdout
)))
1058 /* Now ask the question. */
1059 if (!(line
= exec_command_line(argv
))) {
1064 r
= ask(&response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
1067 if (r
< 0 || response
== 'n') {
1070 } else if (response
== 's') {
1075 /* Release terminal for the question */
1076 if ((r
= restore_confirm_stdio(context
,
1077 &saved_stdin
, &saved_stdout
,
1078 &keep_stdin
, &keep_stdout
)))
1082 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1083 * must make sure to drop O_NONBLOCK */
1085 fd_nonblock(socket_fd
, false);
1088 if (setup_input(context
, socket_fd
, apply_tty_stdin
) < 0) {
1094 if (setup_output(context
, socket_fd
, file_name_from_path(command
->path
), apply_tty_stdin
) < 0) {
1099 if (setup_error(context
, socket_fd
, file_name_from_path(command
->path
), apply_tty_stdin
) < 0) {
1104 if (cgroup_bondings
)
1105 if (cgroup_bonding_install_list(cgroup_bondings
, 0) < 0) {
1110 if (context
->oom_score_adjust_set
) {
1113 snprintf(t
, sizeof(t
), "%i", context
->oom_score_adjust
);
1116 if (write_one_line_file("/proc/self/oom_score_adj", t
) < 0) {
1117 /* Compatibility with Linux <= 2.6.35 */
1121 adj
= (context
->oom_score_adjust
* -OOM_DISABLE
) / OOM_SCORE_ADJ_MAX
;
1122 adj
= CLAMP(adj
, OOM_DISABLE
, OOM_ADJUST_MAX
);
1124 snprintf(t
, sizeof(t
), "%i", adj
);
1127 if (write_one_line_file("/proc/self/oom_adj", t
) < 0
1128 && errno
!= EACCES
) {
1129 r
= EXIT_OOM_ADJUST
;
1135 if (context
->nice_set
)
1136 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1141 if (context
->cpu_sched_set
) {
1142 struct sched_param param
;
1145 param
.sched_priority
= context
->cpu_sched_priority
;
1147 if (sched_setscheduler(0, context
->cpu_sched_policy
|
1148 (context
->cpu_sched_reset_on_fork
? SCHED_RESET_ON_FORK
: 0), &param
) < 0) {
1149 r
= EXIT_SETSCHEDULER
;
1154 if (context
->cpuset
)
1155 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1156 r
= EXIT_CPUAFFINITY
;
1160 if (context
->ioprio_set
)
1161 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1166 if (context
->timer_slack_nsec_set
)
1167 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1168 r
= EXIT_TIMERSLACK
;
1172 if (context
->utmp_id
)
1173 utmp_put_init_process(context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1175 if (context
->user
) {
1176 username
= context
->user
;
1177 if (get_user_creds(&username
, &uid
, &gid
, &home
) < 0) {
1182 if (is_terminal_input(context
->std_input
))
1183 if (chown_terminal(STDIN_FILENO
, uid
) < 0) {
1188 if (cgroup_bondings
&& context
->control_group_modify
)
1189 if (cgroup_bonding_set_group_access_list(cgroup_bondings
, 0755, uid
, gid
) < 0 ||
1190 cgroup_bonding_set_task_access_list(cgroup_bondings
, 0644, uid
, gid
) < 0) {
1196 if (apply_permissions
)
1197 if (enforce_groups(context
, username
, gid
) < 0) {
1202 umask(context
->umask
);
1205 if (context
->pam_name
&& username
) {
1206 if (setup_pam(context
->pam_name
, username
, context
->tty_path
, &pam_env
, fds
, n_fds
) != 0) {
1212 if (context
->private_network
) {
1213 if (unshare(CLONE_NEWNET
) < 0) {
1221 if (strv_length(context
->read_write_dirs
) > 0 ||
1222 strv_length(context
->read_only_dirs
) > 0 ||
1223 strv_length(context
->inaccessible_dirs
) > 0 ||
1224 context
->mount_flags
!= MS_SHARED
||
1225 context
->private_tmp
)
1226 if ((r
= setup_namespace(
1227 context
->read_write_dirs
,
1228 context
->read_only_dirs
,
1229 context
->inaccessible_dirs
,
1230 context
->private_tmp
,
1231 context
->mount_flags
)) < 0)
1235 if (context
->root_directory
)
1236 if (chroot(context
->root_directory
) < 0) {
1241 if (chdir(context
->working_directory
? context
->working_directory
: "/") < 0) {
1249 if (asprintf(&d
, "%s/%s",
1250 context
->root_directory
? context
->root_directory
: "",
1251 context
->working_directory
? context
->working_directory
: "") < 0) {
1265 /* We repeat the fd closing here, to make sure that
1266 * nothing is leaked from the PAM modules */
1267 if (close_all_fds(fds
, n_fds
) < 0 ||
1268 shift_fds(fds
, n_fds
) < 0 ||
1269 flags_fds(fds
, n_fds
, context
->non_blocking
) < 0) {
1274 if (apply_permissions
) {
1276 for (i
= 0; i
< RLIMIT_NLIMITS
; i
++) {
1277 if (!context
->rlimit
[i
])
1280 if (setrlimit(i
, context
->rlimit
[i
]) < 0) {
1286 if (context
->capability_bounding_set_drop
)
1287 if (do_capability_bounding_set_drop(context
->capability_bounding_set_drop
) < 0) {
1288 r
= EXIT_CAPABILITIES
;
1293 if (enforce_user(context
, uid
) < 0) {
1298 /* PR_GET_SECUREBITS is not privileged, while
1299 * PR_SET_SECUREBITS is. So to suppress
1300 * potential EPERMs we'll try not to call
1301 * PR_SET_SECUREBITS unless necessary. */
1302 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1303 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1304 r
= EXIT_SECUREBITS
;
1308 if (context
->capabilities
)
1309 if (cap_set_proc(context
->capabilities
) < 0) {
1310 r
= EXIT_CAPABILITIES
;
1315 if (!(our_env
= new0(char*, 7))) {
1321 if (asprintf(our_env
+ n_env
++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1322 asprintf(our_env
+ n_env
++, "LISTEN_FDS=%u", n_fds
) < 0) {
1328 if (asprintf(our_env
+ n_env
++, "HOME=%s", home
) < 0) {
1334 if (asprintf(our_env
+ n_env
++, "LOGNAME=%s", username
) < 0 ||
1335 asprintf(our_env
+ n_env
++, "USER=%s", username
) < 0) {
1340 if (is_terminal_input(context
->std_input
) ||
1341 context
->std_output
== EXEC_OUTPUT_TTY
||
1342 context
->std_error
== EXEC_OUTPUT_TTY
)
1343 if (!(our_env
[n_env
++] = strdup(default_term_for_tty(tty_path(context
))))) {
1350 if (!(final_env
= strv_env_merge(
1354 context
->environment
,
1362 if (!(final_argv
= replace_env_argv(argv
, final_env
))) {
1367 final_env
= strv_env_clean(final_env
);
1369 execve(command
->path
, final_argv
, final_env
);
1374 strv_free(final_env
);
1376 strv_free(files_env
);
1377 strv_free(final_argv
);
1379 if (saved_stdin
>= 0)
1380 close_nointr_nofail(saved_stdin
);
1382 if (saved_stdout
>= 0)
1383 close_nointr_nofail(saved_stdout
);
1388 strv_free(files_env
);
1390 /* We add the new process to the cgroup both in the child (so
1391 * that we can be sure that no user code is ever executed
1392 * outside of the cgroup) and in the parent (so that we can be
1393 * sure that when we kill the cgroup the process will be
1395 if (cgroup_bondings
)
1396 cgroup_bonding_install_list(cgroup_bondings
, pid
);
1398 log_debug("Forked %s as %lu", command
->path
, (unsigned long) pid
);
1400 exec_status_start(&command
->exec_status
, pid
);
1406 strv_free(files_env
);
1411 void exec_context_init(ExecContext
*c
) {
1415 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1416 c
->cpu_sched_policy
= SCHED_OTHER
;
1417 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1418 c
->syslog_level_prefix
= true;
1419 c
->mount_flags
= MS_SHARED
;
1420 c
->kill_signal
= SIGTERM
;
1421 c
->send_sigkill
= true;
1424 void exec_context_done(ExecContext
*c
) {
1429 strv_free(c
->environment
);
1430 c
->environment
= NULL
;
1432 strv_free(c
->environment_files
);
1433 c
->environment_files
= NULL
;
1435 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1437 c
->rlimit
[l
] = NULL
;
1440 free(c
->working_directory
);
1441 c
->working_directory
= NULL
;
1442 free(c
->root_directory
);
1443 c
->root_directory
= NULL
;
1448 free(c
->tcpwrap_name
);
1449 c
->tcpwrap_name
= NULL
;
1451 free(c
->syslog_identifier
);
1452 c
->syslog_identifier
= NULL
;
1460 strv_free(c
->supplementary_groups
);
1461 c
->supplementary_groups
= NULL
;
1466 if (c
->capabilities
) {
1467 cap_free(c
->capabilities
);
1468 c
->capabilities
= NULL
;
1471 strv_free(c
->read_only_dirs
);
1472 c
->read_only_dirs
= NULL
;
1474 strv_free(c
->read_write_dirs
);
1475 c
->read_write_dirs
= NULL
;
1477 strv_free(c
->inaccessible_dirs
);
1478 c
->inaccessible_dirs
= NULL
;
1481 CPU_FREE(c
->cpuset
);
1487 void exec_command_done(ExecCommand
*c
) {
1497 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
1500 for (i
= 0; i
< n
; i
++)
1501 exec_command_done(c
+i
);
1504 void exec_command_free_list(ExecCommand
*c
) {
1508 LIST_REMOVE(ExecCommand
, command
, c
, i
);
1509 exec_command_done(i
);
1514 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
1517 for (i
= 0; i
< n
; i
++) {
1518 exec_command_free_list(c
[i
]);
1523 int exec_context_load_environment(const ExecContext
*c
, char ***l
) {
1524 char **i
, **r
= NULL
;
1529 STRV_FOREACH(i
, c
->environment_files
) {
1532 bool ignore
= false;
1542 if (!path_is_absolute(fn
)) {
1551 if ((k
= load_env_file(fn
, &p
)) < 0) {
1565 m
= strv_env_merge(2, r
, p
);
1581 static void strv_fprintf(FILE *f
, char **l
) {
1587 fprintf(f
, " %s", *g
);
1590 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
1602 "%sWorkingDirectory: %s\n"
1603 "%sRootDirectory: %s\n"
1604 "%sNonBlocking: %s\n"
1605 "%sPrivateTmp: %s\n"
1606 "%sControlGroupModify: %s\n"
1607 "%sPrivateNetwork: %s\n",
1609 prefix
, c
->working_directory
? c
->working_directory
: "/",
1610 prefix
, c
->root_directory
? c
->root_directory
: "/",
1611 prefix
, yes_no(c
->non_blocking
),
1612 prefix
, yes_no(c
->private_tmp
),
1613 prefix
, yes_no(c
->control_group_modify
),
1614 prefix
, yes_no(c
->private_network
));
1616 STRV_FOREACH(e
, c
->environment
)
1617 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
1619 STRV_FOREACH(e
, c
->environment_files
)
1620 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
1622 if (c
->tcpwrap_name
)
1624 "%sTCPWrapName: %s\n",
1625 prefix
, c
->tcpwrap_name
);
1632 if (c
->oom_score_adjust_set
)
1634 "%sOOMScoreAdjust: %i\n",
1635 prefix
, c
->oom_score_adjust
);
1637 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
1639 fprintf(f
, "%s%s: %llu\n", prefix
, rlimit_to_string(i
), (unsigned long long) c
->rlimit
[i
]->rlim_max
);
1643 "%sIOSchedulingClass: %s\n"
1644 "%sIOPriority: %i\n",
1645 prefix
, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c
->ioprio
)),
1646 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
1648 if (c
->cpu_sched_set
)
1650 "%sCPUSchedulingPolicy: %s\n"
1651 "%sCPUSchedulingPriority: %i\n"
1652 "%sCPUSchedulingResetOnFork: %s\n",
1653 prefix
, sched_policy_to_string(c
->cpu_sched_policy
),
1654 prefix
, c
->cpu_sched_priority
,
1655 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
1658 fprintf(f
, "%sCPUAffinity:", prefix
);
1659 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
1660 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
1661 fprintf(f
, " %i", i
);
1665 if (c
->timer_slack_nsec_set
)
1666 fprintf(f
, "%sTimerSlackNSec: %lu\n", prefix
, c
->timer_slack_nsec
);
1669 "%sStandardInput: %s\n"
1670 "%sStandardOutput: %s\n"
1671 "%sStandardError: %s\n",
1672 prefix
, exec_input_to_string(c
->std_input
),
1673 prefix
, exec_output_to_string(c
->std_output
),
1674 prefix
, exec_output_to_string(c
->std_error
));
1680 "%sTTYVHangup: %s\n"
1681 "%sTTYVTDisallocate: %s\n",
1682 prefix
, c
->tty_path
,
1683 prefix
, yes_no(c
->tty_reset
),
1684 prefix
, yes_no(c
->tty_vhangup
),
1685 prefix
, yes_no(c
->tty_vt_disallocate
));
1687 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
|| c
->std_output
== EXEC_OUTPUT_KMSG
||
1688 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
|| c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
1689 c
->std_error
== EXEC_OUTPUT_SYSLOG
|| c
->std_error
== EXEC_OUTPUT_KMSG
||
1690 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
|| c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
)
1692 "%sSyslogFacility: %s\n"
1693 "%sSyslogLevel: %s\n",
1694 prefix
, log_facility_unshifted_to_string(c
->syslog_priority
>> 3),
1695 prefix
, log_level_to_string(LOG_PRI(c
->syslog_priority
)));
1697 if (c
->capabilities
) {
1699 if ((t
= cap_to_text(c
->capabilities
, NULL
))) {
1700 fprintf(f
, "%sCapabilities: %s\n",
1707 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
1709 (c
->secure_bits
& SECURE_KEEP_CAPS
) ? " keep-caps" : "",
1710 (c
->secure_bits
& SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
1711 (c
->secure_bits
& SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
1712 (c
->secure_bits
& SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
1713 (c
->secure_bits
& SECURE_NOROOT
) ? " noroot" : "",
1714 (c
->secure_bits
& SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
1716 if (c
->capability_bounding_set_drop
) {
1718 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
1720 for (l
= 0; l
<= cap_last_cap(); l
++)
1721 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
))) {
1724 if ((t
= cap_to_name(l
))) {
1725 fprintf(f
, " %s", t
);
1734 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
1736 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
1738 if (strv_length(c
->supplementary_groups
) > 0) {
1739 fprintf(f
, "%sSupplementaryGroups:", prefix
);
1740 strv_fprintf(f
, c
->supplementary_groups
);
1745 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
1747 if (strv_length(c
->read_write_dirs
) > 0) {
1748 fprintf(f
, "%sReadWriteDirs:", prefix
);
1749 strv_fprintf(f
, c
->read_write_dirs
);
1753 if (strv_length(c
->read_only_dirs
) > 0) {
1754 fprintf(f
, "%sReadOnlyDirs:", prefix
);
1755 strv_fprintf(f
, c
->read_only_dirs
);
1759 if (strv_length(c
->inaccessible_dirs
) > 0) {
1760 fprintf(f
, "%sInaccessibleDirs:", prefix
);
1761 strv_fprintf(f
, c
->inaccessible_dirs
);
1767 "%sKillSignal: SIG%s\n"
1768 "%sSendSIGKILL: %s\n",
1769 prefix
, kill_mode_to_string(c
->kill_mode
),
1770 prefix
, signal_to_string(c
->kill_signal
),
1771 prefix
, yes_no(c
->send_sigkill
));
1775 "%sUtmpIdentifier: %s\n",
1776 prefix
, c
->utmp_id
);
/* Records the start of a process in *s.
 *
 * s   - status record to update
 * pid - PID belonging to this start (presumably the newly started
 *       process -- confirm against callers)
 *
 * The start time is captured by passing s->start_timestamp to
 * dual_timestamp_get(). */
1779 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
1784 dual_timestamp_get(&s
->start_timestamp
);
1787 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
1790 if ((s
->pid
&& s
->pid
!= pid
) ||
1791 !s
->start_timestamp
.realtime
<= 0)
1795 dual_timestamp_get(&s
->exit_timestamp
);
1801 if (context
->utmp_id
)
1802 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
1804 exec_context_tty_reset(context
);
/* Writes a human-readable description of *s to f.
 *
 * s      - status record to describe
 * f      - output stream
 * prefix - string placed in front of every emitted line
 *
 * The PID is always printed. The start and exit sections are each
 * printed only if the corresponding timestamp has a realtime value
 * greater than zero. */
1808 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
/* Scratch buffer for format_timestamp(); used for both timestamps */
1809 char buf
[FORMAT_TIMESTAMP_MAX
];
/* The PID is cast to unsigned long for printing */
1822 prefix
, (unsigned long) s
->pid
);
/* Start timestamp: skipped while its realtime value is not > 0 */
1824 if (s
->start_timestamp
.realtime
> 0)
1826 "%sStart Timestamp: %s\n",
1827 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
/* Exit section: timestamp, SIGCHLD code (rendered as a string via
 * sigchld_code_to_string()) and exit status, skipped while the exit
 * timestamp's realtime value is not > 0 */
1829 if (s
->exit_timestamp
.realtime
> 0)
1831 "%sExit Timestamp: %s\n"
1833 "%sExit Status: %i\n",
1834 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
1835 prefix
, sigchld_code_to_string(s
->code
),
/* Builds one string out of the argument vector argv.
 *
 * argv - NULL-terminated string vector, walked with STRV_FOREACH
 *
 * The vector is traversed twice, with a char buffer of k bytes
 * allocated via new() in between (presumably the first pass computes
 * k -- confirm). Arguments containing any WHITESPACE character get
 * special treatment in the second pass; as the FIXME below says, that
 * treatment is not correct for arguments that contain both spaces and
 * ticks.
 *
 * NOTE(review): callers in this file treat a NULL result as
 * out-of-memory and free the returned string themselves -- confirm. */
1839 char *exec_command_line(char **argv
) {
1847 STRV_FOREACH(a
, argv
)
/* Allocation of the result buffer */
1850 if (!(n
= new(char, k
)))
1854 STRV_FOREACH(a
, argv
) {
/* Does this argument contain whitespace? */
1861 if (strpbrk(*a
, WHITESPACE
)) {
1872 /* FIXME: this doesn't really handle arguments that have
1873 * spaces and ticks in them */
/* Writes a description of a single command to f: its command line,
 * followed by its exec status.
 *
 * c      - command to describe
 * f      - output stream
 * prefix - string placed in front of the command's own lines
 */
1878 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
1880 const char *prefix2
;
/* The status lines get a prefix extended by a tab. If strappend()
 * returns NULL, fall back to the unextended prefix rather than
 * failing. */
1889 p2
= strappend(prefix
, "\t");
1890 prefix2
= p2
? p2
: prefix
;
1892 cmd
= exec_command_line(c
->argv
);
/* If no command line string could be built, print the ENOMEM error
 * text in its place */
1895 "%sCommand Line: %s\n",
1896 prefix
, cmd
? cmd
: strerror(ENOMEM
));
1900 exec_status_dump(&c
->exec_status
, f
, prefix2
);
/* Dumps every command of a list to f by calling exec_command_dump()
 * on each element.
 *
 * c      - first command of the list; also reused as the loop cursor
 * f      - output stream
 * prefix - passed through unchanged to exec_command_dump()
 */
1905 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
/* Iterate along the "command" list field, starting at c */
1911 LIST_FOREACH(command
, c
, c
)
1912 exec_command_dump(c
, f
, prefix
);
/* Appends the command e to the end of the list *l.
 *
 * l - pointer to the list head
 * e - command to append
 *
 * The current tail is looked up first and e is inserted after it, so
 * the existing order of the list is kept. */
1915 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
1922 /* It's kind of important, that we keep the order here */
1923 LIST_FIND_TAIL(ExecCommand
, command
, *l
, end
);
1924 LIST_INSERT_AFTER(ExecCommand
, command
, *l
, end
, e
);
/* Sets up the command c from a path and a variadic list of further
 * strings.
 *
 * c    - command to fill in
 * path - first string; it is both passed to strv_new_ap() together
 *        with the variadic arguments and duplicated separately with
 *        strdup()
 * ...  - remaining strings, consumed through a va_list
 *
 * NOTE(review): presumably the new vector and the path copy replace
 * c's previous argv/path and a negative errno-style value is returned
 * on allocation failure -- confirm. */
1929 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
/* Build the string vector from path plus the variadic arguments */
1937 l
= strv_new_ap(path
, ap
);
/* Separate copy of the path */
1943 if (!(p
= strdup(path
))) {
/* String names of the ExecInput values, indexed by the enum constant */
1957 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
1958 [EXEC_INPUT_NULL
] = "null",
1959 [EXEC_INPUT_TTY
] = "tty",
1960 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
1961 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
1962 [EXEC_INPUT_SOCKET
] = "socket"
/* Presumably generates the exec_input string lookup helpers on top
 * of the table above -- see the macro's definition */
1965 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
/* String names of the ExecOutput values, indexed by the enum constant */
1967 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
1968 [EXEC_OUTPUT_INHERIT
] = "inherit",
1969 [EXEC_OUTPUT_NULL
] = "null",
1970 [EXEC_OUTPUT_TTY
] = "tty",
1971 [EXEC_OUTPUT_SYSLOG
] = "syslog",
1972 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
1973 [EXEC_OUTPUT_KMSG
] = "kmsg",
1974 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
1975 [EXEC_OUTPUT_SOCKET
] = "socket"
/* Presumably generates the exec_output string lookup helpers, among
 * them the exec_output_to_string() used when dumping a context --
 * see the macro's definition */
1978 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
/* String names of the KillMode values, indexed by the enum constant */
1980 static const char* const kill_mode_table
[_KILL_MODE_MAX
] = {
1981 [KILL_CONTROL_GROUP
] = "control-group",
1982 [KILL_PROCESS
] = "process",
1983 [KILL_NONE
] = "none"
/* Presumably generates the kill_mode string lookup helpers, among
 * them the kill_mode_to_string() used when dumping a context -- see
 * the macro's definition */
1986 DEFINE_STRING_TABLE_LOOKUP(kill_mode
, KillMode
);
/* String names of the KillWho values, indexed by the enum constant.
 * NOTE(review): verify that every KillWho value below _KILL_WHO_MAX
 * has an entry here. */
1988 static const char* const kill_who_table
[_KILL_WHO_MAX
] = {
1989 [KILL_MAIN
] = "main",
1990 [KILL_CONTROL
] = "control",
/* Presumably generates the kill_who string lookup helpers on top of
 * the table above -- see the macro's definition */
1994 DEFINE_STRING_TABLE_LOOKUP(kill_who
, KillWho
);