1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <security/pam_appl.h>
51 #include "securebits.h"
53 #include "namespace.h"
55 #include "exit-status.h"
57 #include "utmp-wtmp.h"
60 /* This assumes there is a 'tty' group */
63 static int shift_fds(int fds
[], unsigned n_fds
) {
64 int start
, restart_from
;
69 /* Modifies the fds array! (sorts it) */
79 for (i
= start
; i
< (int) n_fds
; i
++) {
82 /* Already at right index? */
86 if ((nfd
= fcntl(fds
[i
], F_DUPFD
, i
+3)) < 0)
89 close_nointr_nofail(fds
[i
]);
92 /* Hmm, the fd we wanted isn't free? Then
93 * let's remember that and try again from here*/
94 if (nfd
!= i
+3 && restart_from
< 0)
101 start
= restart_from
;
107 static int flags_fds(const int fds
[], unsigned n_fds
, bool nonblock
) {
116 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
118 for (i
= 0; i
< n_fds
; i
++) {
120 if ((r
= fd_nonblock(fds
[i
], nonblock
)) < 0)
123 /* We unconditionally drop FD_CLOEXEC from the fds,
124 * since after all we want to pass these fds to our
127 if ((r
= fd_cloexec(fds
[i
], false)) < 0)
134 static const char *tty_path(const ExecContext
*context
) {
137 if (context
->tty_path
)
138 return context
->tty_path
;
140 return "/dev/console";
143 void exec_context_tty_reset(const ExecContext
*context
) {
146 if (context
->tty_vhangup
)
147 terminal_vhangup(tty_path(context
));
149 if (context
->tty_reset
)
150 reset_terminal(tty_path(context
));
152 if (context
->tty_vt_disallocate
&& context
->tty_path
)
153 vt_disallocate(context
->tty_path
);
156 static int open_null_as(int flags
, int nfd
) {
161 if ((fd
= open("/dev/null", flags
|O_NOCTTY
)) < 0)
165 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
166 close_nointr_nofail(fd
);
173 static int connect_logger_as(const ExecContext
*context
, ExecOutput output
, const char *ident
, int nfd
) {
177 struct sockaddr_un un
;
181 assert(output
< _EXEC_OUTPUT_MAX
);
185 if ((fd
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0)
189 sa
.sa
.sa_family
= AF_UNIX
;
190 strncpy(sa
.un
.sun_path
, LOGGER_SOCKET
, sizeof(sa
.un
.sun_path
));
192 if (connect(fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + sizeof(LOGGER_SOCKET
) - 1) < 0) {
193 close_nointr_nofail(fd
);
197 if (shutdown(fd
, SHUT_RD
) < 0) {
198 close_nointr_nofail(fd
);
202 /* We speak a very simple protocol between log server
203 * and client: one line for the log destination (kmsg
204 * or syslog), followed by the priority field,
205 * followed by the process name. Since we replaced
206 * stdin/stderr we simply use stdio to write to
207 * it. Note that we use stderr, to minimize buffer
208 * flushing issues. */
215 output
== EXEC_OUTPUT_KMSG
? "kmsg" :
216 output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
? "kmsg+console" :
217 output
== EXEC_OUTPUT_SYSLOG
? "syslog" :
219 context
->syslog_priority
,
220 context
->syslog_identifier
? context
->syslog_identifier
: ident
,
221 context
->syslog_level_prefix
);
224 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
225 close_nointr_nofail(fd
);
231 static int open_terminal_as(const char *path
, mode_t mode
, int nfd
) {
237 if ((fd
= open_terminal(path
, mode
| O_NOCTTY
)) < 0)
241 r
= dup2(fd
, nfd
) < 0 ? -errno
: nfd
;
242 close_nointr_nofail(fd
);
249 static bool is_terminal_input(ExecInput i
) {
251 i
== EXEC_INPUT_TTY
||
252 i
== EXEC_INPUT_TTY_FORCE
||
253 i
== EXEC_INPUT_TTY_FAIL
;
256 static int fixup_input(ExecInput std_input
, int socket_fd
, bool apply_tty_stdin
) {
258 if (is_terminal_input(std_input
) && !apply_tty_stdin
)
259 return EXEC_INPUT_NULL
;
261 if (std_input
== EXEC_INPUT_SOCKET
&& socket_fd
< 0)
262 return EXEC_INPUT_NULL
;
267 static int fixup_output(ExecOutput std_output
, int socket_fd
) {
269 if (std_output
== EXEC_OUTPUT_SOCKET
&& socket_fd
< 0)
270 return EXEC_OUTPUT_INHERIT
;
275 static int setup_input(const ExecContext
*context
, int socket_fd
, bool apply_tty_stdin
) {
280 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
284 case EXEC_INPUT_NULL
:
285 return open_null_as(O_RDONLY
, STDIN_FILENO
);
288 case EXEC_INPUT_TTY_FORCE
:
289 case EXEC_INPUT_TTY_FAIL
: {
292 if ((fd
= acquire_terminal(
294 i
== EXEC_INPUT_TTY_FAIL
,
295 i
== EXEC_INPUT_TTY_FORCE
,
299 if (fd
!= STDIN_FILENO
) {
300 r
= dup2(fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
301 close_nointr_nofail(fd
);
308 case EXEC_INPUT_SOCKET
:
309 return dup2(socket_fd
, STDIN_FILENO
) < 0 ? -errno
: STDIN_FILENO
;
312 assert_not_reached("Unknown input type");
316 static int setup_output(const ExecContext
*context
, int socket_fd
, const char *ident
, bool apply_tty_stdin
) {
323 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
324 o
= fixup_output(context
->std_output
, socket_fd
);
326 /* This expects the input is already set up */
330 case EXEC_OUTPUT_INHERIT
:
332 /* If input got downgraded, inherit the original value */
333 if (i
== EXEC_INPUT_NULL
&& is_terminal_input(context
->std_input
))
334 return open_terminal_as(tty_path(context
), O_WRONLY
, STDOUT_FILENO
);
336 /* If the input is connected to anything that's not a /dev/null, inherit that... */
337 if (i
!= EXEC_INPUT_NULL
)
338 return dup2(STDIN_FILENO
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
340 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
342 return STDOUT_FILENO
;
344 /* We need to open /dev/null here anew, to get the
345 * right access mode. So we fall through */
347 case EXEC_OUTPUT_NULL
:
348 return open_null_as(O_WRONLY
, STDOUT_FILENO
);
350 case EXEC_OUTPUT_TTY
:
351 if (is_terminal_input(i
))
352 return dup2(STDIN_FILENO
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
354 /* We don't reset the terminal if this is just about output */
355 return open_terminal_as(tty_path(context
), O_WRONLY
, STDOUT_FILENO
);
357 case EXEC_OUTPUT_SYSLOG
:
358 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
359 case EXEC_OUTPUT_KMSG
:
360 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
361 return connect_logger_as(context
, o
, ident
, STDOUT_FILENO
);
363 case EXEC_OUTPUT_SOCKET
:
364 assert(socket_fd
>= 0);
365 return dup2(socket_fd
, STDOUT_FILENO
) < 0 ? -errno
: STDOUT_FILENO
;
368 assert_not_reached("Unknown output type");
372 static int setup_error(const ExecContext
*context
, int socket_fd
, const char *ident
, bool apply_tty_stdin
) {
379 i
= fixup_input(context
->std_input
, socket_fd
, apply_tty_stdin
);
380 o
= fixup_output(context
->std_output
, socket_fd
);
381 e
= fixup_output(context
->std_error
, socket_fd
);
383 /* This expects the input and output are already set up */
385 /* Don't change the stderr file descriptor if we inherit all
386 * the way and are not on a tty */
387 if (e
== EXEC_OUTPUT_INHERIT
&&
388 o
== EXEC_OUTPUT_INHERIT
&&
389 i
== EXEC_INPUT_NULL
&&
390 !is_terminal_input(context
->std_input
) &&
392 return STDERR_FILENO
;
394 /* Duplicate from stdout if possible */
395 if (e
== o
|| e
== EXEC_OUTPUT_INHERIT
)
396 return dup2(STDOUT_FILENO
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
400 case EXEC_OUTPUT_NULL
:
401 return open_null_as(O_WRONLY
, STDERR_FILENO
);
403 case EXEC_OUTPUT_TTY
:
404 if (is_terminal_input(i
))
405 return dup2(STDIN_FILENO
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
407 /* We don't reset the terminal if this is just about output */
408 return open_terminal_as(tty_path(context
), O_WRONLY
, STDERR_FILENO
);
410 case EXEC_OUTPUT_SYSLOG
:
411 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE
:
412 case EXEC_OUTPUT_KMSG
:
413 case EXEC_OUTPUT_KMSG_AND_CONSOLE
:
414 return connect_logger_as(context
, e
, ident
, STDERR_FILENO
);
416 case EXEC_OUTPUT_SOCKET
:
417 assert(socket_fd
>= 0);
418 return dup2(socket_fd
, STDERR_FILENO
) < 0 ? -errno
: STDERR_FILENO
;
421 assert_not_reached("Unknown error type");
425 static int chown_terminal(int fd
, uid_t uid
) {
430 /* This might fail. What matters are the results. */
431 (void) fchown(fd
, uid
, -1);
432 (void) fchmod(fd
, TTY_MODE
);
434 if (fstat(fd
, &st
) < 0)
437 if (st
.st_uid
!= uid
|| (st
.st_mode
& 0777) != TTY_MODE
)
443 static int setup_confirm_stdio(const ExecContext
*context
,
445 int *_saved_stdout
) {
446 int fd
= -1, saved_stdin
, saved_stdout
= -1, r
;
449 assert(_saved_stdin
);
450 assert(_saved_stdout
);
452 /* This returns positive EXIT_xxx return values instead of
453 * negative errno style values! */
455 if ((saved_stdin
= fcntl(STDIN_FILENO
, F_DUPFD
, 3)) < 0)
458 if ((saved_stdout
= fcntl(STDOUT_FILENO
, F_DUPFD
, 3)) < 0) {
463 if ((fd
= acquire_terminal(
465 context
->std_input
== EXEC_INPUT_TTY_FAIL
,
466 context
->std_input
== EXEC_INPUT_TTY_FORCE
,
472 if (chown_terminal(fd
, getuid()) < 0) {
477 if (dup2(fd
, STDIN_FILENO
) < 0) {
482 if (dup2(fd
, STDOUT_FILENO
) < 0) {
488 close_nointr_nofail(fd
);
490 *_saved_stdin
= saved_stdin
;
491 *_saved_stdout
= saved_stdout
;
496 if (saved_stdout
>= 0)
497 close_nointr_nofail(saved_stdout
);
499 if (saved_stdin
>= 0)
500 close_nointr_nofail(saved_stdin
);
503 close_nointr_nofail(fd
);
508 static int restore_confirm_stdio(const ExecContext
*context
,
516 assert(*saved_stdin
>= 0);
517 assert(saved_stdout
);
518 assert(*saved_stdout
>= 0);
520 /* This returns positive EXIT_xxx return values instead of
521 * negative errno style values! */
523 if (is_terminal_input(context
->std_input
)) {
525 /* The service wants terminal input. */
529 context
->std_output
== EXEC_OUTPUT_INHERIT
||
530 context
->std_output
== EXEC_OUTPUT_TTY
;
533 /* If the service doesn't want a controlling terminal,
534 * then we need to get rid entirely of what we have
537 if (release_terminal() < 0)
540 if (dup2(*saved_stdin
, STDIN_FILENO
) < 0)
543 if (dup2(*saved_stdout
, STDOUT_FILENO
) < 0)
546 *keep_stdout
= *keep_stdin
= false;
552 static int get_group_creds(const char *groupname
, gid_t
*gid
) {
559 /* We enforce some special rules for gid=0: in order to avoid
560 * NSS lookups for root we hardcode its data. */
562 if (streq(groupname
, "root") || streq(groupname
, "0")) {
567 if (safe_atolu(groupname
, &lu
) >= 0) {
569 g
= getgrgid((gid_t
) lu
);
572 g
= getgrnam(groupname
);
576 return errno
!= 0 ? -errno
: -ESRCH
;
582 static int get_user_creds(const char **username
, uid_t
*uid
, gid_t
*gid
, const char **home
) {
592 /* We enforce some special rules for uid=0: in order to avoid
593 * NSS lookups for root we hardcode its data. */
595 if (streq(*username
, "root") || streq(*username
, "0")) {
603 if (safe_atolu(*username
, &lu
) >= 0) {
605 p
= getpwuid((uid_t
) lu
);
607 /* If there are multiple users with the same id, make
608 * sure to leave $USER to the configured value instead
609 * of the first occurrence in the database. However if
610 * the uid was configured by a numeric uid, then let's
611 * pick the real username from /etc/passwd. */
613 *username
= p
->pw_name
;
616 p
= getpwnam(*username
);
620 return errno
!= 0 ? -errno
: -ESRCH
;
628 static int enforce_groups(const ExecContext
*context
, const char *username
, gid_t gid
) {
629 bool keep_groups
= false;
634 /* Lookup and set GID and supplementary group list. Here too
635 * we avoid NSS lookups for gid=0. */
637 if (context
->group
|| username
) {
640 if ((r
= get_group_creds(context
->group
, &gid
)) < 0)
643 /* First step, initialize groups from /etc/group */
644 if (username
&& gid
!= 0) {
645 if (initgroups(username
, gid
) < 0)
651 /* Second step, set our gids */
652 if (setresgid(gid
, gid
, gid
) < 0)
656 if (context
->supplementary_groups
) {
661 /* Final step, initialize any manually set supplementary groups */
662 assert_se((ngroups_max
= (int) sysconf(_SC_NGROUPS_MAX
)) > 0);
664 if (!(gids
= new(gid_t
, ngroups_max
)))
668 if ((k
= getgroups(ngroups_max
, gids
)) < 0) {
675 STRV_FOREACH(i
, context
->supplementary_groups
) {
677 if (k
>= ngroups_max
) {
682 if ((r
= get_group_creds(*i
, gids
+k
)) < 0) {
690 if (setgroups(k
, gids
) < 0) {
701 static int enforce_user(const ExecContext
*context
, uid_t uid
) {
705 /* Sets (but doesn't lookup) the uid and make sure we keep the
706 * capabilities while doing so. */
708 if (context
->capabilities
) {
710 static const cap_value_t bits
[] = {
711 CAP_SETUID
, /* Necessary so that we can run setresuid() below */
712 CAP_SETPCAP
/* Necessary so that we can set PR_SET_SECUREBITS later on */
715 /* First step: If we need to keep capabilities but
716 * drop privileges we need to make sure we keep our
717 * caps, while we drop privileges. */
719 int sb
= context
->secure_bits
|SECURE_KEEP_CAPS
;
721 if (prctl(PR_GET_SECUREBITS
) != sb
)
722 if (prctl(PR_SET_SECUREBITS
, sb
) < 0)
726 /* Second step: set the capabilities. This will reduce
727 * the capabilities to the minimum we need. */
729 if (!(d
= cap_dup(context
->capabilities
)))
732 if (cap_set_flag(d
, CAP_EFFECTIVE
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0 ||
733 cap_set_flag(d
, CAP_PERMITTED
, ELEMENTSOF(bits
), bits
, CAP_SET
) < 0) {
739 if (cap_set_proc(d
) < 0) {
748 /* Third step: actually set the uids */
749 if (setresuid(uid
, uid
, uid
) < 0)
752 /* At this point we should have all necessary capabilities but
753 are otherwise a normal user. However, the caps might have been
754 corrupted due to the setresuid() so we need to clean them up
755 later. This is done outside of this call. */
762 static int null_conv(
764 const struct pam_message
**msg
,
765 struct pam_response
**resp
,
768 /* We don't support conversations */
773 static int setup_pam(
778 int fds
[], unsigned n_fds
) {
780 static const struct pam_conv conv
= {
785 pam_handle_t
*handle
= NULL
;
787 int pam_code
= PAM_SUCCESS
;
789 bool close_session
= false;
790 pid_t pam_pid
= 0, parent_pid
;
796 /* We set up PAM in the parent process, then fork. The child
797 * will then stay around until killed via PR_SET_PDEATHSIG or
798 * systemd via the cgroup logic. It will then remove the PAM
799 * session again. The parent process will exec() the actual
800 * daemon. We do things this way to ensure that the main PID
801 * of the daemon is the one we initially fork()ed. */
803 if ((pam_code
= pam_start(name
, user
, &conv
, &handle
)) != PAM_SUCCESS
) {
809 if ((pam_code
= pam_set_item(handle
, PAM_TTY
, tty
)) != PAM_SUCCESS
)
812 if ((pam_code
= pam_acct_mgmt(handle
, PAM_SILENT
)) != PAM_SUCCESS
)
815 if ((pam_code
= pam_open_session(handle
, PAM_SILENT
)) != PAM_SUCCESS
)
818 close_session
= true;
820 if ((pam_code
= pam_setcred(handle
, PAM_ESTABLISH_CRED
| PAM_SILENT
)) != PAM_SUCCESS
)
823 if ((!(e
= pam_getenvlist(handle
)))) {
824 pam_code
= PAM_BUF_ERR
;
828 /* Block SIGTERM, so that we know that it won't get lost in
830 if (sigemptyset(&ss
) < 0 ||
831 sigaddset(&ss
, SIGTERM
) < 0 ||
832 sigprocmask(SIG_BLOCK
, &ss
, &old_ss
) < 0)
835 parent_pid
= getpid();
837 if ((pam_pid
= fork()) < 0)
844 /* The child's job is to reset the PAM session on
847 /* This string must fit in 10 chars (i.e. the length
848 * of "/sbin/init") */
849 rename_process("sd:pam");
851 /* Make sure we don't keep open the passed fds in this
852 child. We assume that otherwise only those fds are
853 open here that have been opened by PAM. */
854 close_many(fds
, n_fds
);
856 /* Wait until our parent died. This will most likely
857 * not work since the kernel does not allow
858 * unprivileged parents to kill their privileged children
859 * this way. We rely on the control groups kill logic
860 * to do the rest for us. */
861 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
864 /* Check if our parent process might already have
866 if (getppid() == parent_pid
) {
867 if (sigwait(&ss
, &sig
) < 0)
870 assert(sig
== SIGTERM
);
873 /* Only if our parent died we'll end the session */
874 if (getppid() != parent_pid
)
875 if ((pam_code
= pam_close_session(handle
, PAM_DATA_SILENT
)) != PAM_SUCCESS
)
881 pam_end(handle
, pam_code
| PAM_DATA_SILENT
);
885 /* If the child was forked off successfully it will do all the
886 * cleanups, so forget about the handle here. */
889 /* Unblock SIGTERM again in the parent */
890 if (sigprocmask(SIG_SETMASK
, &old_ss
, NULL
) < 0)
893 /* We close the log explicitly here, since the PAM modules
894 * might have opened it, but we don't want this fd around. */
902 pam_code
= pam_close_session(handle
, PAM_DATA_SILENT
);
904 pam_end(handle
, pam_code
| PAM_DATA_SILENT
);
912 kill(pam_pid
, SIGTERM
);
913 kill(pam_pid
, SIGCONT
);
920 static int do_capability_bounding_set_drop(uint64_t drop
) {
922 cap_t old_cap
= NULL
, new_cap
= NULL
;
926 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
927 * in the effective set (yes, the kernel drops that when
928 * executing init!), so get it back temporarily so that we can
929 * call PR_CAPBSET_DROP. */
931 old_cap
= cap_get_proc();
935 if (cap_get_flag(old_cap
, CAP_SETPCAP
, CAP_EFFECTIVE
, &fv
) < 0) {
941 static const cap_value_t v
= CAP_SETPCAP
;
943 new_cap
= cap_dup(old_cap
);
949 if (cap_set_flag(new_cap
, CAP_EFFECTIVE
, 1, &v
, CAP_SET
) < 0) {
954 if (cap_set_proc(new_cap
) < 0) {
960 for (i
= 0; i
<= MAX(63LU, (unsigned long) CAP_LAST_CAP
); i
++)
961 if (drop
& ((uint64_t) 1ULL << (uint64_t) i
)) {
962 if (prctl(PR_CAPBSET_DROP
, i
) < 0) {
978 cap_set_proc(old_cap
);
985 int exec_spawn(ExecCommand
*command
,
987 const ExecContext
*context
,
988 int fds
[], unsigned n_fds
,
990 bool apply_permissions
,
992 bool apply_tty_stdin
,
994 CGroupBonding
*cgroup_bondings
,
1001 char **files_env
= NULL
;
1006 assert(fds
|| n_fds
<= 0);
1008 if (context
->std_input
== EXEC_INPUT_SOCKET
||
1009 context
->std_output
== EXEC_OUTPUT_SOCKET
||
1010 context
->std_error
== EXEC_OUTPUT_SOCKET
) {
1022 if ((r
= exec_context_load_environment(context
, &files_env
)) < 0) {
1023 log_error("Failed to load environment files: %s", strerror(-r
));
1028 argv
= command
->argv
;
1030 if (!(line
= exec_command_line(argv
))) {
1035 log_debug("About to execute: %s", line
);
1038 if (cgroup_bondings
)
1039 if ((r
= cgroup_bonding_realize_list(cgroup_bondings
)))
1042 if ((pid
= fork()) < 0) {
1050 const char *username
= NULL
, *home
= NULL
;
1051 uid_t uid
= (uid_t
) -1;
1052 gid_t gid
= (gid_t
) -1;
1053 char **our_env
= NULL
, **pam_env
= NULL
, **final_env
= NULL
, **final_argv
= NULL
;
1055 int saved_stdout
= -1, saved_stdin
= -1;
1056 bool keep_stdout
= false, keep_stdin
= false;
1060 /* This string must fit in 10 chars (i.e. the length
1061 * of "/sbin/init") */
1062 rename_process("sd.exec");
1064 /* We reset exactly these signals, since they are the
1065 * only ones we set to SIG_IGN in the main daemon. All
1066 * others we leave untouched because we set them to
1067 * SIG_DFL or a valid handler initially, both of which
1068 * will be demoted to SIG_DFL. */
1069 default_signals(SIGNALS_CRASH_HANDLER
,
1070 SIGNALS_IGNORE
, -1);
1072 if (sigemptyset(&ss
) < 0 ||
1073 sigprocmask(SIG_SETMASK
, &ss
, NULL
) < 0) {
1074 r
= EXIT_SIGNAL_MASK
;
1078 /* Close sockets very early to make sure we don't
1079 * block init reexecution because it cannot bind its
1081 if (close_all_fds(socket_fd
>= 0 ? &socket_fd
: fds
,
1082 socket_fd
>= 0 ? 1 : n_fds
) < 0) {
1087 if (!context
->same_pgrp
)
1093 if (context
->tcpwrap_name
) {
1095 if (!socket_tcpwrap(socket_fd
, context
->tcpwrap_name
)) {
1100 for (i
= 0; i
< (int) n_fds
; i
++) {
1101 if (!socket_tcpwrap(fds
[i
], context
->tcpwrap_name
)) {
1108 exec_context_tty_reset(context
);
1110 /* We skip the confirmation step if we shall not apply the TTY */
1111 if (confirm_spawn
&&
1112 (!is_terminal_input(context
->std_input
) || apply_tty_stdin
)) {
1115 /* Set up terminal for the question */
1116 if ((r
= setup_confirm_stdio(context
,
1117 &saved_stdin
, &saved_stdout
)))
1120 /* Now ask the question. */
1121 if (!(line
= exec_command_line(argv
))) {
1126 r
= ask(&response
, "yns", "Execute %s? [Yes, No, Skip] ", line
);
1129 if (r
< 0 || response
== 'n') {
1132 } else if (response
== 's') {
1137 /* Release terminal for the question */
1138 if ((r
= restore_confirm_stdio(context
,
1139 &saved_stdin
, &saved_stdout
,
1140 &keep_stdin
, &keep_stdout
)))
1144 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1145 * must make sure to drop O_NONBLOCK */
1147 fd_nonblock(socket_fd
, false);
1150 if (setup_input(context
, socket_fd
, apply_tty_stdin
) < 0) {
1156 if (setup_output(context
, socket_fd
, file_name_from_path(command
->path
), apply_tty_stdin
) < 0) {
1161 if (setup_error(context
, socket_fd
, file_name_from_path(command
->path
), apply_tty_stdin
) < 0) {
1166 if (cgroup_bondings
)
1167 if (cgroup_bonding_install_list(cgroup_bondings
, 0) < 0) {
1172 if (context
->oom_score_adjust_set
) {
1175 snprintf(t
, sizeof(t
), "%i", context
->oom_score_adjust
);
1178 if (write_one_line_file("/proc/self/oom_score_adj", t
) < 0) {
1179 /* Compatibility with Linux <= 2.6.35 */
1183 adj
= (context
->oom_score_adjust
* -OOM_DISABLE
) / OOM_SCORE_ADJ_MAX
;
1184 adj
= CLAMP(adj
, OOM_DISABLE
, OOM_ADJUST_MAX
);
1186 snprintf(t
, sizeof(t
), "%i", adj
);
1189 if (write_one_line_file("/proc/self/oom_adj", t
) < 0
1190 && errno
!= EACCES
) {
1191 r
= EXIT_OOM_ADJUST
;
1197 if (context
->nice_set
)
1198 if (setpriority(PRIO_PROCESS
, 0, context
->nice
) < 0) {
1203 if (context
->cpu_sched_set
) {
1204 struct sched_param param
;
1207 param
.sched_priority
= context
->cpu_sched_priority
;
1209 if (sched_setscheduler(0, context
->cpu_sched_policy
|
1210 (context
->cpu_sched_reset_on_fork
? SCHED_RESET_ON_FORK
: 0), ¶m
) < 0) {
1211 r
= EXIT_SETSCHEDULER
;
1216 if (context
->cpuset
)
1217 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context
->cpuset_ncpus
), context
->cpuset
) < 0) {
1218 r
= EXIT_CPUAFFINITY
;
1222 if (context
->ioprio_set
)
1223 if (ioprio_set(IOPRIO_WHO_PROCESS
, 0, context
->ioprio
) < 0) {
1228 if (context
->timer_slack_nsec_set
)
1229 if (prctl(PR_SET_TIMERSLACK
, context
->timer_slack_nsec
) < 0) {
1230 r
= EXIT_TIMERSLACK
;
1234 if (context
->utmp_id
)
1235 utmp_put_init_process(0, context
->utmp_id
, getpid(), getsid(0), context
->tty_path
);
1237 if (context
->user
) {
1238 username
= context
->user
;
1239 if (get_user_creds(&username
, &uid
, &gid
, &home
) < 0) {
1244 if (is_terminal_input(context
->std_input
))
1245 if (chown_terminal(STDIN_FILENO
, uid
) < 0) {
1250 if (cgroup_bondings
&& context
->control_group_modify
)
1251 if (cgroup_bonding_set_group_access_list(cgroup_bondings
, 0755, uid
, gid
) < 0 ||
1252 cgroup_bonding_set_task_access_list(cgroup_bondings
, 0644, uid
, gid
) < 0) {
1259 if (context
->pam_name
&& username
) {
1260 if (setup_pam(context
->pam_name
, username
, context
->tty_path
, &pam_env
, fds
, n_fds
) < 0) {
1267 if (apply_permissions
)
1268 if (enforce_groups(context
, username
, uid
) < 0) {
1273 umask(context
->umask
);
1275 if (strv_length(context
->read_write_dirs
) > 0 ||
1276 strv_length(context
->read_only_dirs
) > 0 ||
1277 strv_length(context
->inaccessible_dirs
) > 0 ||
1278 context
->mount_flags
!= MS_SHARED
||
1279 context
->private_tmp
)
1280 if ((r
= setup_namespace(
1281 context
->read_write_dirs
,
1282 context
->read_only_dirs
,
1283 context
->inaccessible_dirs
,
1284 context
->private_tmp
,
1285 context
->mount_flags
)) < 0)
1289 if (context
->root_directory
)
1290 if (chroot(context
->root_directory
) < 0) {
1295 if (chdir(context
->working_directory
? context
->working_directory
: "/") < 0) {
1303 if (asprintf(&d
, "%s/%s",
1304 context
->root_directory
? context
->root_directory
: "",
1305 context
->working_directory
? context
->working_directory
: "") < 0) {
1319 /* We repeat the fd closing here, to make sure that
1320 * nothing is leaked from the PAM modules */
1321 if (close_all_fds(fds
, n_fds
) < 0 ||
1322 shift_fds(fds
, n_fds
) < 0 ||
1323 flags_fds(fds
, n_fds
, context
->non_blocking
) < 0) {
1328 if (apply_permissions
) {
1330 for (i
= 0; i
< RLIMIT_NLIMITS
; i
++) {
1331 if (!context
->rlimit
[i
])
1334 if (setrlimit(i
, context
->rlimit
[i
]) < 0) {
1340 if (context
->capability_bounding_set_drop
)
1341 if (do_capability_bounding_set_drop(context
->capability_bounding_set_drop
) < 0) {
1342 r
= EXIT_CAPABILITIES
;
1347 if (enforce_user(context
, uid
) < 0) {
1352 /* PR_GET_SECUREBITS is not privileged, while
1353 * PR_SET_SECUREBITS is. So to suppress
1354 * potential EPERMs we'll try not to call
1355 * PR_SET_SECUREBITS unless necessary. */
1356 if (prctl(PR_GET_SECUREBITS
) != context
->secure_bits
)
1357 if (prctl(PR_SET_SECUREBITS
, context
->secure_bits
) < 0) {
1358 r
= EXIT_SECUREBITS
;
1362 if (context
->capabilities
)
1363 if (cap_set_proc(context
->capabilities
) < 0) {
1364 r
= EXIT_CAPABILITIES
;
1369 if (!(our_env
= new0(char*, 7))) {
1375 if (asprintf(our_env
+ n_env
++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1376 asprintf(our_env
+ n_env
++, "LISTEN_FDS=%u", n_fds
) < 0) {
1382 if (asprintf(our_env
+ n_env
++, "HOME=%s", home
) < 0) {
1388 if (asprintf(our_env
+ n_env
++, "LOGNAME=%s", username
) < 0 ||
1389 asprintf(our_env
+ n_env
++, "USER=%s", username
) < 0) {
1394 if (is_terminal_input(context
->std_input
) ||
1395 context
->std_output
== EXEC_OUTPUT_TTY
||
1396 context
->std_error
== EXEC_OUTPUT_TTY
)
1397 if (!(our_env
[n_env
++] = strdup(default_term_for_tty(tty_path(context
))))) {
1404 if (!(final_env
= strv_env_merge(
1408 context
->environment
,
1416 if (!(final_argv
= replace_env_argv(argv
, final_env
))) {
1421 final_env
= strv_env_clean(final_env
);
1423 execve(command
->path
, final_argv
, final_env
);
1428 strv_free(final_env
);
1430 strv_free(files_env
);
1431 strv_free(final_argv
);
1433 if (saved_stdin
>= 0)
1434 close_nointr_nofail(saved_stdin
);
1436 if (saved_stdout
>= 0)
1437 close_nointr_nofail(saved_stdout
);
1442 strv_free(files_env
);
1444 /* We add the new process to the cgroup both in the child (so
1445 * that we can be sure that no user code is ever executed
1446 * outside of the cgroup) and in the parent (so that we can be
1447 * sure that when we kill the cgroup the process will be
1449 if (cgroup_bondings
)
1450 cgroup_bonding_install_list(cgroup_bondings
, pid
);
1452 log_debug("Forked %s as %lu", command
->path
, (unsigned long) pid
);
1454 exec_status_start(&command
->exec_status
, pid
);
1460 strv_free(files_env
);
1465 void exec_context_init(ExecContext
*c
) {
1469 c
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, 0);
1470 c
->cpu_sched_policy
= SCHED_OTHER
;
1471 c
->syslog_priority
= LOG_DAEMON
|LOG_INFO
;
1472 c
->syslog_level_prefix
= true;
1473 c
->mount_flags
= MS_SHARED
;
1474 c
->kill_signal
= SIGTERM
;
1475 c
->send_sigkill
= true;
1478 void exec_context_done(ExecContext
*c
) {
1483 strv_free(c
->environment
);
1484 c
->environment
= NULL
;
1486 strv_free(c
->environment_files
);
1487 c
->environment_files
= NULL
;
1489 for (l
= 0; l
< ELEMENTSOF(c
->rlimit
); l
++) {
1491 c
->rlimit
[l
] = NULL
;
1494 free(c
->working_directory
);
1495 c
->working_directory
= NULL
;
1496 free(c
->root_directory
);
1497 c
->root_directory
= NULL
;
1502 free(c
->tcpwrap_name
);
1503 c
->tcpwrap_name
= NULL
;
1505 free(c
->syslog_identifier
);
1506 c
->syslog_identifier
= NULL
;
1514 strv_free(c
->supplementary_groups
);
1515 c
->supplementary_groups
= NULL
;
1520 if (c
->capabilities
) {
1521 cap_free(c
->capabilities
);
1522 c
->capabilities
= NULL
;
1525 strv_free(c
->read_only_dirs
);
1526 c
->read_only_dirs
= NULL
;
1528 strv_free(c
->read_write_dirs
);
1529 c
->read_write_dirs
= NULL
;
1531 strv_free(c
->inaccessible_dirs
);
1532 c
->inaccessible_dirs
= NULL
;
1535 CPU_FREE(c
->cpuset
);
1541 void exec_command_done(ExecCommand
*c
) {
1551 void exec_command_done_array(ExecCommand
*c
, unsigned n
) {
1554 for (i
= 0; i
< n
; i
++)
1555 exec_command_done(c
+i
);
1558 void exec_command_free_list(ExecCommand
*c
) {
1562 LIST_REMOVE(ExecCommand
, command
, c
, i
);
1563 exec_command_done(i
);
1568 void exec_command_free_array(ExecCommand
**c
, unsigned n
) {
1571 for (i
= 0; i
< n
; i
++) {
1572 exec_command_free_list(c
[i
]);
1577 int exec_context_load_environment(const ExecContext
*c
, char ***l
) {
1578 char **i
, **r
= NULL
;
1583 STRV_FOREACH(i
, c
->environment_files
) {
1586 bool ignore
= false;
1596 if (!path_is_absolute(fn
)) {
1605 if ((k
= load_env_file(fn
, &p
)) < 0) {
1619 m
= strv_env_merge(2, r
, p
);
1635 static void strv_fprintf(FILE *f
, char **l
) {
1641 fprintf(f
, " %s", *g
);
1644 void exec_context_dump(ExecContext
*c
, FILE* f
, const char *prefix
) {
1656 "%sWorkingDirectory: %s\n"
1657 "%sRootDirectory: %s\n"
1658 "%sNonBlocking: %s\n"
1659 "%sPrivateTmp: %s\n"
1660 "%sControlGroupModify: %s\n",
1662 prefix
, c
->working_directory
? c
->working_directory
: "/",
1663 prefix
, c
->root_directory
? c
->root_directory
: "/",
1664 prefix
, yes_no(c
->non_blocking
),
1665 prefix
, yes_no(c
->private_tmp
),
1666 prefix
, yes_no(c
->control_group_modify
));
1668 STRV_FOREACH(e
, c
->environment
)
1669 fprintf(f
, "%sEnvironment: %s\n", prefix
, *e
);
1671 STRV_FOREACH(e
, c
->environment_files
)
1672 fprintf(f
, "%sEnvironmentFile: %s\n", prefix
, *e
);
1674 if (c
->tcpwrap_name
)
1676 "%sTCPWrapName: %s\n",
1677 prefix
, c
->tcpwrap_name
);
1684 if (c
->oom_score_adjust_set
)
1686 "%sOOMScoreAdjust: %i\n",
1687 prefix
, c
->oom_score_adjust
);
1689 for (i
= 0; i
< RLIM_NLIMITS
; i
++)
1691 fprintf(f
, "%s%s: %llu\n", prefix
, rlimit_to_string(i
), (unsigned long long) c
->rlimit
[i
]->rlim_max
);
1695 "%sIOSchedulingClass: %s\n"
1696 "%sIOPriority: %i\n",
1697 prefix
, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c
->ioprio
)),
1698 prefix
, (int) IOPRIO_PRIO_DATA(c
->ioprio
));
1700 if (c
->cpu_sched_set
)
1702 "%sCPUSchedulingPolicy: %s\n"
1703 "%sCPUSchedulingPriority: %i\n"
1704 "%sCPUSchedulingResetOnFork: %s\n",
1705 prefix
, sched_policy_to_string(c
->cpu_sched_policy
),
1706 prefix
, c
->cpu_sched_priority
,
1707 prefix
, yes_no(c
->cpu_sched_reset_on_fork
));
1710 fprintf(f
, "%sCPUAffinity:", prefix
);
1711 for (i
= 0; i
< c
->cpuset_ncpus
; i
++)
1712 if (CPU_ISSET_S(i
, CPU_ALLOC_SIZE(c
->cpuset_ncpus
), c
->cpuset
))
1713 fprintf(f
, " %i", i
);
1717 if (c
->timer_slack_nsec_set
)
1718 fprintf(f
, "%sTimerSlackNSec: %lu\n", prefix
, c
->timer_slack_nsec
);
1721 "%sStandardInput: %s\n"
1722 "%sStandardOutput: %s\n"
1723 "%sStandardError: %s\n",
1724 prefix
, exec_input_to_string(c
->std_input
),
1725 prefix
, exec_output_to_string(c
->std_output
),
1726 prefix
, exec_output_to_string(c
->std_error
));
1732 "%sTTYVHangup: %s\n"
1733 "%sTTYVTDisallocate: %s\n",
1734 prefix
, c
->tty_path
,
1735 prefix
, yes_no(c
->tty_reset
),
1736 prefix
, yes_no(c
->tty_vhangup
),
1737 prefix
, yes_no(c
->tty_vt_disallocate
));
1739 if (c
->std_output
== EXEC_OUTPUT_SYSLOG
|| c
->std_output
== EXEC_OUTPUT_KMSG
||
1740 c
->std_output
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
|| c
->std_output
== EXEC_OUTPUT_KMSG_AND_CONSOLE
||
1741 c
->std_error
== EXEC_OUTPUT_SYSLOG
|| c
->std_error
== EXEC_OUTPUT_KMSG
||
1742 c
->std_error
== EXEC_OUTPUT_SYSLOG_AND_CONSOLE
|| c
->std_error
== EXEC_OUTPUT_KMSG_AND_CONSOLE
)
1744 "%sSyslogFacility: %s\n"
1745 "%sSyslogLevel: %s\n",
1746 prefix
, log_facility_unshifted_to_string(c
->syslog_priority
>> 3),
1747 prefix
, log_level_to_string(LOG_PRI(c
->syslog_priority
)));
1749 if (c
->capabilities
) {
1751 if ((t
= cap_to_text(c
->capabilities
, NULL
))) {
1752 fprintf(f
, "%sCapabilities: %s\n",
1759 fprintf(f
, "%sSecure Bits:%s%s%s%s%s%s\n",
1761 (c
->secure_bits
& SECURE_KEEP_CAPS
) ? " keep-caps" : "",
1762 (c
->secure_bits
& SECURE_KEEP_CAPS_LOCKED
) ? " keep-caps-locked" : "",
1763 (c
->secure_bits
& SECURE_NO_SETUID_FIXUP
) ? " no-setuid-fixup" : "",
1764 (c
->secure_bits
& SECURE_NO_SETUID_FIXUP_LOCKED
) ? " no-setuid-fixup-locked" : "",
1765 (c
->secure_bits
& SECURE_NOROOT
) ? " noroot" : "",
1766 (c
->secure_bits
& SECURE_NOROOT_LOCKED
) ? "noroot-locked" : "");
1768 if (c
->capability_bounding_set_drop
) {
1770 fprintf(f
, "%sCapabilityBoundingSet:", prefix
);
1772 for (l
= 0; l
<= (unsigned long) CAP_LAST_CAP
; l
++)
1773 if (!(c
->capability_bounding_set_drop
& ((uint64_t) 1ULL << (uint64_t) l
))) {
1776 if ((t
= cap_to_name(l
))) {
1777 fprintf(f
, " %s", t
);
1786 fprintf(f
, "%sUser: %s\n", prefix
, c
->user
);
1788 fprintf(f
, "%sGroup: %s\n", prefix
, c
->group
);
1790 if (strv_length(c
->supplementary_groups
) > 0) {
1791 fprintf(f
, "%sSupplementaryGroups:", prefix
);
1792 strv_fprintf(f
, c
->supplementary_groups
);
1797 fprintf(f
, "%sPAMName: %s\n", prefix
, c
->pam_name
);
1799 if (strv_length(c
->read_write_dirs
) > 0) {
1800 fprintf(f
, "%sReadWriteDirs:", prefix
);
1801 strv_fprintf(f
, c
->read_write_dirs
);
1805 if (strv_length(c
->read_only_dirs
) > 0) {
1806 fprintf(f
, "%sReadOnlyDirs:", prefix
);
1807 strv_fprintf(f
, c
->read_only_dirs
);
1811 if (strv_length(c
->inaccessible_dirs
) > 0) {
1812 fprintf(f
, "%sInaccessibleDirs:", prefix
);
1813 strv_fprintf(f
, c
->inaccessible_dirs
);
1819 "%sKillSignal: SIG%s\n"
1820 "%sSendSIGKILL: %s\n",
1821 prefix
, kill_mode_to_string(c
->kill_mode
),
1822 prefix
, signal_to_string(c
->kill_signal
),
1823 prefix
, yes_no(c
->send_sigkill
));
1827 "%sUtmpIdentifier: %s\n",
1828 prefix
, c
->utmp_id
);
1831 void exec_status_start(ExecStatus
*s
, pid_t pid
) {
1836 dual_timestamp_get(&s
->start_timestamp
);
1839 void exec_status_exit(ExecStatus
*s
, ExecContext
*context
, pid_t pid
, int code
, int status
) {
1842 if ((s
->pid
&& s
->pid
!= pid
) ||
1843 !s
->start_timestamp
.realtime
<= 0)
1847 dual_timestamp_get(&s
->exit_timestamp
);
1853 if (context
->utmp_id
)
1854 utmp_put_dead_process(context
->utmp_id
, pid
, code
, status
);
1856 exec_context_tty_reset(context
);
1860 void exec_status_dump(ExecStatus
*s
, FILE *f
, const char *prefix
) {
1861 char buf
[FORMAT_TIMESTAMP_MAX
];
1874 prefix
, (unsigned long) s
->pid
);
1876 if (s
->start_timestamp
.realtime
> 0)
1878 "%sStart Timestamp: %s\n",
1879 prefix
, format_timestamp(buf
, sizeof(buf
), s
->start_timestamp
.realtime
));
1881 if (s
->exit_timestamp
.realtime
> 0)
1883 "%sExit Timestamp: %s\n"
1885 "%sExit Status: %i\n",
1886 prefix
, format_timestamp(buf
, sizeof(buf
), s
->exit_timestamp
.realtime
),
1887 prefix
, sigchld_code_to_string(s
->code
),
1891 char *exec_command_line(char **argv
) {
1899 STRV_FOREACH(a
, argv
)
1902 if (!(n
= new(char, k
)))
1906 STRV_FOREACH(a
, argv
) {
1913 if (strpbrk(*a
, WHITESPACE
)) {
1924 /* FIXME: this doesn't really handle arguments that have
1925 * spaces and ticks in them */
1930 void exec_command_dump(ExecCommand
*c
, FILE *f
, const char *prefix
) {
1932 const char *prefix2
;
1941 p2
= strappend(prefix
, "\t");
1942 prefix2
= p2
? p2
: prefix
;
1944 cmd
= exec_command_line(c
->argv
);
1947 "%sCommand Line: %s\n",
1948 prefix
, cmd
? cmd
: strerror(ENOMEM
));
1952 exec_status_dump(&c
->exec_status
, f
, prefix2
);
1957 void exec_command_dump_list(ExecCommand
*c
, FILE *f
, const char *prefix
) {
1963 LIST_FOREACH(command
, c
, c
)
1964 exec_command_dump(c
, f
, prefix
);
1967 void exec_command_append_list(ExecCommand
**l
, ExecCommand
*e
) {
1974 /* It's kind of important, that we keep the order here */
1975 LIST_FIND_TAIL(ExecCommand
, command
, *l
, end
);
1976 LIST_INSERT_AFTER(ExecCommand
, command
, *l
, end
, e
);
1981 int exec_command_set(ExecCommand
*c
, const char *path
, ...) {
1989 l
= strv_new_ap(path
, ap
);
1995 if (!(p
= strdup(path
))) {
2009 static const char* const exec_input_table
[_EXEC_INPUT_MAX
] = {
2010 [EXEC_INPUT_NULL
] = "null",
2011 [EXEC_INPUT_TTY
] = "tty",
2012 [EXEC_INPUT_TTY_FORCE
] = "tty-force",
2013 [EXEC_INPUT_TTY_FAIL
] = "tty-fail",
2014 [EXEC_INPUT_SOCKET
] = "socket"
2017 DEFINE_STRING_TABLE_LOOKUP(exec_input
, ExecInput
);
2019 static const char* const exec_output_table
[_EXEC_OUTPUT_MAX
] = {
2020 [EXEC_OUTPUT_INHERIT
] = "inherit",
2021 [EXEC_OUTPUT_NULL
] = "null",
2022 [EXEC_OUTPUT_TTY
] = "tty",
2023 [EXEC_OUTPUT_SYSLOG
] = "syslog",
2024 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE
] = "syslog+console",
2025 [EXEC_OUTPUT_KMSG
] = "kmsg",
2026 [EXEC_OUTPUT_KMSG_AND_CONSOLE
] = "kmsg+console",
2027 [EXEC_OUTPUT_SOCKET
] = "socket"
2030 DEFINE_STRING_TABLE_LOOKUP(exec_output
, ExecOutput
);
2032 static const char* const kill_mode_table
[_KILL_MODE_MAX
] = {
2033 [KILL_CONTROL_GROUP
] = "control-group",
2034 [KILL_PROCESS
] = "process",
2035 [KILL_NONE
] = "none"
2038 DEFINE_STRING_TABLE_LOOKUP(kill_mode
, KillMode
);
2040 static const char* const kill_who_table
[_KILL_WHO_MAX
] = {
2041 [KILL_MAIN
] = "main",
2042 [KILL_CONTROL
] = "control",
2046 DEFINE_STRING_TABLE_LOOKUP(kill_who
, KillWho
);