]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/core/execute.c
core,network: major per-object logging rework
[thirdparty/systemd.git] / src / core / execute.c
... / ...
CommitLineData
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <errno.h>
23#include <fcntl.h>
24#include <unistd.h>
25#include <string.h>
26#include <signal.h>
27#include <sys/socket.h>
28#include <sys/un.h>
29#include <sys/prctl.h>
30#include <sys/stat.h>
31#include <grp.h>
32#include <poll.h>
33#include <glob.h>
34#include <sys/personality.h>
35
36#ifdef HAVE_PAM
37#include <security/pam_appl.h>
38#endif
39
40#ifdef HAVE_SELINUX
41#include <selinux/selinux.h>
42#endif
43
44#ifdef HAVE_SECCOMP
45#include <seccomp.h>
46#endif
47
48#ifdef HAVE_APPARMOR
49#include <sys/apparmor.h>
50#endif
51
52#include "rm-rf.h"
53#include "execute.h"
54#include "strv.h"
55#include "macro.h"
56#include "capability.h"
57#include "util.h"
58#include "log.h"
59#include "sd-messages.h"
60#include "ioprio.h"
61#include "securebits.h"
62#include "namespace.h"
63#include "exit-status.h"
64#include "missing.h"
65#include "utmp-wtmp.h"
66#include "def.h"
67#include "path-util.h"
68#include "env-util.h"
69#include "fileio.h"
70#include "unit.h"
71#include "async.h"
72#include "selinux-util.h"
73#include "errno-list.h"
74#include "af-list.h"
75#include "mkdir.h"
76#include "smack-util.h"
77#include "bus-endpoint.h"
78#include "cap-list.h"
79#include "formats-util.h"
80#include "process-util.h"
81#include "terminal-util.h"
82
83#ifdef HAVE_APPARMOR
84#include "apparmor-util.h"
85#endif
86
87#ifdef HAVE_SECCOMP
88#include "seccomp-util.h"
89#endif
90
91#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
93
94/* This assumes there is a 'tty' group */
95#define TTY_MODE 0620
96
97#define SNDBUF_SIZE (8*1024*1024)
98
/* Moves the passed file descriptors so that fds[i] ends up at fd number
 * i+3, i.e. directly after stdin/stdout/stderr. The array is updated in
 * place with the new fd numbers. Returns 0 on success or a negative
 * errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD picks the lowest free fd >= target. If
                         * the slot was taken, remember the first such
                         * index so the next pass starts over from it. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
142
/* Applies the requested O_NONBLOCK state to every fd in the array and
 * clears FD_CLOEXEC on each of them. Returns 0 on success, or the first
 * negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be passed on to our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170_pure_ static const char *tty_path(const ExecContext *context) {
171 assert(context);
172
173 if (context->tty_path)
174 return context->tty_path;
175
176 return "/dev/console";
177}
178
179static void exec_context_tty_reset(const ExecContext *context) {
180 assert(context);
181
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
184
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
187
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
190}
191
192static bool is_terminal_output(ExecOutput o) {
193 return
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198}
199
/* Opens /dev/null with the given flags (plus O_NOCTTY) and installs it
 * as file descriptor nfd. Returns nfd on success, a negative errno-style
 * error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted fd number */
        if (null_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(null_fd, nfd) < 0)
                result = -errno;

        safe_close(null_fd);

        return result;
}
217
218static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
219 union sockaddr_union sa = {
220 .un.sun_family = AF_UNIX,
221 .un.sun_path = "/run/systemd/journal/stdout",
222 };
223 uid_t olduid = UID_INVALID;
224 gid_t oldgid = GID_INVALID;
225 int r;
226
227 if (gid != GID_INVALID) {
228 oldgid = getgid();
229
230 r = setegid(gid);
231 if (r < 0)
232 return -errno;
233 }
234
235 if (uid != UID_INVALID) {
236 olduid = getuid();
237
238 r = seteuid(uid);
239 if (r < 0) {
240 r = -errno;
241 goto restore_gid;
242 }
243 }
244
245 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
246 if (r < 0)
247 r = -errno;
248
249 /* If we fail to restore the uid or gid, things will likely
250 fail later on. This should only happen if an LSM interferes. */
251
252 if (uid != UID_INVALID)
253 (void) seteuid(olduid);
254
255 restore_gid:
256 if (gid != GID_INVALID)
257 (void) setegid(oldgid);
258
259 return r;
260}
261
262static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
263 int fd, r;
264
265 assert(context);
266 assert(output < _EXEC_OUTPUT_MAX);
267 assert(ident);
268 assert(nfd >= 0);
269
270 fd = socket(AF_UNIX, SOCK_STREAM, 0);
271 if (fd < 0)
272 return -errno;
273
274 r = connect_journal_socket(fd, uid, gid);
275 if (r < 0)
276 return r;
277
278 if (shutdown(fd, SHUT_RD) < 0) {
279 safe_close(fd);
280 return -errno;
281 }
282
283 fd_inc_sndbuf(fd, SNDBUF_SIZE);
284
285 dprintf(fd,
286 "%s\n"
287 "%s\n"
288 "%i\n"
289 "%i\n"
290 "%i\n"
291 "%i\n"
292 "%i\n",
293 context->syslog_identifier ? context->syslog_identifier : ident,
294 unit_id,
295 context->syslog_priority,
296 !!context->syslog_level_prefix,
297 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
298 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
299 is_terminal_output(output));
300
301 if (fd != nfd) {
302 r = dup2(fd, nfd) < 0 ? -errno : nfd;
303 safe_close(fd);
304 } else
305 r = nfd;
306
307 return r;
308}
/* Opens the terminal at path with the given open mode (plus O_NOCTTY)
 * and installs it as file descriptor nfd. Returns nfd on success, a
 * negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(tty_fd, nfd) < 0)
                result = -errno;

        safe_close(tty_fd);

        return result;
}
326
327static bool is_terminal_input(ExecInput i) {
328 return
329 i == EXEC_INPUT_TTY ||
330 i == EXEC_INPUT_TTY_FORCE ||
331 i == EXEC_INPUT_TTY_FAIL;
332}
333
334static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
335
336 if (is_terminal_input(std_input) && !apply_tty_stdin)
337 return EXEC_INPUT_NULL;
338
339 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
340 return EXEC_INPUT_NULL;
341
342 return std_input;
343}
344
345static int fixup_output(ExecOutput std_output, int socket_fd) {
346
347 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
348 return EXEC_OUTPUT_INHERIT;
349
350 return std_output;
351}
352
353static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
354 ExecInput i;
355
356 assert(context);
357
358 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
359
360 switch (i) {
361
362 case EXEC_INPUT_NULL:
363 return open_null_as(O_RDONLY, STDIN_FILENO);
364
365 case EXEC_INPUT_TTY:
366 case EXEC_INPUT_TTY_FORCE:
367 case EXEC_INPUT_TTY_FAIL: {
368 int fd, r;
369
370 fd = acquire_terminal(tty_path(context),
371 i == EXEC_INPUT_TTY_FAIL,
372 i == EXEC_INPUT_TTY_FORCE,
373 false,
374 USEC_INFINITY);
375 if (fd < 0)
376 return fd;
377
378 if (fd != STDIN_FILENO) {
379 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
380 safe_close(fd);
381 } else
382 r = STDIN_FILENO;
383
384 return r;
385 }
386
387 case EXEC_INPUT_SOCKET:
388 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
389
390 default:
391 assert_not_reached("Unknown input type");
392 }
393}
394
395static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
396 ExecOutput o;
397 ExecInput i;
398 int r;
399
400 assert(unit);
401 assert(context);
402 assert(ident);
403
404 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
405 o = fixup_output(context->std_output, socket_fd);
406
407 if (fileno == STDERR_FILENO) {
408 ExecOutput e;
409 e = fixup_output(context->std_error, socket_fd);
410
411 /* This expects the input and output are already set up */
412
413 /* Don't change the stderr file descriptor if we inherit all
414 * the way and are not on a tty */
415 if (e == EXEC_OUTPUT_INHERIT &&
416 o == EXEC_OUTPUT_INHERIT &&
417 i == EXEC_INPUT_NULL &&
418 !is_terminal_input(context->std_input) &&
419 getppid () != 1)
420 return fileno;
421
422 /* Duplicate from stdout if possible */
423 if (e == o || e == EXEC_OUTPUT_INHERIT)
424 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
425
426 o = e;
427
428 } else if (o == EXEC_OUTPUT_INHERIT) {
429 /* If input got downgraded, inherit the original value */
430 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
431 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
432
433 /* If the input is connected to anything that's not a /dev/null, inherit that... */
434 if (i != EXEC_INPUT_NULL)
435 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
436
437 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
438 if (getppid() != 1)
439 return fileno;
440
441 /* We need to open /dev/null here anew, to get the right access mode. */
442 return open_null_as(O_WRONLY, fileno);
443 }
444
445 switch (o) {
446
447 case EXEC_OUTPUT_NULL:
448 return open_null_as(O_WRONLY, fileno);
449
450 case EXEC_OUTPUT_TTY:
451 if (is_terminal_input(i))
452 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
453
454 /* We don't reset the terminal if this is just about output */
455 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
456
457 case EXEC_OUTPUT_SYSLOG:
458 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
459 case EXEC_OUTPUT_KMSG:
460 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
461 case EXEC_OUTPUT_JOURNAL:
462 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
463 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
464 if (r < 0) {
465 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
466 r = open_null_as(O_WRONLY, fileno);
467 }
468 return r;
469
470 case EXEC_OUTPUT_SOCKET:
471 assert(socket_fd >= 0);
472 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
473
474 default:
475 assert_not_reached("Unknown error type");
476 }
477}
478
479static int chown_terminal(int fd, uid_t uid) {
480 struct stat st;
481
482 assert(fd >= 0);
483
484 /* This might fail. What matters are the results. */
485 (void) fchown(fd, uid, -1);
486 (void) fchmod(fd, TTY_MODE);
487
488 if (fstat(fd, &st) < 0)
489 return -errno;
490
491 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
492 return -EPERM;
493
494 return 0;
495}
496
497static int setup_confirm_stdio(int *_saved_stdin,
498 int *_saved_stdout) {
499 int fd = -1, saved_stdin, saved_stdout = -1, r;
500
501 assert(_saved_stdin);
502 assert(_saved_stdout);
503
504 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
505 if (saved_stdin < 0)
506 return -errno;
507
508 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
509 if (saved_stdout < 0) {
510 r = errno;
511 goto fail;
512 }
513
514 fd = acquire_terminal(
515 "/dev/console",
516 false,
517 false,
518 false,
519 DEFAULT_CONFIRM_USEC);
520 if (fd < 0) {
521 r = fd;
522 goto fail;
523 }
524
525 r = chown_terminal(fd, getuid());
526 if (r < 0)
527 goto fail;
528
529 if (dup2(fd, STDIN_FILENO) < 0) {
530 r = -errno;
531 goto fail;
532 }
533
534 if (dup2(fd, STDOUT_FILENO) < 0) {
535 r = -errno;
536 goto fail;
537 }
538
539 if (fd >= 2)
540 safe_close(fd);
541
542 *_saved_stdin = saved_stdin;
543 *_saved_stdout = saved_stdout;
544
545 return 0;
546
547fail:
548 safe_close(saved_stdout);
549 safe_close(saved_stdin);
550 safe_close(fd);
551
552 return r;
553}
554
555_printf_(1, 2) static int write_confirm_message(const char *format, ...) {
556 _cleanup_close_ int fd = -1;
557 va_list ap;
558
559 assert(format);
560
561 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
562 if (fd < 0)
563 return fd;
564
565 va_start(ap, format);
566 vdprintf(fd, format, ap);
567 va_end(ap);
568
569 return 0;
570}
571
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back as stdin/stdout (where valid) and closes the saved
 * copies. Returns 0, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
595
596static int ask_for_confirmation(char *response, char **argv) {
597 int saved_stdout = -1, saved_stdin = -1, r;
598 _cleanup_free_ char *line = NULL;
599
600 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
601 if (r < 0)
602 return r;
603
604 line = exec_command_line(argv);
605 if (!line)
606 return -ENOMEM;
607
608 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
609
610 restore_confirm_stdio(&saved_stdin, &saved_stdout);
611
612 return r;
613}
614
615static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
616 bool keep_groups = false;
617 int r;
618
619 assert(context);
620
621 /* Lookup and set GID and supplementary group list. Here too
622 * we avoid NSS lookups for gid=0. */
623
624 if (context->group || username) {
625
626 if (context->group) {
627 const char *g = context->group;
628
629 if ((r = get_group_creds(&g, &gid)) < 0)
630 return r;
631 }
632
633 /* First step, initialize groups from /etc/groups */
634 if (username && gid != 0) {
635 if (initgroups(username, gid) < 0)
636 return -errno;
637
638 keep_groups = true;
639 }
640
641 /* Second step, set our gids */
642 if (setresgid(gid, gid, gid) < 0)
643 return -errno;
644 }
645
646 if (context->supplementary_groups) {
647 int ngroups_max, k;
648 gid_t *gids;
649 char **i;
650
651 /* Final step, initialize any manually set supplementary groups */
652 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
653
654 if (!(gids = new(gid_t, ngroups_max)))
655 return -ENOMEM;
656
657 if (keep_groups) {
658 if ((k = getgroups(ngroups_max, gids)) < 0) {
659 free(gids);
660 return -errno;
661 }
662 } else
663 k = 0;
664
665 STRV_FOREACH(i, context->supplementary_groups) {
666 const char *g;
667
668 if (k >= ngroups_max) {
669 free(gids);
670 return -E2BIG;
671 }
672
673 g = *i;
674 r = get_group_creds(&g, gids+k);
675 if (r < 0) {
676 free(gids);
677 return r;
678 }
679
680 k++;
681 }
682
683 if (setgroups(k, gids) < 0) {
684 free(gids);
685 return -errno;
686 }
687
688 free(gids);
689 }
690
691 return 0;
692}
693
694static int enforce_user(const ExecContext *context, uid_t uid) {
695 assert(context);
696
697 /* Sets (but doesn't lookup) the uid and make sure we keep the
698 * capabilities while doing so. */
699
700 if (context->capabilities) {
701 _cleanup_cap_free_ cap_t d = NULL;
702 static const cap_value_t bits[] = {
703 CAP_SETUID, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
705 };
706
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
710 if (uid != 0) {
711 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
712
713 if (prctl(PR_GET_SECUREBITS) != sb)
714 if (prctl(PR_SET_SECUREBITS, sb) < 0)
715 return -errno;
716 }
717
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
720
721 d = cap_dup(context->capabilities);
722 if (!d)
723 return -errno;
724
725 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
726 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
727 return -errno;
728
729 if (cap_set_proc(d) < 0)
730 return -errno;
731 }
732
733 /* Third step: actually set the uids */
734 if (setresuid(uid, uid, uid) < 0)
735 return -errno;
736
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might got
739 corrupted due to the setresuid() so we need clean them up
740 later. This is done outside of this call. */
741
742 return 0;
743}
744
745#ifdef HAVE_PAM
746
747static int null_conv(
748 int num_msg,
749 const struct pam_message **msg,
750 struct pam_response **resp,
751 void *appdata_ptr) {
752
753 /* We don't support conversations */
754
755 return PAM_CONV_ERR;
756}
757
758static int setup_pam(
759 const char *name,
760 const char *user,
761 uid_t uid,
762 const char *tty,
763 char ***pam_env,
764 int fds[], unsigned n_fds) {
765
766 static const struct pam_conv conv = {
767 .conv = null_conv,
768 .appdata_ptr = NULL
769 };
770
771 pam_handle_t *handle = NULL;
772 sigset_t ss, old_ss;
773 int pam_code = PAM_SUCCESS;
774 int err;
775 char **e = NULL;
776 bool close_session = false;
777 pid_t pam_pid = 0, parent_pid;
778 int flags = 0;
779
780 assert(name);
781 assert(user);
782 assert(pam_env);
783
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_GET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
790
791 if (log_get_max_level() < LOG_DEBUG)
792 flags |= PAM_SILENT;
793
794 pam_code = pam_start(name, user, &conv, &handle);
795 if (pam_code != PAM_SUCCESS) {
796 handle = NULL;
797 goto fail;
798 }
799
800 if (tty) {
801 pam_code = pam_set_item(handle, PAM_TTY, tty);
802 if (pam_code != PAM_SUCCESS)
803 goto fail;
804 }
805
806 pam_code = pam_acct_mgmt(handle, flags);
807 if (pam_code != PAM_SUCCESS)
808 goto fail;
809
810 pam_code = pam_open_session(handle, flags);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813
814 close_session = true;
815
816 e = pam_getenvlist(handle);
817 if (!e) {
818 pam_code = PAM_BUF_ERR;
819 goto fail;
820 }
821
822 /* Block SIGTERM, so that we know that it won't get lost in
823 * the child */
824 if (sigemptyset(&ss) < 0 ||
825 sigaddset(&ss, SIGTERM) < 0 ||
826 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
827 goto fail;
828
829 parent_pid = getpid();
830
831 pam_pid = fork();
832 if (pam_pid < 0)
833 goto fail;
834
835 if (pam_pid == 0) {
836 int sig;
837 int r = EXIT_PAM;
838
839 /* The child's job is to reset the PAM session on
840 * termination */
841
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
845
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds, n_fds);
850
851 /* Drop privileges - we don't need any to pam_close_session
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid, uid, uid) < 0)
856 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
857
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
864 goto child_finish;
865
866 /* Check if our parent process might already have
867 * died? */
868 if (getppid() == parent_pid) {
869 for (;;) {
870 if (sigwait(&ss, &sig) < 0) {
871 if (errno == EINTR)
872 continue;
873
874 goto child_finish;
875 }
876
877 assert(sig == SIGTERM);
878 break;
879 }
880 }
881
882 /* If our parent died we'll end the session */
883 if (getppid() != parent_pid) {
884 pam_code = pam_close_session(handle, flags);
885 if (pam_code != PAM_SUCCESS)
886 goto child_finish;
887 }
888
889 r = 0;
890
891 child_finish:
892 pam_end(handle, pam_code | flags);
893 _exit(r);
894 }
895
896 /* If the child was forked off successfully it will do all the
897 * cleanups, so forget about the handle here. */
898 handle = NULL;
899
900 /* Unblock SIGTERM again in the parent */
901 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
902 goto fail;
903
904 /* We close the log explicitly here, since the PAM modules
905 * might have opened it, but we don't want this fd around. */
906 closelog();
907
908 *pam_env = e;
909 e = NULL;
910
911 return 0;
912
913fail:
914 if (pam_code != PAM_SUCCESS) {
915 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
916 err = -EPERM; /* PAM errors do not map to errno */
917 } else {
918 log_error_errno(errno, "PAM failed: %m");
919 err = -errno;
920 }
921
922 if (handle) {
923 if (close_session)
924 pam_code = pam_close_session(handle, flags);
925
926 pam_end(handle, pam_code | flags);
927 }
928
929 strv_free(e);
930
931 closelog();
932
933 if (pam_pid > 1) {
934 kill(pam_pid, SIGTERM);
935 kill(pam_pid, SIGCONT);
936 }
937
938 return err;
939}
940#endif
941
/* Renames the current process to "(<name>)", where <name> is the last
 * (up to) eight characters of the basename of path, or to "(...)" if
 * the basename is empty. */
static void rename_process_from_path(const char *path) {
        char name_buf[11];
        const char *base;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the name is usually more
                 * interesting, since the first bit might just be
                 * "systemd-" */
                base += len - 8;
                len = 8;
        }

        name_buf[0] = '(';
        memcpy(name_buf + 1, base, len);
        name_buf[len + 1] = ')';
        name_buf[len + 2] = 0;

        rename_process(name_buf);
}
972
973#ifdef HAVE_SECCOMP
974
975static int apply_seccomp(const ExecContext *c) {
976 uint32_t negative_action, action;
977 scmp_filter_ctx *seccomp;
978 Iterator i;
979 void *id;
980 int r;
981
982 assert(c);
983
984 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
985
986 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
987 if (!seccomp)
988 return -ENOMEM;
989
990 if (c->syscall_archs) {
991
992 SET_FOREACH(id, c->syscall_archs, i) {
993 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
994 if (r == -EEXIST)
995 continue;
996 if (r < 0)
997 goto finish;
998 }
999
1000 } else {
1001 r = seccomp_add_secondary_archs(seccomp);
1002 if (r < 0)
1003 goto finish;
1004 }
1005
1006 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1007 SET_FOREACH(id, c->syscall_filter, i) {
1008 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1009 if (r < 0)
1010 goto finish;
1011 }
1012
1013 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1014 if (r < 0)
1015 goto finish;
1016
1017 r = seccomp_load(seccomp);
1018
1019finish:
1020 seccomp_release(seccomp);
1021 return r;
1022}
1023
1024static int apply_address_families(const ExecContext *c) {
1025 scmp_filter_ctx *seccomp;
1026 Iterator i;
1027 int r;
1028
1029 assert(c);
1030
1031 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1032 if (!seccomp)
1033 return -ENOMEM;
1034
1035 r = seccomp_add_secondary_archs(seccomp);
1036 if (r < 0)
1037 goto finish;
1038
1039 if (c->address_families_whitelist) {
1040 int af, first = 0, last = 0;
1041 void *afp;
1042
1043 /* If this is a whitelist, we first block the address
1044 * families that are out of range and then everything
1045 * that is not in the set. First, we find the lowest
1046 * and highest address family in the set. */
1047
1048 SET_FOREACH(afp, c->address_families, i) {
1049 af = PTR_TO_INT(afp);
1050
1051 if (af <= 0 || af >= af_max())
1052 continue;
1053
1054 if (first == 0 || af < first)
1055 first = af;
1056
1057 if (last == 0 || af > last)
1058 last = af;
1059 }
1060
1061 assert((first == 0) == (last == 0));
1062
1063 if (first == 0) {
1064
1065 /* No entries in the valid range, block everything */
1066 r = seccomp_rule_add(
1067 seccomp,
1068 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1069 SCMP_SYS(socket),
1070 0);
1071 if (r < 0)
1072 goto finish;
1073
1074 } else {
1075
1076 /* Block everything below the first entry */
1077 r = seccomp_rule_add(
1078 seccomp,
1079 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1080 SCMP_SYS(socket),
1081 1,
1082 SCMP_A0(SCMP_CMP_LT, first));
1083 if (r < 0)
1084 goto finish;
1085
1086 /* Block everything above the last entry */
1087 r = seccomp_rule_add(
1088 seccomp,
1089 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1090 SCMP_SYS(socket),
1091 1,
1092 SCMP_A0(SCMP_CMP_GT, last));
1093 if (r < 0)
1094 goto finish;
1095
1096 /* Block everything between the first and last
1097 * entry */
1098 for (af = 1; af < af_max(); af++) {
1099
1100 if (set_contains(c->address_families, INT_TO_PTR(af)))
1101 continue;
1102
1103 r = seccomp_rule_add(
1104 seccomp,
1105 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1106 SCMP_SYS(socket),
1107 1,
1108 SCMP_A0(SCMP_CMP_EQ, af));
1109 if (r < 0)
1110 goto finish;
1111 }
1112 }
1113
1114 } else {
1115 void *af;
1116
1117 /* If this is a blacklist, then generate one rule for
1118 * each address family that are then combined in OR
1119 * checks. */
1120
1121 SET_FOREACH(af, c->address_families, i) {
1122
1123 r = seccomp_rule_add(
1124 seccomp,
1125 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1126 SCMP_SYS(socket),
1127 1,
1128 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1129 if (r < 0)
1130 goto finish;
1131 }
1132 }
1133
1134 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1135 if (r < 0)
1136 goto finish;
1137
1138 r = seccomp_load(seccomp);
1139
1140finish:
1141 seccomp_release(seccomp);
1142 return r;
1143}
1144
1145#endif
1146
1147static void do_idle_pipe_dance(int idle_pipe[4]) {
1148 assert(idle_pipe);
1149
1150
1151 safe_close(idle_pipe[1]);
1152 safe_close(idle_pipe[2]);
1153
1154 if (idle_pipe[0] >= 0) {
1155 int r;
1156
1157 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1158
1159 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1160 /* Signal systemd that we are bored and want to continue. */
1161 r = write(idle_pipe[3], "x", 1);
1162 if (r > 0)
1163 /* Wait for systemd to react to the signal above. */
1164 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1165 }
1166
1167 safe_close(idle_pipe[0]);
1168
1169 }
1170
1171 safe_close(idle_pipe[3]);
1172}
1173
1174static int build_environment(
1175 const ExecContext *c,
1176 unsigned n_fds,
1177 usec_t watchdog_usec,
1178 const char *home,
1179 const char *username,
1180 const char *shell,
1181 char ***ret) {
1182
1183 _cleanup_strv_free_ char **our_env = NULL;
1184 unsigned n_env = 0;
1185 char *x;
1186
1187 assert(c);
1188 assert(ret);
1189
1190 our_env = new0(char*, 10);
1191 if (!our_env)
1192 return -ENOMEM;
1193
1194 if (n_fds > 0) {
1195 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1196 return -ENOMEM;
1197 our_env[n_env++] = x;
1198
1199 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1200 return -ENOMEM;
1201 our_env[n_env++] = x;
1202 }
1203
1204 if (watchdog_usec > 0) {
1205 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1206 return -ENOMEM;
1207 our_env[n_env++] = x;
1208
1209 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1210 return -ENOMEM;
1211 our_env[n_env++] = x;
1212 }
1213
1214 if (home) {
1215 x = strappend("HOME=", home);
1216 if (!x)
1217 return -ENOMEM;
1218 our_env[n_env++] = x;
1219 }
1220
1221 if (username) {
1222 x = strappend("LOGNAME=", username);
1223 if (!x)
1224 return -ENOMEM;
1225 our_env[n_env++] = x;
1226
1227 x = strappend("USER=", username);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231 }
1232
1233 if (shell) {
1234 x = strappend("SHELL=", shell);
1235 if (!x)
1236 return -ENOMEM;
1237 our_env[n_env++] = x;
1238 }
1239
1240 if (is_terminal_input(c->std_input) ||
1241 c->std_output == EXEC_OUTPUT_TTY ||
1242 c->std_error == EXEC_OUTPUT_TTY ||
1243 c->tty_path) {
1244
1245 x = strdup(default_term_for_tty(tty_path(c)));
1246 if (!x)
1247 return -ENOMEM;
1248 our_env[n_env++] = x;
1249 }
1250
1251 our_env[n_env++] = NULL;
1252 assert(n_env <= 10);
1253
1254 *ret = our_env;
1255 our_env = NULL;
1256
1257 return 0;
1258}
1259
1260static int exec_child(
1261 Unit *unit,
1262 ExecCommand *command,
1263 const ExecContext *context,
1264 const ExecParameters *params,
1265 ExecRuntime *runtime,
1266 char **argv,
1267 int socket_fd,
1268 int *fds, unsigned n_fds,
1269 char **files_env,
1270 int *exit_status) {
1271
1272 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1273 _cleanup_free_ char *mac_selinux_context_net = NULL;
1274 const char *username = NULL, *home = NULL, *shell = NULL;
1275 unsigned n_dont_close = 0;
1276 int dont_close[n_fds + 4];
1277 uid_t uid = UID_INVALID;
1278 gid_t gid = GID_INVALID;
1279 int i, r;
1280
1281 assert(unit);
1282 assert(command);
1283 assert(context);
1284 assert(params);
1285 assert(exit_status);
1286
1287 rename_process_from_path(command->path);
1288
1289 /* We reset exactly these signals, since they are the
1290 * only ones we set to SIG_IGN in the main daemon. All
1291 * others we leave untouched because we set them to
1292 * SIG_DFL or a valid handler initially, both of which
1293 * will be demoted to SIG_DFL. */
1294 default_signals(SIGNALS_CRASH_HANDLER,
1295 SIGNALS_IGNORE, -1);
1296
1297 if (context->ignore_sigpipe)
1298 ignore_signals(SIGPIPE, -1);
1299
1300 r = reset_signal_mask();
1301 if (r < 0) {
1302 *exit_status = EXIT_SIGNAL_MASK;
1303 return r;
1304 }
1305
1306 if (params->idle_pipe)
1307 do_idle_pipe_dance(params->idle_pipe);
1308
1309 /* Close sockets very early to make sure we don't
1310 * block init reexecution because it cannot bind its
1311 * sockets */
1312
1313 log_forget_fds();
1314
1315 if (socket_fd >= 0)
1316 dont_close[n_dont_close++] = socket_fd;
1317 if (n_fds > 0) {
1318 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1319 n_dont_close += n_fds;
1320 }
1321 if (params->bus_endpoint_fd >= 0)
1322 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1323 if (runtime) {
1324 if (runtime->netns_storage_socket[0] >= 0)
1325 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1326 if (runtime->netns_storage_socket[1] >= 0)
1327 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1328 }
1329
1330 r = close_all_fds(dont_close, n_dont_close);
1331 if (r < 0) {
1332 *exit_status = EXIT_FDS;
1333 return r;
1334 }
1335
1336 if (!context->same_pgrp)
1337 if (setsid() < 0) {
1338 *exit_status = EXIT_SETSID;
1339 return -errno;
1340 }
1341
1342 exec_context_tty_reset(context);
1343
1344 if (params->confirm_spawn) {
1345 char response;
1346
1347 r = ask_for_confirmation(&response, argv);
1348 if (r == -ETIMEDOUT)
1349 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1350 else if (r < 0)
1351 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1352 else if (response == 's') {
1353 write_confirm_message("Skipping execution.\n");
1354 *exit_status = EXIT_CONFIRM;
1355 return -ECANCELED;
1356 } else if (response == 'n') {
1357 write_confirm_message("Failing execution.\n");
1358 *exit_status = 0;
1359 return 0;
1360 }
1361 }
1362
1363 if (context->user) {
1364 username = context->user;
1365 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1366 if (r < 0) {
1367 *exit_status = EXIT_USER;
1368 return r;
1369 }
1370 }
1371
1372 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1373 * must sure to drop O_NONBLOCK */
1374 if (socket_fd >= 0)
1375 fd_nonblock(socket_fd, false);
1376
1377 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1378 if (r < 0) {
1379 *exit_status = EXIT_STDIN;
1380 return r;
1381 }
1382
1383 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1384 if (r < 0) {
1385 *exit_status = EXIT_STDOUT;
1386 return r;
1387 }
1388
1389 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1390 if (r < 0) {
1391 *exit_status = EXIT_STDERR;
1392 return r;
1393 }
1394
1395 if (params->cgroup_path) {
1396 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1397 if (r < 0) {
1398 *exit_status = EXIT_CGROUP;
1399 return r;
1400 }
1401 }
1402
1403 if (context->oom_score_adjust_set) {
1404 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1405
1406 /* When we can't make this change due to EPERM, then
1407 * let's silently skip over it. User namespaces
1408 * prohibit write access to this file, and we
1409 * shouldn't trip up over that. */
1410
1411 sprintf(t, "%i", context->oom_score_adjust);
1412 r = write_string_file("/proc/self/oom_score_adj", t);
1413 if (r == -EPERM || r == -EACCES) {
1414 log_open();
1415 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1416 log_close();
1417 } else if (r < 0) {
1418 *exit_status = EXIT_OOM_ADJUST;
1419 return -errno;
1420 }
1421 }
1422
1423 if (context->nice_set)
1424 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1425 *exit_status = EXIT_NICE;
1426 return -errno;
1427 }
1428
1429 if (context->cpu_sched_set) {
1430 struct sched_param param = {
1431 .sched_priority = context->cpu_sched_priority,
1432 };
1433
1434 r = sched_setscheduler(0,
1435 context->cpu_sched_policy |
1436 (context->cpu_sched_reset_on_fork ?
1437 SCHED_RESET_ON_FORK : 0),
1438 &param);
1439 if (r < 0) {
1440 *exit_status = EXIT_SETSCHEDULER;
1441 return -errno;
1442 }
1443 }
1444
1445 if (context->cpuset)
1446 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1447 *exit_status = EXIT_CPUAFFINITY;
1448 return -errno;
1449 }
1450
1451 if (context->ioprio_set)
1452 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1453 *exit_status = EXIT_IOPRIO;
1454 return -errno;
1455 }
1456
1457 if (context->timer_slack_nsec != NSEC_INFINITY)
1458 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1459 *exit_status = EXIT_TIMERSLACK;
1460 return -errno;
1461 }
1462
1463 if (context->personality != 0xffffffffUL)
1464 if (personality(context->personality) < 0) {
1465 *exit_status = EXIT_PERSONALITY;
1466 return -errno;
1467 }
1468
1469 if (context->utmp_id)
1470 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1471
1472 if (context->user && is_terminal_input(context->std_input)) {
1473 r = chown_terminal(STDIN_FILENO, uid);
1474 if (r < 0) {
1475 *exit_status = EXIT_STDIN;
1476 return r;
1477 }
1478 }
1479
1480#ifdef ENABLE_KDBUS
1481 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1482 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1483
1484 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1485 if (r < 0) {
1486 *exit_status = EXIT_BUS_ENDPOINT;
1487 return r;
1488 }
1489 }
1490#endif
1491
1492 /* If delegation is enabled we'll pass ownership of the cgroup
1493 * (but only in systemd's own controller hierarchy!) to the
1494 * user of the new process. */
1495 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1496 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1497 if (r < 0) {
1498 *exit_status = EXIT_CGROUP;
1499 return r;
1500 }
1501
1502
1503 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1504 if (r < 0) {
1505 *exit_status = EXIT_CGROUP;
1506 return r;
1507 }
1508 }
1509
1510 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1511 char **rt;
1512
1513 STRV_FOREACH(rt, context->runtime_directory) {
1514 _cleanup_free_ char *p;
1515
1516 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1517 if (!p) {
1518 *exit_status = EXIT_RUNTIME_DIRECTORY;
1519 return -ENOMEM;
1520 }
1521
1522 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1523 if (r < 0) {
1524 *exit_status = EXIT_RUNTIME_DIRECTORY;
1525 return r;
1526 }
1527 }
1528 }
1529
1530 if (params->apply_permissions) {
1531 r = enforce_groups(context, username, gid);
1532 if (r < 0) {
1533 *exit_status = EXIT_GROUP;
1534 return r;
1535 }
1536 }
1537
1538 umask(context->umask);
1539
1540#ifdef HAVE_PAM
1541 if (params->apply_permissions && context->pam_name && username) {
1542 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1543 if (r < 0) {
1544 *exit_status = EXIT_PAM;
1545 return r;
1546 }
1547 }
1548#endif
1549
1550 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1551 r = setup_netns(runtime->netns_storage_socket);
1552 if (r < 0) {
1553 *exit_status = EXIT_NETWORK;
1554 return r;
1555 }
1556 }
1557
1558 if (!strv_isempty(context->read_write_dirs) ||
1559 !strv_isempty(context->read_only_dirs) ||
1560 !strv_isempty(context->inaccessible_dirs) ||
1561 context->mount_flags != 0 ||
1562 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1563 params->bus_endpoint_path ||
1564 context->private_devices ||
1565 context->protect_system != PROTECT_SYSTEM_NO ||
1566 context->protect_home != PROTECT_HOME_NO) {
1567
1568 char *tmp = NULL, *var = NULL;
1569
1570 /* The runtime struct only contains the parent
1571 * of the private /tmp, which is
1572 * non-accessible to world users. Inside of it
1573 * there's a /tmp that is sticky, and that's
1574 * the one we want to use here. */
1575
1576 if (context->private_tmp && runtime) {
1577 if (runtime->tmp_dir)
1578 tmp = strjoina(runtime->tmp_dir, "/tmp");
1579 if (runtime->var_tmp_dir)
1580 var = strjoina(runtime->var_tmp_dir, "/tmp");
1581 }
1582
1583 r = setup_namespace(
1584 context->read_write_dirs,
1585 context->read_only_dirs,
1586 context->inaccessible_dirs,
1587 tmp,
1588 var,
1589 params->bus_endpoint_path,
1590 context->private_devices,
1591 context->protect_home,
1592 context->protect_system,
1593 context->mount_flags);
1594
1595 /* If we couldn't set up the namespace this is
1596 * probably due to a missing capability. In this case,
1597 * silently proceeed. */
1598 if (r == -EPERM || r == -EACCES) {
1599 log_open();
1600 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1601 log_close();
1602 } else if (r < 0) {
1603 *exit_status = EXIT_NAMESPACE;
1604 return r;
1605 }
1606 }
1607
1608 if (params->apply_chroot) {
1609 if (context->root_directory)
1610 if (chroot(context->root_directory) < 0) {
1611 *exit_status = EXIT_CHROOT;
1612 return -errno;
1613 }
1614
1615 if (chdir(context->working_directory ?: "/") < 0 &&
1616 !context->working_directory_missing_ok) {
1617 *exit_status = EXIT_CHDIR;
1618 return -errno;
1619 }
1620 } else {
1621 _cleanup_free_ char *d = NULL;
1622
1623 if (asprintf(&d, "%s/%s",
1624 context->root_directory ?: "",
1625 context->working_directory ?: "") < 0) {
1626 *exit_status = EXIT_MEMORY;
1627 return -ENOMEM;
1628 }
1629
1630 if (chdir(d) < 0 &&
1631 !context->working_directory_missing_ok) {
1632 *exit_status = EXIT_CHDIR;
1633 return -errno;
1634 }
1635 }
1636
1637#ifdef HAVE_SELINUX
1638 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1639 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1640 if (r < 0) {
1641 *exit_status = EXIT_SELINUX_CONTEXT;
1642 return r;
1643 }
1644 }
1645#endif
1646
1647 /* We repeat the fd closing here, to make sure that
1648 * nothing is leaked from the PAM modules. Note that
1649 * we are more aggressive this time since socket_fd
1650 * and the netns fds we don't need anymore. The custom
1651 * endpoint fd was needed to upload the policy and can
1652 * now be closed as well. */
1653 r = close_all_fds(fds, n_fds);
1654 if (r >= 0)
1655 r = shift_fds(fds, n_fds);
1656 if (r >= 0)
1657 r = flags_fds(fds, n_fds, context->non_blocking);
1658 if (r < 0) {
1659 *exit_status = EXIT_FDS;
1660 return r;
1661 }
1662
1663 if (params->apply_permissions) {
1664
1665 for (i = 0; i < _RLIMIT_MAX; i++) {
1666 if (!context->rlimit[i])
1667 continue;
1668
1669 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1670 *exit_status = EXIT_LIMITS;
1671 return -errno;
1672 }
1673 }
1674
1675 if (context->capability_bounding_set_drop) {
1676 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1677 if (r < 0) {
1678 *exit_status = EXIT_CAPABILITIES;
1679 return r;
1680 }
1681 }
1682
1683#ifdef HAVE_SMACK
1684 if (context->smack_process_label) {
1685 r = mac_smack_apply_pid(0, context->smack_process_label);
1686 if (r < 0) {
1687 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1688 return r;
1689 }
1690 }
1691#endif
1692
1693 if (context->user) {
1694 r = enforce_user(context, uid);
1695 if (r < 0) {
1696 *exit_status = EXIT_USER;
1697 return r;
1698 }
1699 }
1700
1701 /* PR_GET_SECUREBITS is not privileged, while
1702 * PR_SET_SECUREBITS is. So to suppress
1703 * potential EPERMs we'll try not to call
1704 * PR_SET_SECUREBITS unless necessary. */
1705 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1706 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1707 *exit_status = EXIT_SECUREBITS;
1708 return -errno;
1709 }
1710
1711 if (context->capabilities)
1712 if (cap_set_proc(context->capabilities) < 0) {
1713 *exit_status = EXIT_CAPABILITIES;
1714 return -errno;
1715 }
1716
1717 if (context->no_new_privileges)
1718 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1719 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1720 return -errno;
1721 }
1722
1723#ifdef HAVE_SECCOMP
1724 if (context->address_families_whitelist ||
1725 !set_isempty(context->address_families)) {
1726 r = apply_address_families(context);
1727 if (r < 0) {
1728 *exit_status = EXIT_ADDRESS_FAMILIES;
1729 return r;
1730 }
1731 }
1732
1733 if (context->syscall_whitelist ||
1734 !set_isempty(context->syscall_filter) ||
1735 !set_isempty(context->syscall_archs)) {
1736 r = apply_seccomp(context);
1737 if (r < 0) {
1738 *exit_status = EXIT_SECCOMP;
1739 return r;
1740 }
1741 }
1742#endif
1743
1744#ifdef HAVE_SELINUX
1745 if (mac_selinux_use()) {
1746 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1747
1748 if (exec_context) {
1749 r = setexeccon(exec_context);
1750 if (r < 0) {
1751 *exit_status = EXIT_SELINUX_CONTEXT;
1752 return r;
1753 }
1754 }
1755 }
1756#endif
1757
1758#ifdef HAVE_APPARMOR
1759 if (context->apparmor_profile && mac_apparmor_use()) {
1760 r = aa_change_onexec(context->apparmor_profile);
1761 if (r < 0 && !context->apparmor_profile_ignore) {
1762 *exit_status = EXIT_APPARMOR_PROFILE;
1763 return -errno;
1764 }
1765 }
1766#endif
1767 }
1768
1769 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1770 if (r < 0) {
1771 *exit_status = EXIT_MEMORY;
1772 return r;
1773 }
1774
1775 final_env = strv_env_merge(5,
1776 params->environment,
1777 our_env,
1778 context->environment,
1779 files_env,
1780 pam_env,
1781 NULL);
1782 if (!final_env) {
1783 *exit_status = EXIT_MEMORY;
1784 return -ENOMEM;
1785 }
1786
1787 final_argv = replace_env_argv(argv, final_env);
1788 if (!final_argv) {
1789 *exit_status = EXIT_MEMORY;
1790 return -ENOMEM;
1791 }
1792
1793 final_env = strv_env_clean(final_env);
1794
1795 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1796 _cleanup_free_ char *line;
1797
1798 line = exec_command_line(final_argv);
1799 if (line) {
1800 log_open();
1801 log_struct(LOG_DEBUG,
1802 LOG_UNIT_ID(unit),
1803 "EXECUTABLE=%s", command->path,
1804 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1805 NULL);
1806 log_close();
1807 }
1808 }
1809
1810 execve(command->path, final_argv, final_env);
1811 *exit_status = EXIT_EXEC;
1812 return -errno;
1813}
1814
1815int exec_spawn(Unit *unit,
1816 ExecCommand *command,
1817 const ExecContext *context,
1818 const ExecParameters *params,
1819 ExecRuntime *runtime,
1820 pid_t *ret) {
1821
1822 _cleanup_strv_free_ char **files_env = NULL;
1823 int *fds = NULL; unsigned n_fds = 0;
1824 _cleanup_free_ char *line = NULL;
1825 int socket_fd, r;
1826 char **argv;
1827 pid_t pid;
1828
1829 assert(unit);
1830 assert(command);
1831 assert(context);
1832 assert(ret);
1833 assert(params);
1834 assert(params->fds || params->n_fds <= 0);
1835
1836 if (context->std_input == EXEC_INPUT_SOCKET ||
1837 context->std_output == EXEC_OUTPUT_SOCKET ||
1838 context->std_error == EXEC_OUTPUT_SOCKET) {
1839
1840 if (params->n_fds != 1) {
1841 log_unit_error(unit, "Got more than one socket.");
1842 return -EINVAL;
1843 }
1844
1845 socket_fd = params->fds[0];
1846 } else {
1847 socket_fd = -1;
1848 fds = params->fds;
1849 n_fds = params->n_fds;
1850 }
1851
1852 r = exec_context_load_environment(unit, context, &files_env);
1853 if (r < 0)
1854 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1855
1856 argv = params->argv ?: command->argv;
1857 line = exec_command_line(argv);
1858 if (!line)
1859 return log_oom();
1860
1861 log_struct(LOG_DEBUG,
1862 LOG_UNIT_ID(unit),
1863 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1864 "EXECUTABLE=%s", command->path,
1865 NULL);
1866 pid = fork();
1867 if (pid < 0)
1868 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1869
1870 if (pid == 0) {
1871 int exit_status;
1872
1873 r = exec_child(unit,
1874 command,
1875 context,
1876 params,
1877 runtime,
1878 argv,
1879 socket_fd,
1880 fds, n_fds,
1881 files_env,
1882 &exit_status);
1883 if (r < 0) {
1884 log_open();
1885 log_struct_errno(LOG_ERR, r,
1886 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1887 LOG_UNIT_ID(unit),
1888 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1889 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1890 command->path),
1891 "EXECUTABLE=%s", command->path,
1892 NULL);
1893 }
1894
1895 _exit(exit_status);
1896 }
1897
1898 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1899
1900 /* We add the new process to the cgroup both in the child (so
1901 * that we can be sure that no user code is ever executed
1902 * outside of the cgroup) and in the parent (so that we can be
1903 * sure that when we kill the cgroup the process will be
1904 * killed too). */
1905 if (params->cgroup_path)
1906 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1907
1908 exec_status_start(&command->exec_status, pid);
1909
1910 *ret = pid;
1911 return 0;
1912}
1913
1914void exec_context_init(ExecContext *c) {
1915 assert(c);
1916
1917 c->umask = 0022;
1918 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1919 c->cpu_sched_policy = SCHED_OTHER;
1920 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1921 c->syslog_level_prefix = true;
1922 c->ignore_sigpipe = true;
1923 c->timer_slack_nsec = NSEC_INFINITY;
1924 c->personality = 0xffffffffUL;
1925 c->runtime_directory_mode = 0755;
1926}
1927
1928void exec_context_done(ExecContext *c) {
1929 unsigned l;
1930
1931 assert(c);
1932
1933 strv_free(c->environment);
1934 c->environment = NULL;
1935
1936 strv_free(c->environment_files);
1937 c->environment_files = NULL;
1938
1939 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1940 free(c->rlimit[l]);
1941 c->rlimit[l] = NULL;
1942 }
1943
1944 free(c->working_directory);
1945 c->working_directory = NULL;
1946 free(c->root_directory);
1947 c->root_directory = NULL;
1948
1949 free(c->tty_path);
1950 c->tty_path = NULL;
1951
1952 free(c->syslog_identifier);
1953 c->syslog_identifier = NULL;
1954
1955 free(c->user);
1956 c->user = NULL;
1957
1958 free(c->group);
1959 c->group = NULL;
1960
1961 strv_free(c->supplementary_groups);
1962 c->supplementary_groups = NULL;
1963
1964 free(c->pam_name);
1965 c->pam_name = NULL;
1966
1967 if (c->capabilities) {
1968 cap_free(c->capabilities);
1969 c->capabilities = NULL;
1970 }
1971
1972 strv_free(c->read_only_dirs);
1973 c->read_only_dirs = NULL;
1974
1975 strv_free(c->read_write_dirs);
1976 c->read_write_dirs = NULL;
1977
1978 strv_free(c->inaccessible_dirs);
1979 c->inaccessible_dirs = NULL;
1980
1981 if (c->cpuset)
1982 CPU_FREE(c->cpuset);
1983
1984 free(c->utmp_id);
1985 c->utmp_id = NULL;
1986
1987 free(c->selinux_context);
1988 c->selinux_context = NULL;
1989
1990 free(c->apparmor_profile);
1991 c->apparmor_profile = NULL;
1992
1993 set_free(c->syscall_filter);
1994 c->syscall_filter = NULL;
1995
1996 set_free(c->syscall_archs);
1997 c->syscall_archs = NULL;
1998
1999 set_free(c->address_families);
2000 c->address_families = NULL;
2001
2002 strv_free(c->runtime_directory);
2003 c->runtime_directory = NULL;
2004
2005 bus_endpoint_free(c->bus_endpoint);
2006 c->bus_endpoint = NULL;
2007}
2008
2009int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2010 char **i;
2011
2012 assert(c);
2013
2014 if (!runtime_prefix)
2015 return 0;
2016
2017 STRV_FOREACH(i, c->runtime_directory) {
2018 _cleanup_free_ char *p;
2019
2020 p = strjoin(runtime_prefix, "/", *i, NULL);
2021 if (!p)
2022 return -ENOMEM;
2023
2024 /* We execute this synchronously, since we need to be
2025 * sure this is gone when we start the service
2026 * next. */
2027 (void) rm_rf(p, REMOVE_ROOT);
2028 }
2029
2030 return 0;
2031}
2032
2033void exec_command_done(ExecCommand *c) {
2034 assert(c);
2035
2036 free(c->path);
2037 c->path = NULL;
2038
2039 strv_free(c->argv);
2040 c->argv = NULL;
2041}
2042
2043void exec_command_done_array(ExecCommand *c, unsigned n) {
2044 unsigned i;
2045
2046 for (i = 0; i < n; i++)
2047 exec_command_done(c+i);
2048}
2049
2050ExecCommand* exec_command_free_list(ExecCommand *c) {
2051 ExecCommand *i;
2052
2053 while ((i = c)) {
2054 LIST_REMOVE(command, c, i);
2055 exec_command_done(i);
2056 free(i);
2057 }
2058
2059 return NULL;
2060}
2061
2062void exec_command_free_array(ExecCommand **c, unsigned n) {
2063 unsigned i;
2064
2065 for (i = 0; i < n; i++)
2066 c[i] = exec_command_free_list(c[i]);
2067}
2068
2069typedef struct InvalidEnvInfo {
2070 Unit *unit;
2071 const char *path;
2072} InvalidEnvInfo;
2073
2074static void invalid_env(const char *p, void *userdata) {
2075 InvalidEnvInfo *info = userdata;
2076
2077 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2078}
2079
2080int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2081 char **i, **r = NULL;
2082
2083 assert(c);
2084 assert(l);
2085
2086 STRV_FOREACH(i, c->environment_files) {
2087 char *fn;
2088 int k;
2089 bool ignore = false;
2090 char **p;
2091 _cleanup_globfree_ glob_t pglob = {};
2092 int count, n;
2093
2094 fn = *i;
2095
2096 if (fn[0] == '-') {
2097 ignore = true;
2098 fn ++;
2099 }
2100
2101 if (!path_is_absolute(fn)) {
2102 if (ignore)
2103 continue;
2104
2105 strv_free(r);
2106 return -EINVAL;
2107 }
2108
2109 /* Filename supports globbing, take all matching files */
2110 errno = 0;
2111 if (glob(fn, 0, NULL, &pglob) != 0) {
2112 if (ignore)
2113 continue;
2114
2115 strv_free(r);
2116 return errno ? -errno : -EINVAL;
2117 }
2118 count = pglob.gl_pathc;
2119 if (count == 0) {
2120 if (ignore)
2121 continue;
2122
2123 strv_free(r);
2124 return -EINVAL;
2125 }
2126 for (n = 0; n < count; n++) {
2127 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2128 if (k < 0) {
2129 if (ignore)
2130 continue;
2131
2132 strv_free(r);
2133 return k;
2134 }
2135 /* Log invalid environment variables with filename */
2136 if (p) {
2137 InvalidEnvInfo info = {
2138 .unit = unit,
2139 .path = pglob.gl_pathv[n]
2140 };
2141
2142 p = strv_env_clean_with_callback(p, invalid_env, &info);
2143 }
2144
2145 if (r == NULL)
2146 r = p;
2147 else {
2148 char **m;
2149
2150 m = strv_env_merge(2, r, p);
2151 strv_free(r);
2152 strv_free(p);
2153 if (!m)
2154 return -ENOMEM;
2155
2156 r = m;
2157 }
2158 }
2159 }
2160
2161 *l = r;
2162
2163 return 0;
2164}
2165
2166static bool tty_may_match_dev_console(const char *tty) {
2167 _cleanup_free_ char *active = NULL;
2168 char *console;
2169
2170 if (startswith(tty, "/dev/"))
2171 tty += 5;
2172
2173 /* trivial identity? */
2174 if (streq(tty, "console"))
2175 return true;
2176
2177 console = resolve_dev_console(&active);
2178 /* if we could not resolve, assume it may */
2179 if (!console)
2180 return true;
2181
2182 /* "tty0" means the active VC, so it may be the same sometimes */
2183 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2184}
2185
2186bool exec_context_may_touch_console(ExecContext *ec) {
2187 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2188 is_terminal_input(ec->std_input) ||
2189 is_terminal_output(ec->std_output) ||
2190 is_terminal_output(ec->std_error)) &&
2191 tty_may_match_dev_console(tty_path(ec));
2192}
2193
2194static void strv_fprintf(FILE *f, char **l) {
2195 char **g;
2196
2197 assert(f);
2198
2199 STRV_FOREACH(g, l)
2200 fprintf(f, " %s", *g);
2201}
2202
2203void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2204 char **e;
2205 unsigned i;
2206
2207 assert(c);
2208 assert(f);
2209
2210 prefix = strempty(prefix);
2211
2212 fprintf(f,
2213 "%sUMask: %04o\n"
2214 "%sWorkingDirectory: %s\n"
2215 "%sRootDirectory: %s\n"
2216 "%sNonBlocking: %s\n"
2217 "%sPrivateTmp: %s\n"
2218 "%sPrivateNetwork: %s\n"
2219 "%sPrivateDevices: %s\n"
2220 "%sProtectHome: %s\n"
2221 "%sProtectSystem: %s\n"
2222 "%sIgnoreSIGPIPE: %s\n",
2223 prefix, c->umask,
2224 prefix, c->working_directory ? c->working_directory : "/",
2225 prefix, c->root_directory ? c->root_directory : "/",
2226 prefix, yes_no(c->non_blocking),
2227 prefix, yes_no(c->private_tmp),
2228 prefix, yes_no(c->private_network),
2229 prefix, yes_no(c->private_devices),
2230 prefix, protect_home_to_string(c->protect_home),
2231 prefix, protect_system_to_string(c->protect_system),
2232 prefix, yes_no(c->ignore_sigpipe));
2233
2234 STRV_FOREACH(e, c->environment)
2235 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2236
2237 STRV_FOREACH(e, c->environment_files)
2238 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2239
2240 if (c->nice_set)
2241 fprintf(f,
2242 "%sNice: %i\n",
2243 prefix, c->nice);
2244
2245 if (c->oom_score_adjust_set)
2246 fprintf(f,
2247 "%sOOMScoreAdjust: %i\n",
2248 prefix, c->oom_score_adjust);
2249
2250 for (i = 0; i < RLIM_NLIMITS; i++)
2251 if (c->rlimit[i])
2252 fprintf(f, "%s%s: "RLIM_FMT"\n",
2253 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2254
2255 if (c->ioprio_set) {
2256 _cleanup_free_ char *class_str = NULL;
2257
2258 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2259 fprintf(f,
2260 "%sIOSchedulingClass: %s\n"
2261 "%sIOPriority: %i\n",
2262 prefix, strna(class_str),
2263 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2264 }
2265
2266 if (c->cpu_sched_set) {
2267 _cleanup_free_ char *policy_str = NULL;
2268
2269 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2270 fprintf(f,
2271 "%sCPUSchedulingPolicy: %s\n"
2272 "%sCPUSchedulingPriority: %i\n"
2273 "%sCPUSchedulingResetOnFork: %s\n",
2274 prefix, strna(policy_str),
2275 prefix, c->cpu_sched_priority,
2276 prefix, yes_no(c->cpu_sched_reset_on_fork));
2277 }
2278
2279 if (c->cpuset) {
2280 fprintf(f, "%sCPUAffinity:", prefix);
2281 for (i = 0; i < c->cpuset_ncpus; i++)
2282 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2283 fprintf(f, " %u", i);
2284 fputs("\n", f);
2285 }
2286
2287 if (c->timer_slack_nsec != NSEC_INFINITY)
2288 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2289
2290 fprintf(f,
2291 "%sStandardInput: %s\n"
2292 "%sStandardOutput: %s\n"
2293 "%sStandardError: %s\n",
2294 prefix, exec_input_to_string(c->std_input),
2295 prefix, exec_output_to_string(c->std_output),
2296 prefix, exec_output_to_string(c->std_error));
2297
2298 if (c->tty_path)
2299 fprintf(f,
2300 "%sTTYPath: %s\n"
2301 "%sTTYReset: %s\n"
2302 "%sTTYVHangup: %s\n"
2303 "%sTTYVTDisallocate: %s\n",
2304 prefix, c->tty_path,
2305 prefix, yes_no(c->tty_reset),
2306 prefix, yes_no(c->tty_vhangup),
2307 prefix, yes_no(c->tty_vt_disallocate));
2308
2309 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2310 c->std_output == EXEC_OUTPUT_KMSG ||
2311 c->std_output == EXEC_OUTPUT_JOURNAL ||
2312 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2313 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2314 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2315 c->std_error == EXEC_OUTPUT_SYSLOG ||
2316 c->std_error == EXEC_OUTPUT_KMSG ||
2317 c->std_error == EXEC_OUTPUT_JOURNAL ||
2318 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2319 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2320 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2321
2322 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2323
2324 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2325 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2326
2327 fprintf(f,
2328 "%sSyslogFacility: %s\n"
2329 "%sSyslogLevel: %s\n",
2330 prefix, strna(fac_str),
2331 prefix, strna(lvl_str));
2332 }
2333
2334 if (c->capabilities) {
2335 _cleanup_cap_free_charp_ char *t;
2336
2337 t = cap_to_text(c->capabilities, NULL);
2338 if (t)
2339 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2340 }
2341
2342 if (c->secure_bits)
2343 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2344 prefix,
2345 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2346 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2347 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2348 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2349 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2350 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2351
2352 if (c->capability_bounding_set_drop) {
2353 unsigned long l;
2354 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2355
2356 for (l = 0; l <= cap_last_cap(); l++)
2357 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2358 fprintf(f, " %s", strna(capability_to_name(l)));
2359
2360 fputs("\n", f);
2361 }
2362
2363 if (c->user)
2364 fprintf(f, "%sUser: %s\n", prefix, c->user);
2365 if (c->group)
2366 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2367
2368 if (strv_length(c->supplementary_groups) > 0) {
2369 fprintf(f, "%sSupplementaryGroups:", prefix);
2370 strv_fprintf(f, c->supplementary_groups);
2371 fputs("\n", f);
2372 }
2373
2374 if (c->pam_name)
2375 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2376
2377 if (strv_length(c->read_write_dirs) > 0) {
2378 fprintf(f, "%sReadWriteDirs:", prefix);
2379 strv_fprintf(f, c->read_write_dirs);
2380 fputs("\n", f);
2381 }
2382
2383 if (strv_length(c->read_only_dirs) > 0) {
2384 fprintf(f, "%sReadOnlyDirs:", prefix);
2385 strv_fprintf(f, c->read_only_dirs);
2386 fputs("\n", f);
2387 }
2388
2389 if (strv_length(c->inaccessible_dirs) > 0) {
2390 fprintf(f, "%sInaccessibleDirs:", prefix);
2391 strv_fprintf(f, c->inaccessible_dirs);
2392 fputs("\n", f);
2393 }
2394
2395 if (c->utmp_id)
2396 fprintf(f,
2397 "%sUtmpIdentifier: %s\n",
2398 prefix, c->utmp_id);
2399
2400 if (c->selinux_context)
2401 fprintf(f,
2402 "%sSELinuxContext: %s%s\n",
2403 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2404
2405 if (c->personality != 0xffffffffUL)
2406 fprintf(f,
2407 "%sPersonality: %s\n",
2408 prefix, strna(personality_to_string(c->personality)));
2409
2410 if (c->syscall_filter) {
2411#ifdef HAVE_SECCOMP
2412 Iterator j;
2413 void *id;
2414 bool first = true;
2415#endif
2416
2417 fprintf(f,
2418 "%sSystemCallFilter: ",
2419 prefix);
2420
2421 if (!c->syscall_whitelist)
2422 fputc('~', f);
2423
2424#ifdef HAVE_SECCOMP
2425 SET_FOREACH(id, c->syscall_filter, j) {
2426 _cleanup_free_ char *name = NULL;
2427
2428 if (first)
2429 first = false;
2430 else
2431 fputc(' ', f);
2432
2433 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2434 fputs(strna(name), f);
2435 }
2436#endif
2437
2438 fputc('\n', f);
2439 }
2440
2441 if (c->syscall_archs) {
2442#ifdef HAVE_SECCOMP
2443 Iterator j;
2444 void *id;
2445#endif
2446
2447 fprintf(f,
2448 "%sSystemCallArchitectures:",
2449 prefix);
2450
2451#ifdef HAVE_SECCOMP
2452 SET_FOREACH(id, c->syscall_archs, j)
2453 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2454#endif
2455 fputc('\n', f);
2456 }
2457
2458 if (c->syscall_errno != 0)
2459 fprintf(f,
2460 "%sSystemCallErrorNumber: %s\n",
2461 prefix, strna(errno_to_name(c->syscall_errno)));
2462
2463 if (c->apparmor_profile)
2464 fprintf(f,
2465 "%sAppArmorProfile: %s%s\n",
2466 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2467}
2468
2469bool exec_context_maintains_privileges(ExecContext *c) {
2470 assert(c);
2471
2472 /* Returns true if the process forked off would run run under
2473 * an unchanged UID or as root. */
2474
2475 if (!c->user)
2476 return true;
2477
2478 if (streq(c->user, "root") || streq(c->user, "0"))
2479 return true;
2480
2481 return false;
2482}
2483
2484void exec_status_start(ExecStatus *s, pid_t pid) {
2485 assert(s);
2486
2487 zero(*s);
2488 s->pid = pid;
2489 dual_timestamp_get(&s->start_timestamp);
2490}
2491
2492void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2493 assert(s);
2494
2495 if (s->pid && s->pid != pid)
2496 zero(*s);
2497
2498 s->pid = pid;
2499 dual_timestamp_get(&s->exit_timestamp);
2500
2501 s->code = code;
2502 s->status = status;
2503
2504 if (context) {
2505 if (context->utmp_id)
2506 utmp_put_dead_process(context->utmp_id, pid, code, status);
2507
2508 exec_context_tty_reset(context);
2509 }
2510}
2511
2512void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2513 char buf[FORMAT_TIMESTAMP_MAX];
2514
2515 assert(s);
2516 assert(f);
2517
2518 if (s->pid <= 0)
2519 return;
2520
2521 prefix = strempty(prefix);
2522
2523 fprintf(f,
2524 "%sPID: "PID_FMT"\n",
2525 prefix, s->pid);
2526
2527 if (s->start_timestamp.realtime > 0)
2528 fprintf(f,
2529 "%sStart Timestamp: %s\n",
2530 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2531
2532 if (s->exit_timestamp.realtime > 0)
2533 fprintf(f,
2534 "%sExit Timestamp: %s\n"
2535 "%sExit Code: %s\n"
2536 "%sExit Status: %i\n",
2537 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2538 prefix, sigchld_code_to_string(s->code),
2539 prefix, s->status);
2540}
2541
2542char *exec_command_line(char **argv) {
2543 size_t k;
2544 char *n, *p, **a;
2545 bool first = true;
2546
2547 assert(argv);
2548
2549 k = 1;
2550 STRV_FOREACH(a, argv)
2551 k += strlen(*a)+3;
2552
2553 if (!(n = new(char, k)))
2554 return NULL;
2555
2556 p = n;
2557 STRV_FOREACH(a, argv) {
2558
2559 if (!first)
2560 *(p++) = ' ';
2561 else
2562 first = false;
2563
2564 if (strpbrk(*a, WHITESPACE)) {
2565 *(p++) = '\'';
2566 p = stpcpy(p, *a);
2567 *(p++) = '\'';
2568 } else
2569 p = stpcpy(p, *a);
2570
2571 }
2572
2573 *p = 0;
2574
2575 /* FIXME: this doesn't really handle arguments that have
2576 * spaces and ticks in them */
2577
2578 return n;
2579}
2580
2581void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2582 _cleanup_free_ char *cmd = NULL;
2583 const char *prefix2;
2584
2585 assert(c);
2586 assert(f);
2587
2588 prefix = strempty(prefix);
2589 prefix2 = strjoina(prefix, "\t");
2590
2591 cmd = exec_command_line(c->argv);
2592 fprintf(f,
2593 "%sCommand Line: %s\n",
2594 prefix, cmd ? cmd : strerror(ENOMEM));
2595
2596 exec_status_dump(&c->exec_status, f, prefix2);
2597}
2598
2599void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2600 assert(f);
2601
2602 prefix = strempty(prefix);
2603
2604 LIST_FOREACH(command, c, c)
2605 exec_command_dump(c, f, prefix);
2606}
2607
2608void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2609 ExecCommand *end;
2610
2611 assert(l);
2612 assert(e);
2613
2614 if (*l) {
2615 /* It's kind of important, that we keep the order here */
2616 LIST_FIND_TAIL(command, *l, end);
2617 LIST_INSERT_AFTER(command, *l, end, e);
2618 } else
2619 *l = e;
2620}
2621
2622int exec_command_set(ExecCommand *c, const char *path, ...) {
2623 va_list ap;
2624 char **l, *p;
2625
2626 assert(c);
2627 assert(path);
2628
2629 va_start(ap, path);
2630 l = strv_new_ap(path, ap);
2631 va_end(ap);
2632
2633 if (!l)
2634 return -ENOMEM;
2635
2636 p = strdup(path);
2637 if (!p) {
2638 strv_free(l);
2639 return -ENOMEM;
2640 }
2641
2642 free(c->path);
2643 c->path = p;
2644
2645 strv_free(c->argv);
2646 c->argv = l;
2647
2648 return 0;
2649}
2650
2651int exec_command_append(ExecCommand *c, const char *path, ...) {
2652 _cleanup_strv_free_ char **l = NULL;
2653 va_list ap;
2654 int r;
2655
2656 assert(c);
2657 assert(path);
2658
2659 va_start(ap, path);
2660 l = strv_new_ap(path, ap);
2661 va_end(ap);
2662
2663 if (!l)
2664 return -ENOMEM;
2665
2666 r = strv_extend_strv(&c->argv, l);
2667 if (r < 0)
2668 return r;
2669
2670 return 0;
2671}
2672
2673
2674static int exec_runtime_allocate(ExecRuntime **rt) {
2675
2676 if (*rt)
2677 return 0;
2678
2679 *rt = new0(ExecRuntime, 1);
2680 if (!*rt)
2681 return -ENOMEM;
2682
2683 (*rt)->n_ref = 1;
2684 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2685
2686 return 0;
2687}
2688
2689int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2690 int r;
2691
2692 assert(rt);
2693 assert(c);
2694 assert(id);
2695
2696 if (*rt)
2697 return 1;
2698
2699 if (!c->private_network && !c->private_tmp)
2700 return 0;
2701
2702 r = exec_runtime_allocate(rt);
2703 if (r < 0)
2704 return r;
2705
2706 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2707 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2708 return -errno;
2709 }
2710
2711 if (c->private_tmp && !(*rt)->tmp_dir) {
2712 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2713 if (r < 0)
2714 return r;
2715 }
2716
2717 return 1;
2718}
2719
2720ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2721 assert(r);
2722 assert(r->n_ref > 0);
2723
2724 r->n_ref++;
2725 return r;
2726}
2727
2728ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2729
2730 if (!r)
2731 return NULL;
2732
2733 assert(r->n_ref > 0);
2734
2735 r->n_ref--;
2736 if (r->n_ref > 0)
2737 return NULL;
2738
2739 free(r->tmp_dir);
2740 free(r->var_tmp_dir);
2741 safe_close_pair(r->netns_storage_socket);
2742 free(r);
2743
2744 return NULL;
2745}
2746
2747int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2748 assert(u);
2749 assert(f);
2750 assert(fds);
2751
2752 if (!rt)
2753 return 0;
2754
2755 if (rt->tmp_dir)
2756 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2757
2758 if (rt->var_tmp_dir)
2759 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2760
2761 if (rt->netns_storage_socket[0] >= 0) {
2762 int copy;
2763
2764 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2765 if (copy < 0)
2766 return copy;
2767
2768 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2769 }
2770
2771 if (rt->netns_storage_socket[1] >= 0) {
2772 int copy;
2773
2774 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2775 if (copy < 0)
2776 return copy;
2777
2778 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2779 }
2780
2781 return 0;
2782}
2783
2784int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2785 int r;
2786
2787 assert(rt);
2788 assert(key);
2789 assert(value);
2790
2791 if (streq(key, "tmp-dir")) {
2792 char *copy;
2793
2794 r = exec_runtime_allocate(rt);
2795 if (r < 0)
2796 return log_oom();
2797
2798 copy = strdup(value);
2799 if (!copy)
2800 return log_oom();
2801
2802 free((*rt)->tmp_dir);
2803 (*rt)->tmp_dir = copy;
2804
2805 } else if (streq(key, "var-tmp-dir")) {
2806 char *copy;
2807
2808 r = exec_runtime_allocate(rt);
2809 if (r < 0)
2810 return log_oom();
2811
2812 copy = strdup(value);
2813 if (!copy)
2814 return log_oom();
2815
2816 free((*rt)->var_tmp_dir);
2817 (*rt)->var_tmp_dir = copy;
2818
2819 } else if (streq(key, "netns-socket-0")) {
2820 int fd;
2821
2822 r = exec_runtime_allocate(rt);
2823 if (r < 0)
2824 return log_oom();
2825
2826 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2827 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2828 else {
2829 safe_close((*rt)->netns_storage_socket[0]);
2830 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2831 }
2832 } else if (streq(key, "netns-socket-1")) {
2833 int fd;
2834
2835 r = exec_runtime_allocate(rt);
2836 if (r < 0)
2837 return log_oom();
2838
2839 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2840 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2841 else {
2842 safe_close((*rt)->netns_storage_socket[1]);
2843 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2844 }
2845 } else
2846 return 0;
2847
2848 return 1;
2849}
2850
2851static void *remove_tmpdir_thread(void *p) {
2852 _cleanup_free_ char *path = p;
2853
2854 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2855 return NULL;
2856}
2857
2858void exec_runtime_destroy(ExecRuntime *rt) {
2859 int r;
2860
2861 if (!rt)
2862 return;
2863
2864 /* If there are multiple users of this, let's leave the stuff around */
2865 if (rt->n_ref > 1)
2866 return;
2867
2868 if (rt->tmp_dir) {
2869 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2870
2871 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2872 if (r < 0) {
2873 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2874 free(rt->tmp_dir);
2875 }
2876
2877 rt->tmp_dir = NULL;
2878 }
2879
2880 if (rt->var_tmp_dir) {
2881 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2882
2883 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2884 if (r < 0) {
2885 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2886 free(rt->var_tmp_dir);
2887 }
2888
2889 rt->var_tmp_dir = NULL;
2890 }
2891
2892 safe_close_pair(rt->netns_storage_socket);
2893}
2894
2895static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2896 [EXEC_INPUT_NULL] = "null",
2897 [EXEC_INPUT_TTY] = "tty",
2898 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2899 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2900 [EXEC_INPUT_SOCKET] = "socket"
2901};
2902
2903DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2904
2905static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2906 [EXEC_OUTPUT_INHERIT] = "inherit",
2907 [EXEC_OUTPUT_NULL] = "null",
2908 [EXEC_OUTPUT_TTY] = "tty",
2909 [EXEC_OUTPUT_SYSLOG] = "syslog",
2910 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2911 [EXEC_OUTPUT_KMSG] = "kmsg",
2912 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2913 [EXEC_OUTPUT_JOURNAL] = "journal",
2914 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2915 [EXEC_OUTPUT_SOCKET] = "socket"
2916};
2917
2918DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);