]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
e88a2dc0edd3f00e0d8bd1566dc702c147d55aff
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "rm-rf.h"
53 #include "execute.h"
54 #include "strv.h"
55 #include "macro.h"
56 #include "capability.h"
57 #include "util.h"
58 #include "log.h"
59 #include "sd-messages.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82
83 #ifdef HAVE_APPARMOR
84 #include "apparmor-util.h"
85 #endif
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
/* Timeout of the first wait for a hang-up on idle_pipe[0] in do_idle_pipe_dance(). */
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Timeout of the second wait in do_idle_pipe_dance(), used after a byte was written to idle_pipe[3]. */
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
93
94 /* Access mode chown_terminal() applies to and then verifies on the terminal. This assumes there is a 'tty' group */
95 #define TTY_MODE 0620
96
/* Size handed to fd_inc_sndbuf() for the logger socket in connect_logger_as(). */
97 #define SNDBUF_SIZE (8*1024*1024)
98
/* Moves the passed file descriptors so that fds[i] ends up as descriptor
 * number i+3, i.e. directly behind stdin/stdout/stderr. The array is
 * updated in place with the new descriptor numbers.
 *
 * Returns 0 on success, or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If
                         * that was not the target itself the slot is still
                         * occupied, so note the first such index and make
                         * another pass starting there. */
                        if (dup_fd != target && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (retry >= 0);

        return 0;
}
142
/* Applies the requested O_NONBLOCK state to every passed fd and clears
 * FD_CLOEXEC on each of them. Returns 0 on success or the first negative
 * error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * handed over to the child we are about to execute. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171 assert(context);
172
173 if (context->tty_path)
174 return context->tty_path;
175
176 return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180 assert(context);
181
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
184
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
187
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193 return
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already gave us the wanted number */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
217
218 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
219 union sockaddr_union sa = {
220 .un.sun_family = AF_UNIX,
221 .un.sun_path = "/run/systemd/journal/stdout",
222 };
223 uid_t olduid = UID_INVALID;
224 gid_t oldgid = GID_INVALID;
225 int r;
226
227 if (gid != GID_INVALID) {
228 oldgid = getgid();
229
230 r = setegid(gid);
231 if (r < 0)
232 return -errno;
233 }
234
235 if (uid != UID_INVALID) {
236 olduid = getuid();
237
238 r = seteuid(uid);
239 if (r < 0) {
240 r = -errno;
241 goto restore_gid;
242 }
243 }
244
245 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
246 if (r < 0)
247 r = -errno;
248
249 /* If we fail to restore the uid or gid, things will likely
250 fail later on. This should only happen if an LSM interferes. */
251
252 if (uid != UID_INVALID)
253 (void) seteuid(olduid);
254
255 restore_gid:
256 if (gid != GID_INVALID)
257 (void) setegid(oldgid);
258
259 return r;
260 }
261
262 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
263 int fd, r;
264
265 assert(context);
266 assert(output < _EXEC_OUTPUT_MAX);
267 assert(ident);
268 assert(nfd >= 0);
269
270 fd = socket(AF_UNIX, SOCK_STREAM, 0);
271 if (fd < 0)
272 return -errno;
273
274 r = connect_journal_socket(fd, uid, gid);
275 if (r < 0)
276 return r;
277
278 if (shutdown(fd, SHUT_RD) < 0) {
279 safe_close(fd);
280 return -errno;
281 }
282
283 fd_inc_sndbuf(fd, SNDBUF_SIZE);
284
285 dprintf(fd,
286 "%s\n"
287 "%s\n"
288 "%i\n"
289 "%i\n"
290 "%i\n"
291 "%i\n"
292 "%i\n",
293 context->syslog_identifier ? context->syslog_identifier : ident,
294 unit_id,
295 context->syslog_priority,
296 !!context->syslog_level_prefix,
297 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
298 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
299 is_terminal_output(output));
300
301 if (fd != nfd) {
302 r = dup2(fd, nfd) < 0 ? -errno : nfd;
303 safe_close(fd);
304 } else
305 r = nfd;
306
307 return r;
308 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, or a
 * negative error from open_terminal()/dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already the right descriptor, nothing to move */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
326
327 static bool is_terminal_input(ExecInput i) {
328 return
329 i == EXEC_INPUT_TTY ||
330 i == EXEC_INPUT_TTY_FORCE ||
331 i == EXEC_INPUT_TTY_FAIL;
332 }
333
334 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
335
336 if (is_terminal_input(std_input) && !apply_tty_stdin)
337 return EXEC_INPUT_NULL;
338
339 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
340 return EXEC_INPUT_NULL;
341
342 return std_input;
343 }
344
345 static int fixup_output(ExecOutput std_output, int socket_fd) {
346
347 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
348 return EXEC_OUTPUT_INHERIT;
349
350 return std_output;
351 }
352
353 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
354 ExecInput i;
355
356 assert(context);
357
358 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
359
360 switch (i) {
361
362 case EXEC_INPUT_NULL:
363 return open_null_as(O_RDONLY, STDIN_FILENO);
364
365 case EXEC_INPUT_TTY:
366 case EXEC_INPUT_TTY_FORCE:
367 case EXEC_INPUT_TTY_FAIL: {
368 int fd, r;
369
370 fd = acquire_terminal(tty_path(context),
371 i == EXEC_INPUT_TTY_FAIL,
372 i == EXEC_INPUT_TTY_FORCE,
373 false,
374 USEC_INFINITY);
375 if (fd < 0)
376 return fd;
377
378 if (fd != STDIN_FILENO) {
379 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
380 safe_close(fd);
381 } else
382 r = STDIN_FILENO;
383
384 return r;
385 }
386
387 case EXEC_INPUT_SOCKET:
388 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
389
390 default:
391 assert_not_reached("Unknown input type");
392 }
393 }
394
395 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
396 ExecOutput o;
397 ExecInput i;
398 int r;
399
400 assert(unit);
401 assert(context);
402 assert(ident);
403
404 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
405 o = fixup_output(context->std_output, socket_fd);
406
407 if (fileno == STDERR_FILENO) {
408 ExecOutput e;
409 e = fixup_output(context->std_error, socket_fd);
410
411 /* This expects the input and output are already set up */
412
413 /* Don't change the stderr file descriptor if we inherit all
414 * the way and are not on a tty */
415 if (e == EXEC_OUTPUT_INHERIT &&
416 o == EXEC_OUTPUT_INHERIT &&
417 i == EXEC_INPUT_NULL &&
418 !is_terminal_input(context->std_input) &&
419 getppid () != 1)
420 return fileno;
421
422 /* Duplicate from stdout if possible */
423 if (e == o || e == EXEC_OUTPUT_INHERIT)
424 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
425
426 o = e;
427
428 } else if (o == EXEC_OUTPUT_INHERIT) {
429 /* If input got downgraded, inherit the original value */
430 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
431 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
432
433 /* If the input is connected to anything that's not a /dev/null, inherit that... */
434 if (i != EXEC_INPUT_NULL)
435 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
436
437 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
438 if (getppid() != 1)
439 return fileno;
440
441 /* We need to open /dev/null here anew, to get the right access mode. */
442 return open_null_as(O_WRONLY, fileno);
443 }
444
445 switch (o) {
446
447 case EXEC_OUTPUT_NULL:
448 return open_null_as(O_WRONLY, fileno);
449
450 case EXEC_OUTPUT_TTY:
451 if (is_terminal_input(i))
452 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
453
454 /* We don't reset the terminal if this is just about output */
455 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
456
457 case EXEC_OUTPUT_SYSLOG:
458 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
459 case EXEC_OUTPUT_KMSG:
460 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
461 case EXEC_OUTPUT_JOURNAL:
462 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
463 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
464 if (r < 0) {
465 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
466 r = open_null_as(O_WRONLY, fileno);
467 }
468 return r;
469
470 case EXEC_OUTPUT_SOCKET:
471 assert(socket_fd >= 0);
472 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
473
474 default:
475 assert_not_reached("Unknown error type");
476 }
477 }
478
479 static int chown_terminal(int fd, uid_t uid) {
480 struct stat st;
481
482 assert(fd >= 0);
483
484 /* This might fail. What matters are the results. */
485 (void) fchown(fd, uid, -1);
486 (void) fchmod(fd, TTY_MODE);
487
488 if (fstat(fd, &st) < 0)
489 return -errno;
490
491 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
492 return -EPERM;
493
494 return 0;
495 }
496
497 static int setup_confirm_stdio(int *_saved_stdin,
498 int *_saved_stdout) {
499 int fd = -1, saved_stdin, saved_stdout = -1, r;
500
501 assert(_saved_stdin);
502 assert(_saved_stdout);
503
504 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
505 if (saved_stdin < 0)
506 return -errno;
507
508 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
509 if (saved_stdout < 0) {
510 r = errno;
511 goto fail;
512 }
513
514 fd = acquire_terminal(
515 "/dev/console",
516 false,
517 false,
518 false,
519 DEFAULT_CONFIRM_USEC);
520 if (fd < 0) {
521 r = fd;
522 goto fail;
523 }
524
525 r = chown_terminal(fd, getuid());
526 if (r < 0)
527 goto fail;
528
529 if (dup2(fd, STDIN_FILENO) < 0) {
530 r = -errno;
531 goto fail;
532 }
533
534 if (dup2(fd, STDOUT_FILENO) < 0) {
535 r = -errno;
536 goto fail;
537 }
538
539 if (fd >= 2)
540 safe_close(fd);
541
542 *_saved_stdin = saved_stdin;
543 *_saved_stdout = saved_stdout;
544
545 return 0;
546
547 fail:
548 safe_close(saved_stdout);
549 safe_close(saved_stdin);
550 safe_close(fd);
551
552 return r;
553 }
554
555 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
556 _cleanup_close_ int fd = -1;
557 va_list ap;
558
559 assert(format);
560
561 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
562 if (fd < 0)
563 return fd;
564
565 va_start(ap, format);
566 vdprintf(fd, format, ap);
567 va_end(ap);
568
569 return 0;
570 }
571
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back onto stdin/stdout (where they are valid) and
 * closes the saved copies.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int status = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                status = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                status = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return status;
}
595
596 static int ask_for_confirmation(char *response, char **argv) {
597 int saved_stdout = -1, saved_stdin = -1, r;
598 _cleanup_free_ char *line = NULL;
599
600 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
601 if (r < 0)
602 return r;
603
604 line = exec_command_line(argv);
605 if (!line)
606 return -ENOMEM;
607
608 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
609
610 restore_confirm_stdio(&saved_stdin, &saved_stdout);
611
612 return r;
613 }
614
615 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
616 bool keep_groups = false;
617 int r;
618
619 assert(context);
620
621 /* Lookup and set GID and supplementary group list. Here too
622 * we avoid NSS lookups for gid=0. */
623
624 if (context->group || username) {
625
626 if (context->group) {
627 const char *g = context->group;
628
629 if ((r = get_group_creds(&g, &gid)) < 0)
630 return r;
631 }
632
633 /* First step, initialize groups from /etc/groups */
634 if (username && gid != 0) {
635 if (initgroups(username, gid) < 0)
636 return -errno;
637
638 keep_groups = true;
639 }
640
641 /* Second step, set our gids */
642 if (setresgid(gid, gid, gid) < 0)
643 return -errno;
644 }
645
646 if (context->supplementary_groups) {
647 int ngroups_max, k;
648 gid_t *gids;
649 char **i;
650
651 /* Final step, initialize any manually set supplementary groups */
652 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
653
654 if (!(gids = new(gid_t, ngroups_max)))
655 return -ENOMEM;
656
657 if (keep_groups) {
658 if ((k = getgroups(ngroups_max, gids)) < 0) {
659 free(gids);
660 return -errno;
661 }
662 } else
663 k = 0;
664
665 STRV_FOREACH(i, context->supplementary_groups) {
666 const char *g;
667
668 if (k >= ngroups_max) {
669 free(gids);
670 return -E2BIG;
671 }
672
673 g = *i;
674 r = get_group_creds(&g, gids+k);
675 if (r < 0) {
676 free(gids);
677 return r;
678 }
679
680 k++;
681 }
682
683 if (setgroups(k, gids) < 0) {
684 free(gids);
685 return -errno;
686 }
687
688 free(gids);
689 }
690
691 return 0;
692 }
693
694 static int enforce_user(const ExecContext *context, uid_t uid) {
695 assert(context);
696
697 /* Sets (but doesn't lookup) the uid and make sure we keep the
698 * capabilities while doing so. */
699
700 if (context->capabilities) {
701 _cleanup_cap_free_ cap_t d = NULL;
702 static const cap_value_t bits[] = {
703 CAP_SETUID, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
705 };
706
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
710 if (uid != 0) {
711 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
712
713 if (prctl(PR_GET_SECUREBITS) != sb)
714 if (prctl(PR_SET_SECUREBITS, sb) < 0)
715 return -errno;
716 }
717
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
720
721 d = cap_dup(context->capabilities);
722 if (!d)
723 return -errno;
724
725 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
726 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
727 return -errno;
728
729 if (cap_set_proc(d) < 0)
730 return -errno;
731 }
732
733 /* Third step: actually set the uids */
734 if (setresuid(uid, uid, uid) < 0)
735 return -errno;
736
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might got
739 corrupted due to the setresuid() so we need clean them up
740 later. This is done outside of this call. */
741
742 return 0;
743 }
744
745 #ifdef HAVE_PAM
746
747 static int null_conv(
748 int num_msg,
749 const struct pam_message **msg,
750 struct pam_response **resp,
751 void *appdata_ptr) {
752
753 /* We don't support conversations */
754
755 return PAM_CONV_ERR;
756 }
757
758 static int setup_pam(
759 const char *name,
760 const char *user,
761 uid_t uid,
762 const char *tty,
763 char ***pam_env,
764 int fds[], unsigned n_fds) {
765
766 static const struct pam_conv conv = {
767 .conv = null_conv,
768 .appdata_ptr = NULL
769 };
770
771 pam_handle_t *handle = NULL;
772 sigset_t ss, old_ss;
773 int pam_code = PAM_SUCCESS;
774 int err;
775 char **e = NULL;
776 bool close_session = false;
777 pid_t pam_pid = 0, parent_pid;
778 int flags = 0;
779
780 assert(name);
781 assert(user);
782 assert(pam_env);
783
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_GET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
790
791 if (log_get_max_level() < LOG_DEBUG)
792 flags |= PAM_SILENT;
793
794 pam_code = pam_start(name, user, &conv, &handle);
795 if (pam_code != PAM_SUCCESS) {
796 handle = NULL;
797 goto fail;
798 }
799
800 if (tty) {
801 pam_code = pam_set_item(handle, PAM_TTY, tty);
802 if (pam_code != PAM_SUCCESS)
803 goto fail;
804 }
805
806 pam_code = pam_acct_mgmt(handle, flags);
807 if (pam_code != PAM_SUCCESS)
808 goto fail;
809
810 pam_code = pam_open_session(handle, flags);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813
814 close_session = true;
815
816 e = pam_getenvlist(handle);
817 if (!e) {
818 pam_code = PAM_BUF_ERR;
819 goto fail;
820 }
821
822 /* Block SIGTERM, so that we know that it won't get lost in
823 * the child */
824 if (sigemptyset(&ss) < 0 ||
825 sigaddset(&ss, SIGTERM) < 0 ||
826 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
827 goto fail;
828
829 parent_pid = getpid();
830
831 pam_pid = fork();
832 if (pam_pid < 0)
833 goto fail;
834
835 if (pam_pid == 0) {
836 int sig;
837 int r = EXIT_PAM;
838
839 /* The child's job is to reset the PAM session on
840 * termination */
841
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
845
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds, n_fds);
850
851 /* Drop privileges - we don't need any to pam_close_session
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid, uid, uid) < 0)
856 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
857
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
864 goto child_finish;
865
866 /* Check if our parent process might already have
867 * died? */
868 if (getppid() == parent_pid) {
869 for (;;) {
870 if (sigwait(&ss, &sig) < 0) {
871 if (errno == EINTR)
872 continue;
873
874 goto child_finish;
875 }
876
877 assert(sig == SIGTERM);
878 break;
879 }
880 }
881
882 /* If our parent died we'll end the session */
883 if (getppid() != parent_pid) {
884 pam_code = pam_close_session(handle, flags);
885 if (pam_code != PAM_SUCCESS)
886 goto child_finish;
887 }
888
889 r = 0;
890
891 child_finish:
892 pam_end(handle, pam_code | flags);
893 _exit(r);
894 }
895
896 /* If the child was forked off successfully it will do all the
897 * cleanups, so forget about the handle here. */
898 handle = NULL;
899
900 /* Unblock SIGTERM again in the parent */
901 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
902 goto fail;
903
904 /* We close the log explicitly here, since the PAM modules
905 * might have opened it, but we don't want this fd around. */
906 closelog();
907
908 *pam_env = e;
909 e = NULL;
910
911 return 0;
912
913 fail:
914 if (pam_code != PAM_SUCCESS) {
915 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
916 err = -EPERM; /* PAM errors do not map to errno */
917 } else {
918 log_error_errno(errno, "PAM failed: %m");
919 err = -errno;
920 }
921
922 if (handle) {
923 if (close_session)
924 pam_code = pam_close_session(handle, flags);
925
926 pam_end(handle, pam_code | flags);
927 }
928
929 strv_free(e);
930
931 closelog();
932
933 if (pam_pid > 1) {
934 kill(pam_pid, SIGTERM);
935 kill(pam_pid, SIGCONT);
936 }
937
938 return err;
939 }
940 #endif
941
/* Renames the current process to "(<name>)", where <name> is the last
 * (at most) 8 characters of the basename of path, or to "(...)" if the
 * basename is empty. */
static void rename_process_from_path(const char *path) {
        char process_name[11], *w = process_name;
        const char *base;
        size_t len;

        /* The result must not exceed 10 chars (the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail of the name: it is usually the more
                 * interesting part, as the start may just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, base, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(process_name);
}
972
973 #ifdef HAVE_SECCOMP
974
975 static int apply_seccomp(const ExecContext *c) {
976 uint32_t negative_action, action;
977 scmp_filter_ctx *seccomp;
978 Iterator i;
979 void *id;
980 int r;
981
982 assert(c);
983
984 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
985
986 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
987 if (!seccomp)
988 return -ENOMEM;
989
990 if (c->syscall_archs) {
991
992 SET_FOREACH(id, c->syscall_archs, i) {
993 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
994 if (r == -EEXIST)
995 continue;
996 if (r < 0)
997 goto finish;
998 }
999
1000 } else {
1001 r = seccomp_add_secondary_archs(seccomp);
1002 if (r < 0)
1003 goto finish;
1004 }
1005
1006 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1007 SET_FOREACH(id, c->syscall_filter, i) {
1008 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1009 if (r < 0)
1010 goto finish;
1011 }
1012
1013 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1014 if (r < 0)
1015 goto finish;
1016
1017 r = seccomp_load(seccomp);
1018
1019 finish:
1020 seccomp_release(seccomp);
1021 return r;
1022 }
1023
1024 static int apply_address_families(const ExecContext *c) {
1025 scmp_filter_ctx *seccomp;
1026 Iterator i;
1027 int r;
1028
1029 assert(c);
1030
1031 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1032 if (!seccomp)
1033 return -ENOMEM;
1034
1035 r = seccomp_add_secondary_archs(seccomp);
1036 if (r < 0)
1037 goto finish;
1038
1039 if (c->address_families_whitelist) {
1040 int af, first = 0, last = 0;
1041 void *afp;
1042
1043 /* If this is a whitelist, we first block the address
1044 * families that are out of range and then everything
1045 * that is not in the set. First, we find the lowest
1046 * and highest address family in the set. */
1047
1048 SET_FOREACH(afp, c->address_families, i) {
1049 af = PTR_TO_INT(afp);
1050
1051 if (af <= 0 || af >= af_max())
1052 continue;
1053
1054 if (first == 0 || af < first)
1055 first = af;
1056
1057 if (last == 0 || af > last)
1058 last = af;
1059 }
1060
1061 assert((first == 0) == (last == 0));
1062
1063 if (first == 0) {
1064
1065 /* No entries in the valid range, block everything */
1066 r = seccomp_rule_add(
1067 seccomp,
1068 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1069 SCMP_SYS(socket),
1070 0);
1071 if (r < 0)
1072 goto finish;
1073
1074 } else {
1075
1076 /* Block everything below the first entry */
1077 r = seccomp_rule_add(
1078 seccomp,
1079 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1080 SCMP_SYS(socket),
1081 1,
1082 SCMP_A0(SCMP_CMP_LT, first));
1083 if (r < 0)
1084 goto finish;
1085
1086 /* Block everything above the last entry */
1087 r = seccomp_rule_add(
1088 seccomp,
1089 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1090 SCMP_SYS(socket),
1091 1,
1092 SCMP_A0(SCMP_CMP_GT, last));
1093 if (r < 0)
1094 goto finish;
1095
1096 /* Block everything between the first and last
1097 * entry */
1098 for (af = 1; af < af_max(); af++) {
1099
1100 if (set_contains(c->address_families, INT_TO_PTR(af)))
1101 continue;
1102
1103 r = seccomp_rule_add(
1104 seccomp,
1105 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1106 SCMP_SYS(socket),
1107 1,
1108 SCMP_A0(SCMP_CMP_EQ, af));
1109 if (r < 0)
1110 goto finish;
1111 }
1112 }
1113
1114 } else {
1115 void *af;
1116
1117 /* If this is a blacklist, then generate one rule for
1118 * each address family that are then combined in OR
1119 * checks. */
1120
1121 SET_FOREACH(af, c->address_families, i) {
1122
1123 r = seccomp_rule_add(
1124 seccomp,
1125 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1126 SCMP_SYS(socket),
1127 1,
1128 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1129 if (r < 0)
1130 goto finish;
1131 }
1132 }
1133
1134 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1135 if (r < 0)
1136 goto finish;
1137
1138 r = seccomp_load(seccomp);
1139
1140 finish:
1141 seccomp_release(seccomp);
1142 return r;
1143 }
1144
1145 #endif
1146
1147 static void do_idle_pipe_dance(int idle_pipe[4]) {
1148 assert(idle_pipe);
1149
1150
1151 safe_close(idle_pipe[1]);
1152 safe_close(idle_pipe[2]);
1153
1154 if (idle_pipe[0] >= 0) {
1155 int r;
1156
1157 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1158
1159 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1160 /* Signal systemd that we are bored and want to continue. */
1161 r = write(idle_pipe[3], "x", 1);
1162 if (r > 0)
1163 /* Wait for systemd to react to the signal above. */
1164 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1165 }
1166
1167 safe_close(idle_pipe[0]);
1168
1169 }
1170
1171 safe_close(idle_pipe[3]);
1172 }
1173
1174 static int build_environment(
1175 const ExecContext *c,
1176 unsigned n_fds,
1177 usec_t watchdog_usec,
1178 const char *home,
1179 const char *username,
1180 const char *shell,
1181 char ***ret) {
1182
1183 _cleanup_strv_free_ char **our_env = NULL;
1184 unsigned n_env = 0;
1185 char *x;
1186
1187 assert(c);
1188 assert(ret);
1189
1190 our_env = new0(char*, 10);
1191 if (!our_env)
1192 return -ENOMEM;
1193
1194 if (n_fds > 0) {
1195 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1196 return -ENOMEM;
1197 our_env[n_env++] = x;
1198
1199 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1200 return -ENOMEM;
1201 our_env[n_env++] = x;
1202 }
1203
1204 if (watchdog_usec > 0) {
1205 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1206 return -ENOMEM;
1207 our_env[n_env++] = x;
1208
1209 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1210 return -ENOMEM;
1211 our_env[n_env++] = x;
1212 }
1213
1214 if (home) {
1215 x = strappend("HOME=", home);
1216 if (!x)
1217 return -ENOMEM;
1218 our_env[n_env++] = x;
1219 }
1220
1221 if (username) {
1222 x = strappend("LOGNAME=", username);
1223 if (!x)
1224 return -ENOMEM;
1225 our_env[n_env++] = x;
1226
1227 x = strappend("USER=", username);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231 }
1232
1233 if (shell) {
1234 x = strappend("SHELL=", shell);
1235 if (!x)
1236 return -ENOMEM;
1237 our_env[n_env++] = x;
1238 }
1239
1240 if (is_terminal_input(c->std_input) ||
1241 c->std_output == EXEC_OUTPUT_TTY ||
1242 c->std_error == EXEC_OUTPUT_TTY ||
1243 c->tty_path) {
1244
1245 x = strdup(default_term_for_tty(tty_path(c)));
1246 if (!x)
1247 return -ENOMEM;
1248 our_env[n_env++] = x;
1249 }
1250
1251 our_env[n_env++] = NULL;
1252 assert(n_env <= 10);
1253
1254 *ret = our_env;
1255 our_env = NULL;
1256
1257 return 0;
1258 }
1259
1260 static bool exec_needs_mount_namespace(
1261 const ExecContext *context,
1262 const ExecParameters *params,
1263 ExecRuntime *runtime) {
1264
1265 assert(context);
1266 assert(params);
1267
1268 if (!strv_isempty(context->read_write_dirs) ||
1269 !strv_isempty(context->read_only_dirs) ||
1270 !strv_isempty(context->inaccessible_dirs))
1271 return true;
1272
1273 if (context->mount_flags != 0)
1274 return true;
1275
1276 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1277 return true;
1278
1279 if (params->bus_endpoint_path)
1280 return true;
1281
1282 if (context->private_devices ||
1283 context->protect_system != PROTECT_SYSTEM_NO ||
1284 context->protect_home != PROTECT_HOME_NO)
1285 return true;
1286
1287 return false;
1288 }
1289
1290 static int exec_child(
1291 Unit *unit,
1292 ExecCommand *command,
1293 const ExecContext *context,
1294 const ExecParameters *params,
1295 ExecRuntime *runtime,
1296 char **argv,
1297 int socket_fd,
1298 int *fds, unsigned n_fds,
1299 char **files_env,
1300 int *exit_status) {
1301
1302 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1303 _cleanup_free_ char *mac_selinux_context_net = NULL;
1304 const char *username = NULL, *home = NULL, *shell = NULL;
1305 unsigned n_dont_close = 0;
1306 int dont_close[n_fds + 4];
1307 uid_t uid = UID_INVALID;
1308 gid_t gid = GID_INVALID;
1309 int i, r;
1310 bool needs_mount_namespace;
1311
1312 assert(unit);
1313 assert(command);
1314 assert(context);
1315 assert(params);
1316 assert(exit_status);
1317
1318 rename_process_from_path(command->path);
1319
1320 /* We reset exactly these signals, since they are the
1321 * only ones we set to SIG_IGN in the main daemon. All
1322 * others we leave untouched because we set them to
1323 * SIG_DFL or a valid handler initially, both of which
1324 * will be demoted to SIG_DFL. */
1325 default_signals(SIGNALS_CRASH_HANDLER,
1326 SIGNALS_IGNORE, -1);
1327
1328 if (context->ignore_sigpipe)
1329 ignore_signals(SIGPIPE, -1);
1330
1331 r = reset_signal_mask();
1332 if (r < 0) {
1333 *exit_status = EXIT_SIGNAL_MASK;
1334 return r;
1335 }
1336
1337 if (params->idle_pipe)
1338 do_idle_pipe_dance(params->idle_pipe);
1339
1340 /* Close sockets very early to make sure we don't
1341 * block init reexecution because it cannot bind its
1342 * sockets */
1343
1344 log_forget_fds();
1345
1346 if (socket_fd >= 0)
1347 dont_close[n_dont_close++] = socket_fd;
1348 if (n_fds > 0) {
1349 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1350 n_dont_close += n_fds;
1351 }
1352 if (params->bus_endpoint_fd >= 0)
1353 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1354 if (runtime) {
1355 if (runtime->netns_storage_socket[0] >= 0)
1356 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1357 if (runtime->netns_storage_socket[1] >= 0)
1358 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1359 }
1360
1361 r = close_all_fds(dont_close, n_dont_close);
1362 if (r < 0) {
1363 *exit_status = EXIT_FDS;
1364 return r;
1365 }
1366
1367 if (!context->same_pgrp)
1368 if (setsid() < 0) {
1369 *exit_status = EXIT_SETSID;
1370 return -errno;
1371 }
1372
1373 exec_context_tty_reset(context);
1374
1375 if (params->confirm_spawn) {
1376 char response;
1377
1378 r = ask_for_confirmation(&response, argv);
1379 if (r == -ETIMEDOUT)
1380 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1381 else if (r < 0)
1382 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1383 else if (response == 's') {
1384 write_confirm_message("Skipping execution.\n");
1385 *exit_status = EXIT_CONFIRM;
1386 return -ECANCELED;
1387 } else if (response == 'n') {
1388 write_confirm_message("Failing execution.\n");
1389 *exit_status = 0;
1390 return 0;
1391 }
1392 }
1393
1394 if (context->user) {
1395 username = context->user;
1396 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1397 if (r < 0) {
1398 *exit_status = EXIT_USER;
1399 return r;
1400 }
1401 }
1402
1403 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1404 * must sure to drop O_NONBLOCK */
1405 if (socket_fd >= 0)
1406 fd_nonblock(socket_fd, false);
1407
1408 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1409 if (r < 0) {
1410 *exit_status = EXIT_STDIN;
1411 return r;
1412 }
1413
1414 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1415 if (r < 0) {
1416 *exit_status = EXIT_STDOUT;
1417 return r;
1418 }
1419
1420 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1421 if (r < 0) {
1422 *exit_status = EXIT_STDERR;
1423 return r;
1424 }
1425
1426 if (params->cgroup_path) {
1427 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1428 if (r < 0) {
1429 *exit_status = EXIT_CGROUP;
1430 return r;
1431 }
1432 }
1433
1434 if (context->oom_score_adjust_set) {
1435 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1436
1437 /* When we can't make this change due to EPERM, then
1438 * let's silently skip over it. User namespaces
1439 * prohibit write access to this file, and we
1440 * shouldn't trip up over that. */
1441
1442 sprintf(t, "%i", context->oom_score_adjust);
1443 r = write_string_file("/proc/self/oom_score_adj", t);
1444 if (r == -EPERM || r == -EACCES) {
1445 log_open();
1446 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1447 log_close();
1448 } else if (r < 0) {
1449 *exit_status = EXIT_OOM_ADJUST;
1450 return -errno;
1451 }
1452 }
1453
1454 if (context->nice_set)
1455 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1456 *exit_status = EXIT_NICE;
1457 return -errno;
1458 }
1459
1460 if (context->cpu_sched_set) {
1461 struct sched_param param = {
1462 .sched_priority = context->cpu_sched_priority,
1463 };
1464
1465 r = sched_setscheduler(0,
1466 context->cpu_sched_policy |
1467 (context->cpu_sched_reset_on_fork ?
1468 SCHED_RESET_ON_FORK : 0),
1469 &param);
1470 if (r < 0) {
1471 *exit_status = EXIT_SETSCHEDULER;
1472 return -errno;
1473 }
1474 }
1475
1476 if (context->cpuset)
1477 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1478 *exit_status = EXIT_CPUAFFINITY;
1479 return -errno;
1480 }
1481
1482 if (context->ioprio_set)
1483 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1484 *exit_status = EXIT_IOPRIO;
1485 return -errno;
1486 }
1487
1488 if (context->timer_slack_nsec != NSEC_INFINITY)
1489 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1490 *exit_status = EXIT_TIMERSLACK;
1491 return -errno;
1492 }
1493
1494 if (context->personality != PERSONALITY_INVALID)
1495 if (personality(context->personality) < 0) {
1496 *exit_status = EXIT_PERSONALITY;
1497 return -errno;
1498 }
1499
1500 if (context->utmp_id)
1501 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1502
1503 if (context->user && is_terminal_input(context->std_input)) {
1504 r = chown_terminal(STDIN_FILENO, uid);
1505 if (r < 0) {
1506 *exit_status = EXIT_STDIN;
1507 return r;
1508 }
1509 }
1510
1511 #ifdef ENABLE_KDBUS
1512 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1513 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1514
1515 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1516 if (r < 0) {
1517 *exit_status = EXIT_BUS_ENDPOINT;
1518 return r;
1519 }
1520 }
1521 #endif
1522
1523 /* If delegation is enabled we'll pass ownership of the cgroup
1524 * (but only in systemd's own controller hierarchy!) to the
1525 * user of the new process. */
1526 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1527 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1528 if (r < 0) {
1529 *exit_status = EXIT_CGROUP;
1530 return r;
1531 }
1532
1533
1534 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1535 if (r < 0) {
1536 *exit_status = EXIT_CGROUP;
1537 return r;
1538 }
1539 }
1540
1541 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1542 char **rt;
1543
1544 STRV_FOREACH(rt, context->runtime_directory) {
1545 _cleanup_free_ char *p;
1546
1547 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1548 if (!p) {
1549 *exit_status = EXIT_RUNTIME_DIRECTORY;
1550 return -ENOMEM;
1551 }
1552
1553 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1554 if (r < 0) {
1555 *exit_status = EXIT_RUNTIME_DIRECTORY;
1556 return r;
1557 }
1558 }
1559 }
1560
1561 if (params->apply_permissions) {
1562 r = enforce_groups(context, username, gid);
1563 if (r < 0) {
1564 *exit_status = EXIT_GROUP;
1565 return r;
1566 }
1567 }
1568
1569 umask(context->umask);
1570
1571 #ifdef HAVE_PAM
1572 if (params->apply_permissions && context->pam_name && username) {
1573 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1574 if (r < 0) {
1575 *exit_status = EXIT_PAM;
1576 return r;
1577 }
1578 }
1579 #endif
1580
1581 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1582 r = setup_netns(runtime->netns_storage_socket);
1583 if (r < 0) {
1584 *exit_status = EXIT_NETWORK;
1585 return r;
1586 }
1587 }
1588
1589 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1590
1591 if (needs_mount_namespace) {
1592 char *tmp = NULL, *var = NULL;
1593
1594 /* The runtime struct only contains the parent
1595 * of the private /tmp, which is
1596 * non-accessible to world users. Inside of it
1597 * there's a /tmp that is sticky, and that's
1598 * the one we want to use here. */
1599
1600 if (context->private_tmp && runtime) {
1601 if (runtime->tmp_dir)
1602 tmp = strjoina(runtime->tmp_dir, "/tmp");
1603 if (runtime->var_tmp_dir)
1604 var = strjoina(runtime->var_tmp_dir, "/tmp");
1605 }
1606
1607 r = setup_namespace(
1608 params->apply_chroot ? context->root_directory : NULL,
1609 context->read_write_dirs,
1610 context->read_only_dirs,
1611 context->inaccessible_dirs,
1612 tmp,
1613 var,
1614 params->bus_endpoint_path,
1615 context->private_devices,
1616 context->protect_home,
1617 context->protect_system,
1618 context->mount_flags);
1619
1620 /* If we couldn't set up the namespace this is
1621 * probably due to a missing capability. In this case,
1622 * silently proceeed. */
1623 if (r == -EPERM || r == -EACCES) {
1624 log_open();
1625 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1626 log_close();
1627 } else if (r < 0) {
1628 *exit_status = EXIT_NAMESPACE;
1629 return r;
1630 }
1631 }
1632
1633 if (params->apply_chroot) {
1634 if (!needs_mount_namespace && context->root_directory)
1635 if (chroot(context->root_directory) < 0) {
1636 *exit_status = EXIT_CHROOT;
1637 return -errno;
1638 }
1639
1640 if (chdir(context->working_directory ?: "/") < 0 &&
1641 !context->working_directory_missing_ok) {
1642 *exit_status = EXIT_CHDIR;
1643 return -errno;
1644 }
1645 } else {
1646 _cleanup_free_ char *d = NULL;
1647
1648 if (asprintf(&d, "%s/%s",
1649 context->root_directory ?: "",
1650 context->working_directory ?: "") < 0) {
1651 *exit_status = EXIT_MEMORY;
1652 return -ENOMEM;
1653 }
1654
1655 if (chdir(d) < 0 &&
1656 !context->working_directory_missing_ok) {
1657 *exit_status = EXIT_CHDIR;
1658 return -errno;
1659 }
1660 }
1661
1662 #ifdef HAVE_SELINUX
1663 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1664 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1665 if (r < 0) {
1666 *exit_status = EXIT_SELINUX_CONTEXT;
1667 return r;
1668 }
1669 }
1670 #endif
1671
1672 /* We repeat the fd closing here, to make sure that
1673 * nothing is leaked from the PAM modules. Note that
1674 * we are more aggressive this time since socket_fd
1675 * and the netns fds we don't need anymore. The custom
1676 * endpoint fd was needed to upload the policy and can
1677 * now be closed as well. */
1678 r = close_all_fds(fds, n_fds);
1679 if (r >= 0)
1680 r = shift_fds(fds, n_fds);
1681 if (r >= 0)
1682 r = flags_fds(fds, n_fds, context->non_blocking);
1683 if (r < 0) {
1684 *exit_status = EXIT_FDS;
1685 return r;
1686 }
1687
1688 if (params->apply_permissions) {
1689
1690 for (i = 0; i < _RLIMIT_MAX; i++) {
1691 if (!context->rlimit[i])
1692 continue;
1693
1694 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1695 *exit_status = EXIT_LIMITS;
1696 return -errno;
1697 }
1698 }
1699
1700 if (context->capability_bounding_set_drop) {
1701 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1702 if (r < 0) {
1703 *exit_status = EXIT_CAPABILITIES;
1704 return r;
1705 }
1706 }
1707
1708 #ifdef HAVE_SMACK
1709 if (context->smack_process_label) {
1710 r = mac_smack_apply_pid(0, context->smack_process_label);
1711 if (r < 0) {
1712 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1713 return r;
1714 }
1715 }
1716 #endif
1717
1718 if (context->user) {
1719 r = enforce_user(context, uid);
1720 if (r < 0) {
1721 *exit_status = EXIT_USER;
1722 return r;
1723 }
1724 }
1725
1726 /* PR_GET_SECUREBITS is not privileged, while
1727 * PR_SET_SECUREBITS is. So to suppress
1728 * potential EPERMs we'll try not to call
1729 * PR_SET_SECUREBITS unless necessary. */
1730 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1731 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1732 *exit_status = EXIT_SECUREBITS;
1733 return -errno;
1734 }
1735
1736 if (context->capabilities)
1737 if (cap_set_proc(context->capabilities) < 0) {
1738 *exit_status = EXIT_CAPABILITIES;
1739 return -errno;
1740 }
1741
1742 if (context->no_new_privileges)
1743 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1744 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1745 return -errno;
1746 }
1747
1748 #ifdef HAVE_SECCOMP
1749 if (context->address_families_whitelist ||
1750 !set_isempty(context->address_families)) {
1751 r = apply_address_families(context);
1752 if (r < 0) {
1753 *exit_status = EXIT_ADDRESS_FAMILIES;
1754 return r;
1755 }
1756 }
1757
1758 if (context->syscall_whitelist ||
1759 !set_isempty(context->syscall_filter) ||
1760 !set_isempty(context->syscall_archs)) {
1761 r = apply_seccomp(context);
1762 if (r < 0) {
1763 *exit_status = EXIT_SECCOMP;
1764 return r;
1765 }
1766 }
1767 #endif
1768
1769 #ifdef HAVE_SELINUX
1770 if (mac_selinux_use()) {
1771 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1772
1773 if (exec_context) {
1774 r = setexeccon(exec_context);
1775 if (r < 0) {
1776 *exit_status = EXIT_SELINUX_CONTEXT;
1777 return r;
1778 }
1779 }
1780 }
1781 #endif
1782
1783 #ifdef HAVE_APPARMOR
1784 if (context->apparmor_profile && mac_apparmor_use()) {
1785 r = aa_change_onexec(context->apparmor_profile);
1786 if (r < 0 && !context->apparmor_profile_ignore) {
1787 *exit_status = EXIT_APPARMOR_PROFILE;
1788 return -errno;
1789 }
1790 }
1791 #endif
1792 }
1793
1794 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1795 if (r < 0) {
1796 *exit_status = EXIT_MEMORY;
1797 return r;
1798 }
1799
1800 final_env = strv_env_merge(5,
1801 params->environment,
1802 our_env,
1803 context->environment,
1804 files_env,
1805 pam_env,
1806 NULL);
1807 if (!final_env) {
1808 *exit_status = EXIT_MEMORY;
1809 return -ENOMEM;
1810 }
1811
1812 final_argv = replace_env_argv(argv, final_env);
1813 if (!final_argv) {
1814 *exit_status = EXIT_MEMORY;
1815 return -ENOMEM;
1816 }
1817
1818 final_env = strv_env_clean(final_env);
1819
1820 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1821 _cleanup_free_ char *line;
1822
1823 line = exec_command_line(final_argv);
1824 if (line) {
1825 log_open();
1826 log_struct(LOG_DEBUG,
1827 LOG_UNIT_ID(unit),
1828 "EXECUTABLE=%s", command->path,
1829 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1830 NULL);
1831 log_close();
1832 }
1833 }
1834
1835 execve(command->path, final_argv, final_env);
1836 *exit_status = EXIT_EXEC;
1837 return -errno;
1838 }
1839
1840 int exec_spawn(Unit *unit,
1841 ExecCommand *command,
1842 const ExecContext *context,
1843 const ExecParameters *params,
1844 ExecRuntime *runtime,
1845 pid_t *ret) {
1846
1847 _cleanup_strv_free_ char **files_env = NULL;
1848 int *fds = NULL; unsigned n_fds = 0;
1849 _cleanup_free_ char *line = NULL;
1850 int socket_fd, r;
1851 char **argv;
1852 pid_t pid;
1853
1854 assert(unit);
1855 assert(command);
1856 assert(context);
1857 assert(ret);
1858 assert(params);
1859 assert(params->fds || params->n_fds <= 0);
1860
1861 if (context->std_input == EXEC_INPUT_SOCKET ||
1862 context->std_output == EXEC_OUTPUT_SOCKET ||
1863 context->std_error == EXEC_OUTPUT_SOCKET) {
1864
1865 if (params->n_fds != 1) {
1866 log_unit_error(unit, "Got more than one socket.");
1867 return -EINVAL;
1868 }
1869
1870 socket_fd = params->fds[0];
1871 } else {
1872 socket_fd = -1;
1873 fds = params->fds;
1874 n_fds = params->n_fds;
1875 }
1876
1877 r = exec_context_load_environment(unit, context, &files_env);
1878 if (r < 0)
1879 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1880
1881 argv = params->argv ?: command->argv;
1882 line = exec_command_line(argv);
1883 if (!line)
1884 return log_oom();
1885
1886 log_struct(LOG_DEBUG,
1887 LOG_UNIT_ID(unit),
1888 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1889 "EXECUTABLE=%s", command->path,
1890 NULL);
1891 pid = fork();
1892 if (pid < 0)
1893 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1894
1895 if (pid == 0) {
1896 int exit_status;
1897
1898 r = exec_child(unit,
1899 command,
1900 context,
1901 params,
1902 runtime,
1903 argv,
1904 socket_fd,
1905 fds, n_fds,
1906 files_env,
1907 &exit_status);
1908 if (r < 0) {
1909 log_open();
1910 log_struct_errno(LOG_ERR, r,
1911 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1912 LOG_UNIT_ID(unit),
1913 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1914 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1915 command->path),
1916 "EXECUTABLE=%s", command->path,
1917 NULL);
1918 }
1919
1920 _exit(exit_status);
1921 }
1922
1923 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1924
1925 /* We add the new process to the cgroup both in the child (so
1926 * that we can be sure that no user code is ever executed
1927 * outside of the cgroup) and in the parent (so that we can be
1928 * sure that when we kill the cgroup the process will be
1929 * killed too). */
1930 if (params->cgroup_path)
1931 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1932
1933 exec_status_start(&command->exec_status, pid);
1934
1935 *ret = pid;
1936 return 0;
1937 }
1938
1939 void exec_context_init(ExecContext *c) {
1940 assert(c);
1941
1942 c->umask = 0022;
1943 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1944 c->cpu_sched_policy = SCHED_OTHER;
1945 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1946 c->syslog_level_prefix = true;
1947 c->ignore_sigpipe = true;
1948 c->timer_slack_nsec = NSEC_INFINITY;
1949 c->personality = PERSONALITY_INVALID;
1950 c->runtime_directory_mode = 0755;
1951 }
1952
1953 void exec_context_done(ExecContext *c) {
1954 unsigned l;
1955
1956 assert(c);
1957
1958 strv_free(c->environment);
1959 c->environment = NULL;
1960
1961 strv_free(c->environment_files);
1962 c->environment_files = NULL;
1963
1964 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1965 free(c->rlimit[l]);
1966 c->rlimit[l] = NULL;
1967 }
1968
1969 free(c->working_directory);
1970 c->working_directory = NULL;
1971 free(c->root_directory);
1972 c->root_directory = NULL;
1973
1974 free(c->tty_path);
1975 c->tty_path = NULL;
1976
1977 free(c->syslog_identifier);
1978 c->syslog_identifier = NULL;
1979
1980 free(c->user);
1981 c->user = NULL;
1982
1983 free(c->group);
1984 c->group = NULL;
1985
1986 strv_free(c->supplementary_groups);
1987 c->supplementary_groups = NULL;
1988
1989 free(c->pam_name);
1990 c->pam_name = NULL;
1991
1992 if (c->capabilities) {
1993 cap_free(c->capabilities);
1994 c->capabilities = NULL;
1995 }
1996
1997 strv_free(c->read_only_dirs);
1998 c->read_only_dirs = NULL;
1999
2000 strv_free(c->read_write_dirs);
2001 c->read_write_dirs = NULL;
2002
2003 strv_free(c->inaccessible_dirs);
2004 c->inaccessible_dirs = NULL;
2005
2006 if (c->cpuset)
2007 CPU_FREE(c->cpuset);
2008
2009 free(c->utmp_id);
2010 c->utmp_id = NULL;
2011
2012 free(c->selinux_context);
2013 c->selinux_context = NULL;
2014
2015 free(c->apparmor_profile);
2016 c->apparmor_profile = NULL;
2017
2018 set_free(c->syscall_filter);
2019 c->syscall_filter = NULL;
2020
2021 set_free(c->syscall_archs);
2022 c->syscall_archs = NULL;
2023
2024 set_free(c->address_families);
2025 c->address_families = NULL;
2026
2027 strv_free(c->runtime_directory);
2028 c->runtime_directory = NULL;
2029
2030 bus_endpoint_free(c->bus_endpoint);
2031 c->bus_endpoint = NULL;
2032 }
2033
2034 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2035 char **i;
2036
2037 assert(c);
2038
2039 if (!runtime_prefix)
2040 return 0;
2041
2042 STRV_FOREACH(i, c->runtime_directory) {
2043 _cleanup_free_ char *p;
2044
2045 p = strjoin(runtime_prefix, "/", *i, NULL);
2046 if (!p)
2047 return -ENOMEM;
2048
2049 /* We execute this synchronously, since we need to be
2050 * sure this is gone when we start the service
2051 * next. */
2052 (void) rm_rf(p, REMOVE_ROOT);
2053 }
2054
2055 return 0;
2056 }
2057
2058 void exec_command_done(ExecCommand *c) {
2059 assert(c);
2060
2061 free(c->path);
2062 c->path = NULL;
2063
2064 strv_free(c->argv);
2065 c->argv = NULL;
2066 }
2067
2068 void exec_command_done_array(ExecCommand *c, unsigned n) {
2069 unsigned i;
2070
2071 for (i = 0; i < n; i++)
2072 exec_command_done(c+i);
2073 }
2074
2075 ExecCommand* exec_command_free_list(ExecCommand *c) {
2076 ExecCommand *i;
2077
2078 while ((i = c)) {
2079 LIST_REMOVE(command, c, i);
2080 exec_command_done(i);
2081 free(i);
2082 }
2083
2084 return NULL;
2085 }
2086
2087 void exec_command_free_array(ExecCommand **c, unsigned n) {
2088 unsigned i;
2089
2090 for (i = 0; i < n; i++)
2091 c[i] = exec_command_free_list(c[i]);
2092 }
2093
2094 typedef struct InvalidEnvInfo {
2095 Unit *unit;
2096 const char *path;
2097 } InvalidEnvInfo;
2098
2099 static void invalid_env(const char *p, void *userdata) {
2100 InvalidEnvInfo *info = userdata;
2101
2102 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2103 }
2104
2105 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2106 char **i, **r = NULL;
2107
2108 assert(c);
2109 assert(l);
2110
2111 STRV_FOREACH(i, c->environment_files) {
2112 char *fn;
2113 int k;
2114 bool ignore = false;
2115 char **p;
2116 _cleanup_globfree_ glob_t pglob = {};
2117 int count, n;
2118
2119 fn = *i;
2120
2121 if (fn[0] == '-') {
2122 ignore = true;
2123 fn ++;
2124 }
2125
2126 if (!path_is_absolute(fn)) {
2127 if (ignore)
2128 continue;
2129
2130 strv_free(r);
2131 return -EINVAL;
2132 }
2133
2134 /* Filename supports globbing, take all matching files */
2135 errno = 0;
2136 if (glob(fn, 0, NULL, &pglob) != 0) {
2137 if (ignore)
2138 continue;
2139
2140 strv_free(r);
2141 return errno ? -errno : -EINVAL;
2142 }
2143 count = pglob.gl_pathc;
2144 if (count == 0) {
2145 if (ignore)
2146 continue;
2147
2148 strv_free(r);
2149 return -EINVAL;
2150 }
2151 for (n = 0; n < count; n++) {
2152 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2153 if (k < 0) {
2154 if (ignore)
2155 continue;
2156
2157 strv_free(r);
2158 return k;
2159 }
2160 /* Log invalid environment variables with filename */
2161 if (p) {
2162 InvalidEnvInfo info = {
2163 .unit = unit,
2164 .path = pglob.gl_pathv[n]
2165 };
2166
2167 p = strv_env_clean_with_callback(p, invalid_env, &info);
2168 }
2169
2170 if (r == NULL)
2171 r = p;
2172 else {
2173 char **m;
2174
2175 m = strv_env_merge(2, r, p);
2176 strv_free(r);
2177 strv_free(p);
2178 if (!m)
2179 return -ENOMEM;
2180
2181 r = m;
2182 }
2183 }
2184 }
2185
2186 *l = r;
2187
2188 return 0;
2189 }
2190
2191 static bool tty_may_match_dev_console(const char *tty) {
2192 _cleanup_free_ char *active = NULL;
2193 char *console;
2194
2195 if (startswith(tty, "/dev/"))
2196 tty += 5;
2197
2198 /* trivial identity? */
2199 if (streq(tty, "console"))
2200 return true;
2201
2202 console = resolve_dev_console(&active);
2203 /* if we could not resolve, assume it may */
2204 if (!console)
2205 return true;
2206
2207 /* "tty0" means the active VC, so it may be the same sometimes */
2208 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2209 }
2210
2211 bool exec_context_may_touch_console(ExecContext *ec) {
2212 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2213 is_terminal_input(ec->std_input) ||
2214 is_terminal_output(ec->std_output) ||
2215 is_terminal_output(ec->std_error)) &&
2216 tty_may_match_dev_console(tty_path(ec));
2217 }
2218
2219 static void strv_fprintf(FILE *f, char **l) {
2220 char **g;
2221
2222 assert(f);
2223
2224 STRV_FOREACH(g, l)
2225 fprintf(f, " %s", *g);
2226 }
2227
2228 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2229 char **e;
2230 unsigned i;
2231
2232 assert(c);
2233 assert(f);
2234
2235 prefix = strempty(prefix);
2236
2237 fprintf(f,
2238 "%sUMask: %04o\n"
2239 "%sWorkingDirectory: %s\n"
2240 "%sRootDirectory: %s\n"
2241 "%sNonBlocking: %s\n"
2242 "%sPrivateTmp: %s\n"
2243 "%sPrivateNetwork: %s\n"
2244 "%sPrivateDevices: %s\n"
2245 "%sProtectHome: %s\n"
2246 "%sProtectSystem: %s\n"
2247 "%sIgnoreSIGPIPE: %s\n",
2248 prefix, c->umask,
2249 prefix, c->working_directory ? c->working_directory : "/",
2250 prefix, c->root_directory ? c->root_directory : "/",
2251 prefix, yes_no(c->non_blocking),
2252 prefix, yes_no(c->private_tmp),
2253 prefix, yes_no(c->private_network),
2254 prefix, yes_no(c->private_devices),
2255 prefix, protect_home_to_string(c->protect_home),
2256 prefix, protect_system_to_string(c->protect_system),
2257 prefix, yes_no(c->ignore_sigpipe));
2258
2259 STRV_FOREACH(e, c->environment)
2260 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2261
2262 STRV_FOREACH(e, c->environment_files)
2263 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2264
2265 if (c->nice_set)
2266 fprintf(f,
2267 "%sNice: %i\n",
2268 prefix, c->nice);
2269
2270 if (c->oom_score_adjust_set)
2271 fprintf(f,
2272 "%sOOMScoreAdjust: %i\n",
2273 prefix, c->oom_score_adjust);
2274
2275 for (i = 0; i < RLIM_NLIMITS; i++)
2276 if (c->rlimit[i])
2277 fprintf(f, "%s%s: "RLIM_FMT"\n",
2278 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2279
2280 if (c->ioprio_set) {
2281 _cleanup_free_ char *class_str = NULL;
2282
2283 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2284 fprintf(f,
2285 "%sIOSchedulingClass: %s\n"
2286 "%sIOPriority: %i\n",
2287 prefix, strna(class_str),
2288 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2289 }
2290
2291 if (c->cpu_sched_set) {
2292 _cleanup_free_ char *policy_str = NULL;
2293
2294 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2295 fprintf(f,
2296 "%sCPUSchedulingPolicy: %s\n"
2297 "%sCPUSchedulingPriority: %i\n"
2298 "%sCPUSchedulingResetOnFork: %s\n",
2299 prefix, strna(policy_str),
2300 prefix, c->cpu_sched_priority,
2301 prefix, yes_no(c->cpu_sched_reset_on_fork));
2302 }
2303
2304 if (c->cpuset) {
2305 fprintf(f, "%sCPUAffinity:", prefix);
2306 for (i = 0; i < c->cpuset_ncpus; i++)
2307 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2308 fprintf(f, " %u", i);
2309 fputs("\n", f);
2310 }
2311
2312 if (c->timer_slack_nsec != NSEC_INFINITY)
2313 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2314
2315 fprintf(f,
2316 "%sStandardInput: %s\n"
2317 "%sStandardOutput: %s\n"
2318 "%sStandardError: %s\n",
2319 prefix, exec_input_to_string(c->std_input),
2320 prefix, exec_output_to_string(c->std_output),
2321 prefix, exec_output_to_string(c->std_error));
2322
2323 if (c->tty_path)
2324 fprintf(f,
2325 "%sTTYPath: %s\n"
2326 "%sTTYReset: %s\n"
2327 "%sTTYVHangup: %s\n"
2328 "%sTTYVTDisallocate: %s\n",
2329 prefix, c->tty_path,
2330 prefix, yes_no(c->tty_reset),
2331 prefix, yes_no(c->tty_vhangup),
2332 prefix, yes_no(c->tty_vt_disallocate));
2333
2334 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2335 c->std_output == EXEC_OUTPUT_KMSG ||
2336 c->std_output == EXEC_OUTPUT_JOURNAL ||
2337 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2338 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2339 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2340 c->std_error == EXEC_OUTPUT_SYSLOG ||
2341 c->std_error == EXEC_OUTPUT_KMSG ||
2342 c->std_error == EXEC_OUTPUT_JOURNAL ||
2343 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2344 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2345 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2346
2347 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2348
2349 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2350 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2351
2352 fprintf(f,
2353 "%sSyslogFacility: %s\n"
2354 "%sSyslogLevel: %s\n",
2355 prefix, strna(fac_str),
2356 prefix, strna(lvl_str));
2357 }
2358
2359 if (c->capabilities) {
2360 _cleanup_cap_free_charp_ char *t;
2361
2362 t = cap_to_text(c->capabilities, NULL);
2363 if (t)
2364 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2365 }
2366
2367 if (c->secure_bits)
2368 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2369 prefix,
2370 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2371 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2372 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2373 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2374 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2375 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2376
2377 if (c->capability_bounding_set_drop) {
2378 unsigned long l;
2379 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2380
2381 for (l = 0; l <= cap_last_cap(); l++)
2382 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2383 fprintf(f, " %s", strna(capability_to_name(l)));
2384
2385 fputs("\n", f);
2386 }
2387
2388 if (c->user)
2389 fprintf(f, "%sUser: %s\n", prefix, c->user);
2390 if (c->group)
2391 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2392
2393 if (strv_length(c->supplementary_groups) > 0) {
2394 fprintf(f, "%sSupplementaryGroups:", prefix);
2395 strv_fprintf(f, c->supplementary_groups);
2396 fputs("\n", f);
2397 }
2398
2399 if (c->pam_name)
2400 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2401
2402 if (strv_length(c->read_write_dirs) > 0) {
2403 fprintf(f, "%sReadWriteDirs:", prefix);
2404 strv_fprintf(f, c->read_write_dirs);
2405 fputs("\n", f);
2406 }
2407
2408 if (strv_length(c->read_only_dirs) > 0) {
2409 fprintf(f, "%sReadOnlyDirs:", prefix);
2410 strv_fprintf(f, c->read_only_dirs);
2411 fputs("\n", f);
2412 }
2413
2414 if (strv_length(c->inaccessible_dirs) > 0) {
2415 fprintf(f, "%sInaccessibleDirs:", prefix);
2416 strv_fprintf(f, c->inaccessible_dirs);
2417 fputs("\n", f);
2418 }
2419
2420 if (c->utmp_id)
2421 fprintf(f,
2422 "%sUtmpIdentifier: %s\n",
2423 prefix, c->utmp_id);
2424
2425 if (c->selinux_context)
2426 fprintf(f,
2427 "%sSELinuxContext: %s%s\n",
2428 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2429
2430 if (c->personality != PERSONALITY_INVALID)
2431 fprintf(f,
2432 "%sPersonality: %s\n",
2433 prefix, strna(personality_to_string(c->personality)));
2434
2435 if (c->syscall_filter) {
2436 #ifdef HAVE_SECCOMP
2437 Iterator j;
2438 void *id;
2439 bool first = true;
2440 #endif
2441
2442 fprintf(f,
2443 "%sSystemCallFilter: ",
2444 prefix);
2445
2446 if (!c->syscall_whitelist)
2447 fputc('~', f);
2448
2449 #ifdef HAVE_SECCOMP
2450 SET_FOREACH(id, c->syscall_filter, j) {
2451 _cleanup_free_ char *name = NULL;
2452
2453 if (first)
2454 first = false;
2455 else
2456 fputc(' ', f);
2457
2458 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2459 fputs(strna(name), f);
2460 }
2461 #endif
2462
2463 fputc('\n', f);
2464 }
2465
2466 if (c->syscall_archs) {
2467 #ifdef HAVE_SECCOMP
2468 Iterator j;
2469 void *id;
2470 #endif
2471
2472 fprintf(f,
2473 "%sSystemCallArchitectures:",
2474 prefix);
2475
2476 #ifdef HAVE_SECCOMP
2477 SET_FOREACH(id, c->syscall_archs, j)
2478 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2479 #endif
2480 fputc('\n', f);
2481 }
2482
2483 if (c->syscall_errno != 0)
2484 fprintf(f,
2485 "%sSystemCallErrorNumber: %s\n",
2486 prefix, strna(errno_to_name(c->syscall_errno)));
2487
2488 if (c->apparmor_profile)
2489 fprintf(f,
2490 "%sAppArmorProfile: %s%s\n",
2491 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2492 }
2493
2494 bool exec_context_maintains_privileges(ExecContext *c) {
2495 assert(c);
2496
2497 /* Returns true if the process forked off would run run under
2498 * an unchanged UID or as root. */
2499
2500 if (!c->user)
2501 return true;
2502
2503 if (streq(c->user, "root") || streq(c->user, "0"))
2504 return true;
2505
2506 return false;
2507 }
2508
2509 void exec_status_start(ExecStatus *s, pid_t pid) {
2510 assert(s);
2511
2512 zero(*s);
2513 s->pid = pid;
2514 dual_timestamp_get(&s->start_timestamp);
2515 }
2516
2517 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2518 assert(s);
2519
2520 if (s->pid && s->pid != pid)
2521 zero(*s);
2522
2523 s->pid = pid;
2524 dual_timestamp_get(&s->exit_timestamp);
2525
2526 s->code = code;
2527 s->status = status;
2528
2529 if (context) {
2530 if (context->utmp_id)
2531 utmp_put_dead_process(context->utmp_id, pid, code, status);
2532
2533 exec_context_tty_reset(context);
2534 }
2535 }
2536
2537 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2538 char buf[FORMAT_TIMESTAMP_MAX];
2539
2540 assert(s);
2541 assert(f);
2542
2543 if (s->pid <= 0)
2544 return;
2545
2546 prefix = strempty(prefix);
2547
2548 fprintf(f,
2549 "%sPID: "PID_FMT"\n",
2550 prefix, s->pid);
2551
2552 if (s->start_timestamp.realtime > 0)
2553 fprintf(f,
2554 "%sStart Timestamp: %s\n",
2555 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2556
2557 if (s->exit_timestamp.realtime > 0)
2558 fprintf(f,
2559 "%sExit Timestamp: %s\n"
2560 "%sExit Code: %s\n"
2561 "%sExit Status: %i\n",
2562 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2563 prefix, sigchld_code_to_string(s->code),
2564 prefix, s->status);
2565 }
2566
2567 char *exec_command_line(char **argv) {
2568 size_t k;
2569 char *n, *p, **a;
2570 bool first = true;
2571
2572 assert(argv);
2573
2574 k = 1;
2575 STRV_FOREACH(a, argv)
2576 k += strlen(*a)+3;
2577
2578 if (!(n = new(char, k)))
2579 return NULL;
2580
2581 p = n;
2582 STRV_FOREACH(a, argv) {
2583
2584 if (!first)
2585 *(p++) = ' ';
2586 else
2587 first = false;
2588
2589 if (strpbrk(*a, WHITESPACE)) {
2590 *(p++) = '\'';
2591 p = stpcpy(p, *a);
2592 *(p++) = '\'';
2593 } else
2594 p = stpcpy(p, *a);
2595
2596 }
2597
2598 *p = 0;
2599
2600 /* FIXME: this doesn't really handle arguments that have
2601 * spaces and ticks in them */
2602
2603 return n;
2604 }
2605
2606 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2607 _cleanup_free_ char *cmd = NULL;
2608 const char *prefix2;
2609
2610 assert(c);
2611 assert(f);
2612
2613 prefix = strempty(prefix);
2614 prefix2 = strjoina(prefix, "\t");
2615
2616 cmd = exec_command_line(c->argv);
2617 fprintf(f,
2618 "%sCommand Line: %s\n",
2619 prefix, cmd ? cmd : strerror(ENOMEM));
2620
2621 exec_status_dump(&c->exec_status, f, prefix2);
2622 }
2623
2624 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2625 assert(f);
2626
2627 prefix = strempty(prefix);
2628
2629 LIST_FOREACH(command, c, c)
2630 exec_command_dump(c, f, prefix);
2631 }
2632
2633 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2634 ExecCommand *end;
2635
2636 assert(l);
2637 assert(e);
2638
2639 if (*l) {
2640 /* It's kind of important, that we keep the order here */
2641 LIST_FIND_TAIL(command, *l, end);
2642 LIST_INSERT_AFTER(command, *l, end, e);
2643 } else
2644 *l = e;
2645 }
2646
2647 int exec_command_set(ExecCommand *c, const char *path, ...) {
2648 va_list ap;
2649 char **l, *p;
2650
2651 assert(c);
2652 assert(path);
2653
2654 va_start(ap, path);
2655 l = strv_new_ap(path, ap);
2656 va_end(ap);
2657
2658 if (!l)
2659 return -ENOMEM;
2660
2661 p = strdup(path);
2662 if (!p) {
2663 strv_free(l);
2664 return -ENOMEM;
2665 }
2666
2667 free(c->path);
2668 c->path = p;
2669
2670 strv_free(c->argv);
2671 c->argv = l;
2672
2673 return 0;
2674 }
2675
2676 int exec_command_append(ExecCommand *c, const char *path, ...) {
2677 _cleanup_strv_free_ char **l = NULL;
2678 va_list ap;
2679 int r;
2680
2681 assert(c);
2682 assert(path);
2683
2684 va_start(ap, path);
2685 l = strv_new_ap(path, ap);
2686 va_end(ap);
2687
2688 if (!l)
2689 return -ENOMEM;
2690
2691 r = strv_extend_strv(&c->argv, l);
2692 if (r < 0)
2693 return r;
2694
2695 return 0;
2696 }
2697
2698
2699 static int exec_runtime_allocate(ExecRuntime **rt) {
2700
2701 if (*rt)
2702 return 0;
2703
2704 *rt = new0(ExecRuntime, 1);
2705 if (!*rt)
2706 return -ENOMEM;
2707
2708 (*rt)->n_ref = 1;
2709 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2710
2711 return 0;
2712 }
2713
2714 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2715 int r;
2716
2717 assert(rt);
2718 assert(c);
2719 assert(id);
2720
2721 if (*rt)
2722 return 1;
2723
2724 if (!c->private_network && !c->private_tmp)
2725 return 0;
2726
2727 r = exec_runtime_allocate(rt);
2728 if (r < 0)
2729 return r;
2730
2731 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2732 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2733 return -errno;
2734 }
2735
2736 if (c->private_tmp && !(*rt)->tmp_dir) {
2737 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2738 if (r < 0)
2739 return r;
2740 }
2741
2742 return 1;
2743 }
2744
2745 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2746 assert(r);
2747 assert(r->n_ref > 0);
2748
2749 r->n_ref++;
2750 return r;
2751 }
2752
2753 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2754
2755 if (!r)
2756 return NULL;
2757
2758 assert(r->n_ref > 0);
2759
2760 r->n_ref--;
2761 if (r->n_ref > 0)
2762 return NULL;
2763
2764 free(r->tmp_dir);
2765 free(r->var_tmp_dir);
2766 safe_close_pair(r->netns_storage_socket);
2767 free(r);
2768
2769 return NULL;
2770 }
2771
2772 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2773 assert(u);
2774 assert(f);
2775 assert(fds);
2776
2777 if (!rt)
2778 return 0;
2779
2780 if (rt->tmp_dir)
2781 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2782
2783 if (rt->var_tmp_dir)
2784 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2785
2786 if (rt->netns_storage_socket[0] >= 0) {
2787 int copy;
2788
2789 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2790 if (copy < 0)
2791 return copy;
2792
2793 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2794 }
2795
2796 if (rt->netns_storage_socket[1] >= 0) {
2797 int copy;
2798
2799 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2800 if (copy < 0)
2801 return copy;
2802
2803 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2804 }
2805
2806 return 0;
2807 }
2808
2809 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2810 int r;
2811
2812 assert(rt);
2813 assert(key);
2814 assert(value);
2815
2816 if (streq(key, "tmp-dir")) {
2817 char *copy;
2818
2819 r = exec_runtime_allocate(rt);
2820 if (r < 0)
2821 return log_oom();
2822
2823 copy = strdup(value);
2824 if (!copy)
2825 return log_oom();
2826
2827 free((*rt)->tmp_dir);
2828 (*rt)->tmp_dir = copy;
2829
2830 } else if (streq(key, "var-tmp-dir")) {
2831 char *copy;
2832
2833 r = exec_runtime_allocate(rt);
2834 if (r < 0)
2835 return log_oom();
2836
2837 copy = strdup(value);
2838 if (!copy)
2839 return log_oom();
2840
2841 free((*rt)->var_tmp_dir);
2842 (*rt)->var_tmp_dir = copy;
2843
2844 } else if (streq(key, "netns-socket-0")) {
2845 int fd;
2846
2847 r = exec_runtime_allocate(rt);
2848 if (r < 0)
2849 return log_oom();
2850
2851 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2852 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2853 else {
2854 safe_close((*rt)->netns_storage_socket[0]);
2855 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2856 }
2857 } else if (streq(key, "netns-socket-1")) {
2858 int fd;
2859
2860 r = exec_runtime_allocate(rt);
2861 if (r < 0)
2862 return log_oom();
2863
2864 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2865 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2866 else {
2867 safe_close((*rt)->netns_storage_socket[1]);
2868 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2869 }
2870 } else
2871 return 0;
2872
2873 return 1;
2874 }
2875
2876 static void *remove_tmpdir_thread(void *p) {
2877 _cleanup_free_ char *path = p;
2878
2879 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2880 return NULL;
2881 }
2882
2883 void exec_runtime_destroy(ExecRuntime *rt) {
2884 int r;
2885
2886 if (!rt)
2887 return;
2888
2889 /* If there are multiple users of this, let's leave the stuff around */
2890 if (rt->n_ref > 1)
2891 return;
2892
2893 if (rt->tmp_dir) {
2894 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2895
2896 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2897 if (r < 0) {
2898 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2899 free(rt->tmp_dir);
2900 }
2901
2902 rt->tmp_dir = NULL;
2903 }
2904
2905 if (rt->var_tmp_dir) {
2906 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2907
2908 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2909 if (r < 0) {
2910 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2911 free(rt->var_tmp_dir);
2912 }
2913
2914 rt->var_tmp_dir = NULL;
2915 }
2916
2917 safe_close_pair(rt->netns_storage_socket);
2918 }
2919
2920 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2921 [EXEC_INPUT_NULL] = "null",
2922 [EXEC_INPUT_TTY] = "tty",
2923 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2924 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2925 [EXEC_INPUT_SOCKET] = "socket"
2926 };
2927
2928 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2929
2930 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2931 [EXEC_OUTPUT_INHERIT] = "inherit",
2932 [EXEC_OUTPUT_NULL] = "null",
2933 [EXEC_OUTPUT_TTY] = "tty",
2934 [EXEC_OUTPUT_SYSLOG] = "syslog",
2935 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2936 [EXEC_OUTPUT_KMSG] = "kmsg",
2937 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2938 [EXEC_OUTPUT_JOURNAL] = "journal",
2939 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2940 [EXEC_OUTPUT_SOCKET] = "socket"
2941 };
2942
2943 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);