]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
e4a23ac16955e1a1caef78aa7777dbde1de8ae47
[thirdparty/systemd.git] / src / core / execute.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <glob.h>
23 #include <grp.h>
24 #include <poll.h>
25 #include <signal.h>
26 #include <string.h>
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
29 #include <sys/mman.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
32 #include <sys/socket.h>
33 #include <sys/stat.h>
34 #include <sys/un.h>
35 #include <unistd.h>
36 #include <utmpx.h>
37
38 #ifdef HAVE_PAM
39 #include <security/pam_appl.h>
40 #endif
41
42 #ifdef HAVE_SELINUX
43 #include <selinux/selinux.h>
44 #endif
45
46 #ifdef HAVE_SECCOMP
47 #include <seccomp.h>
48 #endif
49
50 #ifdef HAVE_APPARMOR
51 #include <sys/apparmor.h>
52 #endif
53
54 #include "sd-messages.h"
55
56 #include "af-list.h"
57 #include "alloc-util.h"
58 #ifdef HAVE_APPARMOR
59 #include "apparmor-util.h"
60 #endif
61 #include "async.h"
62 #include "barrier.h"
63 #include "cap-list.h"
64 #include "capability-util.h"
65 #include "def.h"
66 #include "env-util.h"
67 #include "errno-list.h"
68 #include "execute.h"
69 #include "exit-status.h"
70 #include "fd-util.h"
71 #include "fileio.h"
72 #include "formats-util.h"
73 #include "fs-util.h"
74 #include "glob-util.h"
75 #include "io-util.h"
76 #include "ioprio.h"
77 #include "log.h"
78 #include "macro.h"
79 #include "missing.h"
80 #include "mkdir.h"
81 #include "namespace.h"
82 #include "parse-util.h"
83 #include "path-util.h"
84 #include "process-util.h"
85 #include "rlimit-util.h"
86 #include "rm-rf.h"
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90 #include "securebits.h"
91 #include "selinux-util.h"
92 #include "signal-util.h"
93 #include "smack-util.h"
94 #include "special.h"
95 #include "string-table.h"
96 #include "string-util.h"
97 #include "strv.h"
98 #include "syslog-util.h"
99 #include "terminal-util.h"
100 #include "unit.h"
101 #include "user-util.h"
102 #include "util.h"
103 #include "utmp-wtmp.h"
104
/* Idle timeouts, expressed in microseconds (5s and 1s respectively).
 * NOTE(review): their users are outside the visible part of this file. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals by chown_terminal(): rw for the owner,
 * write-only for the group. This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the journal stream socket. */
#define SNDBUF_SIZE (8 * 1024 * 1024)
112
/* Renumbers the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the array occupies
 * the consecutive range 3, 4, 5, ... The array is modified in place: every moved entry is replaced by its
 * duplicate and the old descriptor is closed.
 *
 * Returns 0 on success, or a negative errno value if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first_pending = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep making passes until a pass finishes without any descriptor landing on the wrong number. */
        while (first_pending >= 0) {
                int idx, begin = first_pending;

                first_pending = -1;

                for (idx = begin; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, got;

                        /* Nothing to do if this one already sits where it belongs */
                        if (fds[idx] == wanted)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, wanted);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD hands out the lowest free fd >= wanted. If the slot was still taken
                         * (by an fd we have not moved yet), remember the first such index and redo
                         * the pass from there. */
                        if (got != wanted && first_pending < 0)
                                first_pending = idx;
                }
        }

        return 0;
}
157
/* Applies the requested O_NONBLOCK setting to each of the passed fds and clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success, or the first negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be passed on to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
186
187 static const char *exec_context_tty_path(const ExecContext *context) {
188 assert(context);
189
190 if (context->stdio_as_fds)
191 return NULL;
192
193 if (context->tty_path)
194 return context->tty_path;
195
196 return "/dev/console";
197 }
198
199 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
200 const char *path;
201
202 assert(context);
203
204 path = exec_context_tty_path(context);
205
206 if (context->tty_vhangup) {
207 if (p && p->stdin_fd >= 0)
208 (void) terminal_vhangup_fd(p->stdin_fd);
209 else if (path)
210 (void) terminal_vhangup(path);
211 }
212
213 if (context->tty_reset) {
214 if (p && p->stdin_fd >= 0)
215 (void) reset_terminal_fd(p->stdin_fd, true);
216 else if (path)
217 (void) reset_terminal(path);
218 }
219
220 if (context->tty_vt_disallocate && path)
221 (void) vt_disallocate(path);
222 }
223
224 static bool is_terminal_input(ExecInput i) {
225 return IN_SET(i,
226 EXEC_INPUT_TTY,
227 EXEC_INPUT_TTY_FORCE,
228 EXEC_INPUT_TTY_FAIL);
229 }
230
231 static bool is_terminal_output(ExecOutput o) {
232 return IN_SET(o,
233 EXEC_OUTPUT_TTY,
234 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
235 EXEC_OUTPUT_KMSG_AND_CONSOLE,
236 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
237 }
238
239 static bool exec_context_needs_term(const ExecContext *c) {
240 assert(c);
241
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
243
244 if (is_terminal_input(c->std_input))
245 return true;
246
247 if (is_terminal_output(c->std_output))
248 return true;
249
250 if (is_terminal_output(c->std_error))
251 return true;
252
253 return !!c->tty_path;
254 }
255
/* Opens /dev/null with the given flags (plus O_NOCTTY) and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the number we wanted */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(null_fd);
        return r;
}
273
274 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
275 union sockaddr_union sa = {
276 .un.sun_family = AF_UNIX,
277 .un.sun_path = "/run/systemd/journal/stdout",
278 };
279 uid_t olduid = UID_INVALID;
280 gid_t oldgid = GID_INVALID;
281 int r;
282
283 if (gid != GID_INVALID) {
284 oldgid = getgid();
285
286 r = setegid(gid);
287 if (r < 0)
288 return -errno;
289 }
290
291 if (uid != UID_INVALID) {
292 olduid = getuid();
293
294 r = seteuid(uid);
295 if (r < 0) {
296 r = -errno;
297 goto restore_gid;
298 }
299 }
300
301 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
302 if (r < 0)
303 r = -errno;
304
305 /* If we fail to restore the uid or gid, things will likely
306 fail later on. This should only happen if an LSM interferes. */
307
308 if (uid != UID_INVALID)
309 (void) seteuid(olduid);
310
311 restore_gid:
312 if (gid != GID_INVALID)
313 (void) setegid(oldgid);
314
315 return r;
316 }
317
318 static int connect_logger_as(
319 Unit *unit,
320 const ExecContext *context,
321 ExecOutput output,
322 const char *ident,
323 int nfd,
324 uid_t uid,
325 gid_t gid) {
326
327 int fd, r;
328
329 assert(context);
330 assert(output < _EXEC_OUTPUT_MAX);
331 assert(ident);
332 assert(nfd >= 0);
333
334 fd = socket(AF_UNIX, SOCK_STREAM, 0);
335 if (fd < 0)
336 return -errno;
337
338 r = connect_journal_socket(fd, uid, gid);
339 if (r < 0)
340 return r;
341
342 if (shutdown(fd, SHUT_RD) < 0) {
343 safe_close(fd);
344 return -errno;
345 }
346
347 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
348
349 dprintf(fd,
350 "%s\n"
351 "%s\n"
352 "%i\n"
353 "%i\n"
354 "%i\n"
355 "%i\n"
356 "%i\n",
357 context->syslog_identifier ? context->syslog_identifier : ident,
358 unit->id,
359 context->syslog_priority,
360 !!context->syslog_level_prefix,
361 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
362 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
363 is_terminal_output(output));
364
365 if (fd == nfd)
366 return nfd;
367
368 r = dup2(fd, nfd) < 0 ? -errno : nfd;
369 safe_close(fd);
370
371 return r;
372 }

/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, or a negative error: the one from open_terminal() or the errno of dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Nothing to move if we got the desired number right away */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(tty_fd);
        return r;
}
391
392 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
393
394 if (is_terminal_input(std_input) && !apply_tty_stdin)
395 return EXEC_INPUT_NULL;
396
397 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
398 return EXEC_INPUT_NULL;
399
400 return std_input;
401 }
402
403 static int fixup_output(ExecOutput std_output, int socket_fd) {
404
405 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
406 return EXEC_OUTPUT_INHERIT;
407
408 return std_output;
409 }
410
411 static int setup_input(
412 const ExecContext *context,
413 const ExecParameters *params,
414 int socket_fd) {
415
416 ExecInput i;
417
418 assert(context);
419 assert(params);
420
421 if (params->stdin_fd >= 0) {
422 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
423 return -errno;
424
425 /* Try to make this the controlling tty, if it is a tty, and reset it */
426 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
427 (void) reset_terminal_fd(STDIN_FILENO, true);
428
429 return STDIN_FILENO;
430 }
431
432 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
433
434 switch (i) {
435
436 case EXEC_INPUT_NULL:
437 return open_null_as(O_RDONLY, STDIN_FILENO);
438
439 case EXEC_INPUT_TTY:
440 case EXEC_INPUT_TTY_FORCE:
441 case EXEC_INPUT_TTY_FAIL: {
442 int fd, r;
443
444 fd = acquire_terminal(exec_context_tty_path(context),
445 i == EXEC_INPUT_TTY_FAIL,
446 i == EXEC_INPUT_TTY_FORCE,
447 false,
448 USEC_INFINITY);
449 if (fd < 0)
450 return fd;
451
452 if (fd != STDIN_FILENO) {
453 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
454 safe_close(fd);
455 } else
456 r = STDIN_FILENO;
457
458 return r;
459 }
460
461 case EXEC_INPUT_SOCKET:
462 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
463
464 default:
465 assert_not_reached("Unknown input type");
466 }
467 }
468
469 static int setup_output(
470 Unit *unit,
471 const ExecContext *context,
472 const ExecParameters *params,
473 int fileno,
474 int socket_fd,
475 const char *ident,
476 uid_t uid,
477 gid_t gid,
478 dev_t *journal_stream_dev,
479 ino_t *journal_stream_ino) {
480
481 ExecOutput o;
482 ExecInput i;
483 int r;
484
485 assert(unit);
486 assert(context);
487 assert(params);
488 assert(ident);
489 assert(journal_stream_dev);
490 assert(journal_stream_ino);
491
492 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
493
494 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
495 return -errno;
496
497 return STDOUT_FILENO;
498 }
499
500 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
501 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
502 return -errno;
503
504 return STDERR_FILENO;
505 }
506
507 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
508 o = fixup_output(context->std_output, socket_fd);
509
510 if (fileno == STDERR_FILENO) {
511 ExecOutput e;
512 e = fixup_output(context->std_error, socket_fd);
513
514 /* This expects the input and output are already set up */
515
516 /* Don't change the stderr file descriptor if we inherit all
517 * the way and are not on a tty */
518 if (e == EXEC_OUTPUT_INHERIT &&
519 o == EXEC_OUTPUT_INHERIT &&
520 i == EXEC_INPUT_NULL &&
521 !is_terminal_input(context->std_input) &&
522 getppid () != 1)
523 return fileno;
524
525 /* Duplicate from stdout if possible */
526 if (e == o || e == EXEC_OUTPUT_INHERIT)
527 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
528
529 o = e;
530
531 } else if (o == EXEC_OUTPUT_INHERIT) {
532 /* If input got downgraded, inherit the original value */
533 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
534 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
535
536 /* If the input is connected to anything that's not a /dev/null, inherit that... */
537 if (i != EXEC_INPUT_NULL)
538 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
539
540 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
541 if (getppid() != 1)
542 return fileno;
543
544 /* We need to open /dev/null here anew, to get the right access mode. */
545 return open_null_as(O_WRONLY, fileno);
546 }
547
548 switch (o) {
549
550 case EXEC_OUTPUT_NULL:
551 return open_null_as(O_WRONLY, fileno);
552
553 case EXEC_OUTPUT_TTY:
554 if (is_terminal_input(i))
555 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
556
557 /* We don't reset the terminal if this is just about output */
558 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
559
560 case EXEC_OUTPUT_SYSLOG:
561 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
562 case EXEC_OUTPUT_KMSG:
563 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
564 case EXEC_OUTPUT_JOURNAL:
565 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
566 r = connect_logger_as(unit, context, o, ident, fileno, uid, gid);
567 if (r < 0) {
568 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
569 r = open_null_as(O_WRONLY, fileno);
570 } else {
571 struct stat st;
572
573 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
574 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
575 * services to detect whether they are connected to the journal or not. */
576
577 if (fstat(fileno, &st) >= 0) {
578 *journal_stream_dev = st.st_dev;
579 *journal_stream_ino = st.st_ino;
580 }
581 }
582 return r;
583
584 case EXEC_OUTPUT_SOCKET:
585 assert(socket_fd >= 0);
586 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
587
588 default:
589 assert_not_reached("Unknown error type");
590 }
591 }
592
593 static int chown_terminal(int fd, uid_t uid) {
594 struct stat st;
595
596 assert(fd >= 0);
597
598 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
599 if (isatty(fd) < 1)
600 return 0;
601
602 /* This might fail. What matters are the results. */
603 (void) fchown(fd, uid, -1);
604 (void) fchmod(fd, TTY_MODE);
605
606 if (fstat(fd, &st) < 0)
607 return -errno;
608
609 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
610 return -EPERM;
611
612 return 0;
613 }
614
615 static int setup_confirm_stdio(int *_saved_stdin, int *_saved_stdout) {
616 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
617 int r;
618
619 assert(_saved_stdin);
620 assert(_saved_stdout);
621
622 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
623 if (saved_stdin < 0)
624 return -errno;
625
626 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
627 if (saved_stdout < 0)
628 return -errno;
629
630 fd = acquire_terminal(
631 "/dev/console",
632 false,
633 false,
634 false,
635 DEFAULT_CONFIRM_USEC);
636 if (fd < 0)
637 return fd;
638
639 r = chown_terminal(fd, getuid());
640 if (r < 0)
641 return r;
642
643 r = reset_terminal_fd(fd, true);
644 if (r < 0)
645 return r;
646
647 if (dup2(fd, STDIN_FILENO) < 0)
648 return -errno;
649
650 if (dup2(fd, STDOUT_FILENO) < 0)
651 return -errno;
652
653 if (fd >= 2)
654 safe_close(fd);
655 fd = -1;
656
657 *_saved_stdin = saved_stdin;
658 *_saved_stdout = saved_stdout;
659
660 saved_stdin = saved_stdout = -1;
661
662 return 0;
663 }
664
665 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
666 _cleanup_close_ int fd = -1;
667 va_list ap;
668
669 assert(format);
670
671 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
672 if (fd < 0)
673 return fd;
674
675 va_start(ap, format);
676 vdprintf(fd, format, ap);
677 va_end(ap);
678
679 return 0;
680 }
681
/* Undoes setup_confirm_stdio(): releases the terminal, moves the saved fds back onto stdin/stdout and closes
 * the saved copies. Both *saved_stdin and *saved_stdout are reset to the value returned by safe_close().
 *
 * Returns 0 on success, or the negative errno value of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        /* The copies are closed regardless of whether restoring worked */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
703
704 static int ask_for_confirmation(char *response, char **argv) {
705 int saved_stdout = -1, saved_stdin = -1, r;
706 _cleanup_free_ char *line = NULL;
707
708 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
709 if (r < 0)
710 return r;
711
712 line = exec_command_line(argv);
713 if (!line)
714 return -ENOMEM;
715
716 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
717
718 restore_confirm_stdio(&saved_stdin, &saved_stdout);
719
720 return r;
721 }
722
723 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
724 bool keep_groups = false;
725 int r;
726
727 assert(context);
728
729 /* Lookup and set GID and supplementary group list. Here too
730 * we avoid NSS lookups for gid=0. */
731
732 if (context->group || username) {
733 /* First step, initialize groups from /etc/groups */
734 if (username && gid != 0) {
735 if (initgroups(username, gid) < 0)
736 return -errno;
737
738 keep_groups = true;
739 }
740
741 /* Second step, set our gids */
742 if (setresgid(gid, gid, gid) < 0)
743 return -errno;
744 }
745
746 if (context->supplementary_groups) {
747 int ngroups_max, k;
748 gid_t *gids;
749 char **i;
750
751 /* Final step, initialize any manually set supplementary groups */
752 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
753
754 if (!(gids = new(gid_t, ngroups_max)))
755 return -ENOMEM;
756
757 if (keep_groups) {
758 k = getgroups(ngroups_max, gids);
759 if (k < 0) {
760 free(gids);
761 return -errno;
762 }
763 } else
764 k = 0;
765
766 STRV_FOREACH(i, context->supplementary_groups) {
767 const char *g;
768
769 if (k >= ngroups_max) {
770 free(gids);
771 return -E2BIG;
772 }
773
774 g = *i;
775 r = get_group_creds(&g, gids+k);
776 if (r < 0) {
777 free(gids);
778 return r;
779 }
780
781 k++;
782 }
783
784 if (maybe_setgroups(k, gids) < 0) {
785 free(gids);
786 return -errno;
787 }
788
789 free(gids);
790 }
791
792 return 0;
793 }
794
795 static int enforce_user(const ExecContext *context, uid_t uid) {
796 assert(context);
797
798 /* Sets (but doesn't look up) the uid and make sure we keep the
799 * capabilities while doing so. */
800
801 if (context->capability_ambient_set != 0) {
802
803 /* First step: If we need to keep capabilities but
804 * drop privileges we need to make sure we keep our
805 * caps, while we drop privileges. */
806 if (uid != 0) {
807 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
808
809 if (prctl(PR_GET_SECUREBITS) != sb)
810 if (prctl(PR_SET_SECUREBITS, sb) < 0)
811 return -errno;
812 }
813 }
814
815 /* Second step: actually set the uids */
816 if (setresuid(uid, uid, uid) < 0)
817 return -errno;
818
819 /* At this point we should have all necessary capabilities but
820 are otherwise a normal user. However, the caps might got
821 corrupted due to the setresuid() so we need clean them up
822 later. This is done outside of this call. */
823
824 return 0;
825 }
826
827 #ifdef HAVE_PAM
828
829 static int null_conv(
830 int num_msg,
831 const struct pam_message **msg,
832 struct pam_response **resp,
833 void *appdata_ptr) {
834
835 /* We don't support conversations */
836
837 return PAM_CONV_ERR;
838 }
839
840 #endif
841
/* Opens a PAM session for the service about to be executed and forks off a helper process that closes the
 * session again once the calling process has gone away.
 *
 *   name        PAM service name, passed to pam_start()
 *   user        user name, passed to pam_start()
 *   uid, gid    credentials the helper process drops to
 *   tty         if non-NULL, set as PAM_TTY
 *   env         in: environment exported to PAM via pam_putenv();
 *               out: on success replaced (old list freed) by the list returned by pam_getenvlist()
 *   fds, n_fds  file descriptors that are closed in the helper process
 *
 * Returns 0 on success, -EPERM if a PAM call failed (PAM errors do not map to errno), or another negative
 * errno value if barrier creation or fork() failed. On failure *env is left untouched. If compiled without
 * PAM support this is a no-op that returns 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* Don't leave SIGTERM blocked behind our back on the failure path. errno has been saved
                 * already, and there is nothing sensible to do if restoring the mask fails. */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                if (maybe_setgroups(0, NULL) < 0)
                        log_warning_errno(errno, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call. */
                barrier_place(&barrier);

                /* Check if our parent process might already have
                 * died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment as seen by PAM back to the caller */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1055
/* Renames the calling process after the file name of 'path', wrapped in parentheses. The result has to fit
 * in 10 characters (i.e. the length of "/sbin/init") to look pretty in /bin/ps, hence at most the last 8
 * characters of the file name are used. If there is no file name, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* The end of the process name is usually more interesting, since the first bit might just
                 * be "systemd-" */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
1086
1087 #ifdef HAVE_SECCOMP
1088
1089 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1090 if (!is_seccomp_available()) {
1091 log_open();
1092 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1093 log_close();
1094 return true;
1095 }
1096 return false;
1097 }
1098
1099 static int apply_seccomp(const Unit* u, const ExecContext *c) {
1100 uint32_t negative_action, action;
1101 scmp_filter_ctx *seccomp;
1102 Iterator i;
1103 void *id;
1104 int r;
1105
1106 assert(c);
1107
1108 if (skip_seccomp_unavailable(u, "syscall filtering"))
1109 return 0;
1110
1111 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1112
1113 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
1114 if (!seccomp)
1115 return -ENOMEM;
1116
1117 if (c->syscall_archs) {
1118
1119 SET_FOREACH(id, c->syscall_archs, i) {
1120 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1121 if (r == -EEXIST)
1122 continue;
1123 if (r < 0)
1124 goto finish;
1125 }
1126
1127 } else {
1128 r = seccomp_add_secondary_archs(seccomp);
1129 if (r < 0)
1130 goto finish;
1131 }
1132
1133 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1134 SET_FOREACH(id, c->syscall_filter, i) {
1135 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1136 if (r < 0)
1137 goto finish;
1138 }
1139
1140 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1141 if (r < 0)
1142 goto finish;
1143
1144 r = seccomp_load(seccomp);
1145
1146 finish:
1147 seccomp_release(seccomp);
1148 return r;
1149 }
1150
1151 static int apply_address_families(const Unit* u, const ExecContext *c) {
1152 scmp_filter_ctx *seccomp;
1153 Iterator i;
1154 int r;
1155
1156 assert(c);
1157
1158 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1159 return 0;
1160
1161 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1162 if (!seccomp)
1163 return -ENOMEM;
1164
1165 r = seccomp_add_secondary_archs(seccomp);
1166 if (r < 0)
1167 goto finish;
1168
1169 if (c->address_families_whitelist) {
1170 int af, first = 0, last = 0;
1171 void *afp;
1172
1173 /* If this is a whitelist, we first block the address
1174 * families that are out of range and then everything
1175 * that is not in the set. First, we find the lowest
1176 * and highest address family in the set. */
1177
1178 SET_FOREACH(afp, c->address_families, i) {
1179 af = PTR_TO_INT(afp);
1180
1181 if (af <= 0 || af >= af_max())
1182 continue;
1183
1184 if (first == 0 || af < first)
1185 first = af;
1186
1187 if (last == 0 || af > last)
1188 last = af;
1189 }
1190
1191 assert((first == 0) == (last == 0));
1192
1193 if (first == 0) {
1194
1195 /* No entries in the valid range, block everything */
1196 r = seccomp_rule_add(
1197 seccomp,
1198 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1199 SCMP_SYS(socket),
1200 0);
1201 if (r < 0)
1202 goto finish;
1203
1204 } else {
1205
1206 /* Block everything below the first entry */
1207 r = seccomp_rule_add(
1208 seccomp,
1209 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1210 SCMP_SYS(socket),
1211 1,
1212 SCMP_A0(SCMP_CMP_LT, first));
1213 if (r < 0)
1214 goto finish;
1215
1216 /* Block everything above the last entry */
1217 r = seccomp_rule_add(
1218 seccomp,
1219 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1220 SCMP_SYS(socket),
1221 1,
1222 SCMP_A0(SCMP_CMP_GT, last));
1223 if (r < 0)
1224 goto finish;
1225
1226 /* Block everything between the first and last
1227 * entry */
1228 for (af = 1; af < af_max(); af++) {
1229
1230 if (set_contains(c->address_families, INT_TO_PTR(af)))
1231 continue;
1232
1233 r = seccomp_rule_add(
1234 seccomp,
1235 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1236 SCMP_SYS(socket),
1237 1,
1238 SCMP_A0(SCMP_CMP_EQ, af));
1239 if (r < 0)
1240 goto finish;
1241 }
1242 }
1243
1244 } else {
1245 void *af;
1246
1247 /* If this is a blacklist, then generate one rule for
1248 * each address family that are then combined in OR
1249 * checks. */
1250
1251 SET_FOREACH(af, c->address_families, i) {
1252
1253 r = seccomp_rule_add(
1254 seccomp,
1255 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1256 SCMP_SYS(socket),
1257 1,
1258 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1259 if (r < 0)
1260 goto finish;
1261 }
1262 }
1263
1264 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1265 if (r < 0)
1266 goto finish;
1267
1268 r = seccomp_load(seccomp);
1269
1270 finish:
1271 seccomp_release(seccomp);
1272 return r;
1273 }
1274
1275 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1276 scmp_filter_ctx *seccomp;
1277 int r;
1278
1279 assert(c);
1280
1281 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1282 return 0;
1283
1284 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1285 if (!seccomp)
1286 return -ENOMEM;
1287
1288 r = seccomp_add_secondary_archs(seccomp);
1289 if (r < 0)
1290 goto finish;
1291
1292 r = seccomp_rule_add(
1293 seccomp,
1294 SCMP_ACT_ERRNO(EPERM),
1295 SCMP_SYS(mmap),
1296 1,
1297 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
1298 if (r < 0)
1299 goto finish;
1300
1301 r = seccomp_rule_add(
1302 seccomp,
1303 SCMP_ACT_ERRNO(EPERM),
1304 SCMP_SYS(mprotect),
1305 1,
1306 SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
1307 if (r < 0)
1308 goto finish;
1309
1310 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1311 if (r < 0)
1312 goto finish;
1313
1314 r = seccomp_load(seccomp);
1315
1316 finish:
1317 seccomp_release(seccomp);
1318 return r;
1319 }
1320
1321 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1322 static const int permitted_policies[] = {
1323 SCHED_OTHER,
1324 SCHED_BATCH,
1325 SCHED_IDLE,
1326 };
1327
1328 scmp_filter_ctx *seccomp;
1329 unsigned i;
1330 int r, p, max_policy = 0;
1331
1332 assert(c);
1333
1334 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1335 return 0;
1336
1337 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1338 if (!seccomp)
1339 return -ENOMEM;
1340
1341 r = seccomp_add_secondary_archs(seccomp);
1342 if (r < 0)
1343 goto finish;
1344
1345 /* Determine the highest policy constant we want to allow */
1346 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1347 if (permitted_policies[i] > max_policy)
1348 max_policy = permitted_policies[i];
1349
1350 /* Go through all policies with lower values than that, and block them -- unless they appear in the
1351 * whitelist. */
1352 for (p = 0; p < max_policy; p++) {
1353 bool good = false;
1354
1355 /* Check if this is in the whitelist. */
1356 for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
1357 if (permitted_policies[i] == p) {
1358 good = true;
1359 break;
1360 }
1361
1362 if (good)
1363 continue;
1364
1365 /* Deny this policy */
1366 r = seccomp_rule_add(
1367 seccomp,
1368 SCMP_ACT_ERRNO(EPERM),
1369 SCMP_SYS(sched_setscheduler),
1370 1,
1371 SCMP_A1(SCMP_CMP_EQ, p));
1372 if (r < 0)
1373 goto finish;
1374 }
1375
1376 /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here,
1377 * hence no need no check for < 0 values. */
1378 r = seccomp_rule_add(
1379 seccomp,
1380 SCMP_ACT_ERRNO(EPERM),
1381 SCMP_SYS(sched_setscheduler),
1382 1,
1383 SCMP_A1(SCMP_CMP_GT, max_policy));
1384 if (r < 0)
1385 goto finish;
1386
1387 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1388 if (r < 0)
1389 goto finish;
1390
1391 r = seccomp_load(seccomp);
1392
1393 finish:
1394 seccomp_release(seccomp);
1395 return r;
1396 }
1397
1398 static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
1399 scmp_filter_ctx *seccomp;
1400 int r;
1401
1402 assert(c);
1403
1404 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1405 * let's protect even those systems where this is left on in the kernel. */
1406
1407 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1408 return 0;
1409
1410 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1411 if (!seccomp)
1412 return -ENOMEM;
1413
1414 r = seccomp_add_secondary_archs(seccomp);
1415 if (r < 0)
1416 goto finish;
1417
1418 r = seccomp_rule_add(
1419 seccomp,
1420 SCMP_ACT_ERRNO(EPERM),
1421 SCMP_SYS(_sysctl),
1422 0);
1423 if (r < 0)
1424 goto finish;
1425
1426 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1427 if (r < 0)
1428 goto finish;
1429
1430 r = seccomp_load(seccomp);
1431
1432 finish:
1433 seccomp_release(seccomp);
1434 return r;
1435 }
1436
1437 static int apply_private_devices(Unit *u, const ExecContext *c) {
1438 const SystemCallFilterSet *set;
1439 scmp_filter_ctx *seccomp;
1440 const char *sys;
1441 bool syscalls_found = false;
1442 int r;
1443
1444 assert(c);
1445
1446 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1447
1448 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1449 return 0;
1450
1451 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1452 if (!seccomp)
1453 return -ENOMEM;
1454
1455 r = seccomp_add_secondary_archs(seccomp);
1456 if (r < 0)
1457 goto finish;
1458
1459 for (set = syscall_filter_sets; set->set_name; set++)
1460 if (streq(set->set_name, "@raw-io")) {
1461 syscalls_found = true;
1462 break;
1463 }
1464
1465 /* We should never fail here */
1466 if (!syscalls_found) {
1467 r = -EOPNOTSUPP;
1468 goto finish;
1469 }
1470
1471 NULSTR_FOREACH(sys, set->value) {
1472 int id;
1473 bool add = true;
1474
1475 #ifndef __NR_s390_pci_mmio_read
1476 if (streq(sys, "s390_pci_mmio_read"))
1477 add = false;
1478 #endif
1479 #ifndef __NR_s390_pci_mmio_write
1480 if (streq(sys, "s390_pci_mmio_write"))
1481 add = false;
1482 #endif
1483
1484 if (!add)
1485 continue;
1486
1487 id = seccomp_syscall_resolve_name(sys);
1488
1489 r = seccomp_rule_add(
1490 seccomp,
1491 SCMP_ACT_ERRNO(EPERM),
1492 id, 0);
1493 if (r < 0)
1494 goto finish;
1495 }
1496
1497 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1498 if (r < 0)
1499 goto finish;
1500
1501 r = seccomp_load(seccomp);
1502
1503 finish:
1504 seccomp_release(seccomp);
1505 return r;
1506 }
1507
1508 #endif
1509
1510 static void do_idle_pipe_dance(int idle_pipe[4]) {
1511 assert(idle_pipe);
1512
1513 idle_pipe[1] = safe_close(idle_pipe[1]);
1514 idle_pipe[2] = safe_close(idle_pipe[2]);
1515
1516 if (idle_pipe[0] >= 0) {
1517 int r;
1518
1519 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1520
1521 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1522 ssize_t n;
1523
1524 /* Signal systemd that we are bored and want to continue. */
1525 n = write(idle_pipe[3], "x", 1);
1526 if (n > 0)
1527 /* Wait for systemd to react to the signal above. */
1528 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1529 }
1530
1531 idle_pipe[0] = safe_close(idle_pipe[0]);
1532
1533 }
1534
1535 idle_pipe[3] = safe_close(idle_pipe[3]);
1536 }
1537
1538 static int build_environment(
1539 Unit *u,
1540 const ExecContext *c,
1541 const ExecParameters *p,
1542 unsigned n_fds,
1543 const char *home,
1544 const char *username,
1545 const char *shell,
1546 dev_t journal_stream_dev,
1547 ino_t journal_stream_ino,
1548 char ***ret) {
1549
1550 _cleanup_strv_free_ char **our_env = NULL;
1551 unsigned n_env = 0;
1552 char *x;
1553
1554 assert(c);
1555 assert(ret);
1556
1557 our_env = new0(char*, 13);
1558 if (!our_env)
1559 return -ENOMEM;
1560
1561 if (n_fds > 0) {
1562 _cleanup_free_ char *joined = NULL;
1563
1564 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1565 return -ENOMEM;
1566 our_env[n_env++] = x;
1567
1568 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1569 return -ENOMEM;
1570 our_env[n_env++] = x;
1571
1572 joined = strv_join(p->fd_names, ":");
1573 if (!joined)
1574 return -ENOMEM;
1575
1576 x = strjoin("LISTEN_FDNAMES=", joined, NULL);
1577 if (!x)
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580 }
1581
1582 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1583 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1584 return -ENOMEM;
1585 our_env[n_env++] = x;
1586
1587 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1588 return -ENOMEM;
1589 our_env[n_env++] = x;
1590 }
1591
1592 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1593 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1594 * check the database directly. */
1595 if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
1596 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1597 if (!x)
1598 return -ENOMEM;
1599 our_env[n_env++] = x;
1600 }
1601
1602 if (home) {
1603 x = strappend("HOME=", home);
1604 if (!x)
1605 return -ENOMEM;
1606 our_env[n_env++] = x;
1607 }
1608
1609 if (username) {
1610 x = strappend("LOGNAME=", username);
1611 if (!x)
1612 return -ENOMEM;
1613 our_env[n_env++] = x;
1614
1615 x = strappend("USER=", username);
1616 if (!x)
1617 return -ENOMEM;
1618 our_env[n_env++] = x;
1619 }
1620
1621 if (shell) {
1622 x = strappend("SHELL=", shell);
1623 if (!x)
1624 return -ENOMEM;
1625 our_env[n_env++] = x;
1626 }
1627
1628 if (exec_context_needs_term(c)) {
1629 const char *tty_path, *term = NULL;
1630
1631 tty_path = exec_context_tty_path(c);
1632
1633 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1634 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1635 * passes to PID 1 ends up all the way in the console login shown. */
1636
1637 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1638 term = getenv("TERM");
1639 if (!term)
1640 term = default_term_for_tty(tty_path);
1641
1642 x = strappend("TERM=", term);
1643 if (!x)
1644 return -ENOMEM;
1645 our_env[n_env++] = x;
1646 }
1647
1648 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1649 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1650 return -ENOMEM;
1651
1652 our_env[n_env++] = x;
1653 }
1654
1655 our_env[n_env++] = NULL;
1656 assert(n_env <= 12);
1657
1658 *ret = our_env;
1659 our_env = NULL;
1660
1661 return 0;
1662 }
1663
1664 static int build_pass_environment(const ExecContext *c, char ***ret) {
1665 _cleanup_strv_free_ char **pass_env = NULL;
1666 size_t n_env = 0, n_bufsize = 0;
1667 char **i;
1668
1669 STRV_FOREACH(i, c->pass_environment) {
1670 _cleanup_free_ char *x = NULL;
1671 char *v;
1672
1673 v = getenv(*i);
1674 if (!v)
1675 continue;
1676 x = strjoin(*i, "=", v, NULL);
1677 if (!x)
1678 return -ENOMEM;
1679 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1680 return -ENOMEM;
1681 pass_env[n_env++] = x;
1682 pass_env[n_env] = NULL;
1683 x = NULL;
1684 }
1685
1686 *ret = pass_env;
1687 pass_env = NULL;
1688
1689 return 0;
1690 }
1691
1692 static bool exec_needs_mount_namespace(
1693 const ExecContext *context,
1694 const ExecParameters *params,
1695 ExecRuntime *runtime) {
1696
1697 assert(context);
1698 assert(params);
1699
1700 if (!strv_isempty(context->read_write_paths) ||
1701 !strv_isempty(context->read_only_paths) ||
1702 !strv_isempty(context->inaccessible_paths))
1703 return true;
1704
1705 if (context->mount_flags != 0)
1706 return true;
1707
1708 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1709 return true;
1710
1711 if (context->private_devices ||
1712 context->protect_system != PROTECT_SYSTEM_NO ||
1713 context->protect_home != PROTECT_HOME_NO ||
1714 context->protect_kernel_tunables ||
1715 context->protect_control_groups)
1716 return true;
1717
1718 return false;
1719 }
1720
1721 static int setup_private_users(uid_t uid, gid_t gid) {
1722 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1723 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1724 _cleanup_close_ int unshare_ready_fd = -1;
1725 _cleanup_(sigkill_waitp) pid_t pid = 0;
1726 uint64_t c = 1;
1727 siginfo_t si;
1728 ssize_t n;
1729 int r;
1730
1731 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1732 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1733 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1734 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1735 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1736 * continues execution normally. */
1737
1738 if (uid != 0 && uid_is_valid(uid))
1739 asprintf(&uid_map,
1740 "0 0 1\n" /* Map root → root */
1741 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1742 uid, uid); /* The case where the above is the same */
1743 else
1744 uid_map = strdup("0 0 1\n");
1745 if (!uid_map)
1746 return -ENOMEM;
1747
1748 if (gid != 0 && gid_is_valid(gid))
1749 asprintf(&gid_map,
1750 "0 0 1\n" /* Map root → root */
1751 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1752 gid, gid);
1753 else
1754 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1755 if (!gid_map)
1756 return -ENOMEM;
1757
1758 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1759 * namespace. */
1760 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1761 if (unshare_ready_fd < 0)
1762 return -errno;
1763
1764 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1765 * failed. */
1766 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1767 return -errno;
1768
1769 pid = fork();
1770 if (pid < 0)
1771 return -errno;
1772
1773 if (pid == 0) {
1774 _cleanup_close_ int fd = -1;
1775 const char *a;
1776 pid_t ppid;
1777
1778 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1779 * here, after the parent opened its own user namespace. */
1780
1781 ppid = getppid();
1782 errno_pipe[0] = safe_close(errno_pipe[0]);
1783
1784 /* Wait until the parent unshared the user namespace */
1785 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1786 r = -errno;
1787 goto child_fail;
1788 }
1789
1790 /* Disable the setgroups() system call in the child user namespace, for good. */
1791 a = procfs_file_alloca(ppid, "setgroups");
1792 fd = open(a, O_WRONLY|O_CLOEXEC);
1793 if (fd < 0) {
1794 if (errno != ENOENT) {
1795 r = -errno;
1796 goto child_fail;
1797 }
1798
1799 /* If the file is missing the kernel is too old, let's continue anyway. */
1800 } else {
1801 if (write(fd, "deny\n", 5) < 0) {
1802 r = -errno;
1803 goto child_fail;
1804 }
1805
1806 fd = safe_close(fd);
1807 }
1808
1809 /* First write the GID map */
1810 a = procfs_file_alloca(ppid, "gid_map");
1811 fd = open(a, O_WRONLY|O_CLOEXEC);
1812 if (fd < 0) {
1813 r = -errno;
1814 goto child_fail;
1815 }
1816 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1817 r = -errno;
1818 goto child_fail;
1819 }
1820 fd = safe_close(fd);
1821
1822 /* The write the UID map */
1823 a = procfs_file_alloca(ppid, "uid_map");
1824 fd = open(a, O_WRONLY|O_CLOEXEC);
1825 if (fd < 0) {
1826 r = -errno;
1827 goto child_fail;
1828 }
1829 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833
1834 _exit(EXIT_SUCCESS);
1835
1836 child_fail:
1837 (void) write(errno_pipe[1], &r, sizeof(r));
1838 _exit(EXIT_FAILURE);
1839 }
1840
1841 errno_pipe[1] = safe_close(errno_pipe[1]);
1842
1843 if (unshare(CLONE_NEWUSER) < 0)
1844 return -errno;
1845
1846 /* Let the child know that the namespace is ready now */
1847 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1848 return -errno;
1849
1850 /* Try to read an error code from the child */
1851 n = read(errno_pipe[0], &r, sizeof(r));
1852 if (n < 0)
1853 return -errno;
1854 if (n == sizeof(r)) { /* an error code was sent to us */
1855 if (r < 0)
1856 return r;
1857 return -EIO;
1858 }
1859 if (n != 0) /* on success we should have read 0 bytes */
1860 return -EIO;
1861
1862 r = wait_for_terminate(pid, &si);
1863 if (r < 0)
1864 return r;
1865 pid = 0;
1866
1867 /* If something strange happened with the child, let's consider this fatal, too */
1868 if (si.si_code != CLD_EXITED || si.si_status != 0)
1869 return -EIO;
1870
1871 return 0;
1872 }
1873
1874 static int setup_runtime_directory(
1875 const ExecContext *context,
1876 const ExecParameters *params,
1877 uid_t uid,
1878 gid_t gid) {
1879
1880 char **rt;
1881 int r;
1882
1883 assert(context);
1884 assert(params);
1885
1886 STRV_FOREACH(rt, context->runtime_directory) {
1887 _cleanup_free_ char *p;
1888
1889 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1890 if (!p)
1891 return -ENOMEM;
1892
1893 r = mkdir_p_label(p, context->runtime_directory_mode);
1894 if (r < 0)
1895 return r;
1896
1897 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1898 if (r < 0)
1899 return r;
1900 }
1901
1902 return 0;
1903 }
1904
1905 static int setup_smack(
1906 const ExecContext *context,
1907 const ExecCommand *command) {
1908
1909 #ifdef HAVE_SMACK
1910 int r;
1911
1912 assert(context);
1913 assert(command);
1914
1915 if (!mac_smack_use())
1916 return 0;
1917
1918 if (context->smack_process_label) {
1919 r = mac_smack_apply_pid(0, context->smack_process_label);
1920 if (r < 0)
1921 return r;
1922 }
1923 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1924 else {
1925 _cleanup_free_ char *exec_label = NULL;
1926
1927 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1928 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1929 return r;
1930
1931 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1932 if (r < 0)
1933 return r;
1934 }
1935 #endif
1936 #endif
1937
1938 return 0;
1939 }
1940
1941 static int compile_read_write_paths(
1942 const ExecContext *context,
1943 const ExecParameters *params,
1944 char ***ret) {
1945
1946 _cleanup_strv_free_ char **l = NULL;
1947 char **rt;
1948
1949 /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
1950 * runtime directories. */
1951
1952 if (strv_isempty(context->read_write_paths) &&
1953 strv_isempty(context->runtime_directory)) {
1954 *ret = NULL; /* NOP if neither is set */
1955 return 0;
1956 }
1957
1958 l = strv_copy(context->read_write_paths);
1959 if (!l)
1960 return -ENOMEM;
1961
1962 STRV_FOREACH(rt, context->runtime_directory) {
1963 char *s;
1964
1965 s = strjoin(params->runtime_prefix, "/", *rt, NULL);
1966 if (!s)
1967 return -ENOMEM;
1968
1969 if (strv_consume(&l, s) < 0)
1970 return -ENOMEM;
1971 }
1972
1973 *ret = l;
1974 l = NULL;
1975
1976 return 0;
1977 }
1978
/* Appends those of the two fds in pair[] that are valid (i.e. >= 0) to array[], first pair[0], then pair[1], and
 * increases *n by the number of fds appended. NOP if pair is NULL. The caller has to make sure the array has room
 * for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
1991
1992 static int close_remaining_fds(
1993 const ExecParameters *params,
1994 ExecRuntime *runtime,
1995 DynamicCreds *dcreds,
1996 int user_lookup_fd,
1997 int socket_fd,
1998 int *fds, unsigned n_fds) {
1999
2000 unsigned n_dont_close = 0;
2001 int dont_close[n_fds + 12];
2002
2003 assert(params);
2004
2005 if (params->stdin_fd >= 0)
2006 dont_close[n_dont_close++] = params->stdin_fd;
2007 if (params->stdout_fd >= 0)
2008 dont_close[n_dont_close++] = params->stdout_fd;
2009 if (params->stderr_fd >= 0)
2010 dont_close[n_dont_close++] = params->stderr_fd;
2011
2012 if (socket_fd >= 0)
2013 dont_close[n_dont_close++] = socket_fd;
2014 if (n_fds > 0) {
2015 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2016 n_dont_close += n_fds;
2017 }
2018
2019 if (runtime)
2020 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2021
2022 if (dcreds) {
2023 if (dcreds->user)
2024 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2025 if (dcreds->group)
2026 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2027 }
2028
2029 if (user_lookup_fd >= 0)
2030 dont_close[n_dont_close++] = user_lookup_fd;
2031
2032 return close_all_fds(dont_close, n_dont_close);
2033 }
2034
2035 static bool context_has_address_families(const ExecContext *c) {
2036 assert(c);
2037
2038 return c->address_families_whitelist ||
2039 !set_isempty(c->address_families);
2040 }
2041
2042 static bool context_has_syscall_filters(const ExecContext *c) {
2043 assert(c);
2044
2045 return c->syscall_whitelist ||
2046 !set_isempty(c->syscall_filter) ||
2047 !set_isempty(c->syscall_archs);
2048 }
2049
2050 static bool context_has_no_new_privileges(const ExecContext *c) {
2051 assert(c);
2052
2053 if (c->no_new_privileges)
2054 return true;
2055
2056 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
2057 return false;
2058
2059 return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
2060 c->memory_deny_write_execute ||
2061 c->restrict_realtime ||
2062 c->protect_kernel_tunables ||
2063 context_has_syscall_filters(c);
2064 }
2065
2066 static int send_user_lookup(
2067 Unit *unit,
2068 int user_lookup_fd,
2069 uid_t uid,
2070 gid_t gid) {
2071
2072 assert(unit);
2073
2074 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2075 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2076 * specified. */
2077
2078 if (user_lookup_fd < 0)
2079 return 0;
2080
2081 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2082 return 0;
2083
2084 if (writev(user_lookup_fd,
2085 (struct iovec[]) {
2086 { .iov_base = &uid, .iov_len = sizeof(uid) },
2087 { .iov_base = &gid, .iov_len = sizeof(gid) },
2088 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2089 return -errno;
2090
2091 return 0;
2092 }
2093
2094 static int exec_child(
2095 Unit *unit,
2096 ExecCommand *command,
2097 const ExecContext *context,
2098 const ExecParameters *params,
2099 ExecRuntime *runtime,
2100 DynamicCreds *dcreds,
2101 char **argv,
2102 int socket_fd,
2103 int *fds, unsigned n_fds,
2104 char **files_env,
2105 int user_lookup_fd,
2106 int *exit_status) {
2107
2108 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2109 _cleanup_free_ char *mac_selinux_context_net = NULL;
2110 const char *username = NULL, *home = NULL, *shell = NULL, *wd;
2111 dev_t journal_stream_dev = 0;
2112 ino_t journal_stream_ino = 0;
2113 bool needs_mount_namespace;
2114 uid_t uid = UID_INVALID;
2115 gid_t gid = GID_INVALID;
2116 int i, r;
2117
2118 assert(unit);
2119 assert(command);
2120 assert(context);
2121 assert(params);
2122 assert(exit_status);
2123
2124 rename_process_from_path(command->path);
2125
2126 /* We reset exactly these signals, since they are the
2127 * only ones we set to SIG_IGN in the main daemon. All
2128 * others we leave untouched because we set them to
2129 * SIG_DFL or a valid handler initially, both of which
2130 * will be demoted to SIG_DFL. */
2131 (void) default_signals(SIGNALS_CRASH_HANDLER,
2132 SIGNALS_IGNORE, -1);
2133
2134 if (context->ignore_sigpipe)
2135 (void) ignore_signals(SIGPIPE, -1);
2136
2137 r = reset_signal_mask();
2138 if (r < 0) {
2139 *exit_status = EXIT_SIGNAL_MASK;
2140 return r;
2141 }
2142
2143 if (params->idle_pipe)
2144 do_idle_pipe_dance(params->idle_pipe);
2145
2146 /* Close sockets very early to make sure we don't
2147 * block init reexecution because it cannot bind its
2148 * sockets */
2149
2150 log_forget_fds();
2151
2152 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2153 if (r < 0) {
2154 *exit_status = EXIT_FDS;
2155 return r;
2156 }
2157
2158 if (!context->same_pgrp)
2159 if (setsid() < 0) {
2160 *exit_status = EXIT_SETSID;
2161 return -errno;
2162 }
2163
2164 exec_context_tty_reset(context, params);
2165
2166 if (params->flags & EXEC_CONFIRM_SPAWN) {
2167 char response;
2168
2169 r = ask_for_confirmation(&response, argv);
2170 if (r == -ETIMEDOUT)
2171 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
2172 else if (r < 0)
2173 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
2174 else if (response == 's') {
2175 write_confirm_message("Skipping execution.\n");
2176 *exit_status = EXIT_CONFIRM;
2177 return -ECANCELED;
2178 } else if (response == 'n') {
2179 write_confirm_message("Failing execution.\n");
2180 *exit_status = 0;
2181 return 0;
2182 }
2183 }
2184
2185 if (context->dynamic_user && dcreds) {
2186
2187 /* Make sure we bypass our own NSS module for any NSS checks */
2188 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2189 *exit_status = EXIT_USER;
2190 return -errno;
2191 }
2192
2193 r = dynamic_creds_realize(dcreds, &uid, &gid);
2194 if (r < 0) {
2195 *exit_status = EXIT_USER;
2196 return r;
2197 }
2198
2199 if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
2200 *exit_status = EXIT_USER;
2201 return -ESRCH;
2202 }
2203
2204 if (dcreds->user)
2205 username = dcreds->user->name;
2206
2207 } else {
2208 if (context->user) {
2209 username = context->user;
2210 r = get_user_creds_clean(&username, &uid, &gid, &home, &shell);
2211 if (r < 0) {
2212 *exit_status = EXIT_USER;
2213 return r;
2214 }
2215
2216 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
2217 * (i.e. are "/" or "/bin/nologin"). */
2218 }
2219
2220 if (context->group) {
2221 const char *g = context->group;
2222
2223 r = get_group_creds(&g, &gid);
2224 if (r < 0) {
2225 *exit_status = EXIT_GROUP;
2226 return r;
2227 }
2228 }
2229 }
2230
2231 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2232 if (r < 0) {
2233 *exit_status = EXIT_USER;
2234 return r;
2235 }
2236
2237 user_lookup_fd = safe_close(user_lookup_fd);
2238
2239 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2240 * must sure to drop O_NONBLOCK */
2241 if (socket_fd >= 0)
2242 (void) fd_nonblock(socket_fd, false);
2243
2244 r = setup_input(context, params, socket_fd);
2245 if (r < 0) {
2246 *exit_status = EXIT_STDIN;
2247 return r;
2248 }
2249
2250 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2251 if (r < 0) {
2252 *exit_status = EXIT_STDOUT;
2253 return r;
2254 }
2255
2256 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2257 if (r < 0) {
2258 *exit_status = EXIT_STDERR;
2259 return r;
2260 }
2261
2262 if (params->cgroup_path) {
2263 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2264 if (r < 0) {
2265 *exit_status = EXIT_CGROUP;
2266 return r;
2267 }
2268 }
2269
2270 if (context->oom_score_adjust_set) {
2271 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2272
2273 /* When we can't make this change due to EPERM, then
2274 * let's silently skip over it. User namespaces
2275 * prohibit write access to this file, and we
2276 * shouldn't trip up over that. */
2277
2278 sprintf(t, "%i", context->oom_score_adjust);
2279 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2280 if (r == -EPERM || r == -EACCES) {
2281 log_open();
2282 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2283 log_close();
2284 } else if (r < 0) {
2285 *exit_status = EXIT_OOM_ADJUST;
2286 return -errno;
2287 }
2288 }
2289
2290 if (context->nice_set)
2291 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2292 *exit_status = EXIT_NICE;
2293 return -errno;
2294 }
2295
2296 if (context->cpu_sched_set) {
2297 struct sched_param param = {
2298 .sched_priority = context->cpu_sched_priority,
2299 };
2300
2301 r = sched_setscheduler(0,
2302 context->cpu_sched_policy |
2303 (context->cpu_sched_reset_on_fork ?
2304 SCHED_RESET_ON_FORK : 0),
2305 &param);
2306 if (r < 0) {
2307 *exit_status = EXIT_SETSCHEDULER;
2308 return -errno;
2309 }
2310 }
2311
2312 if (context->cpuset)
2313 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2314 *exit_status = EXIT_CPUAFFINITY;
2315 return -errno;
2316 }
2317
2318 if (context->ioprio_set)
2319 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2320 *exit_status = EXIT_IOPRIO;
2321 return -errno;
2322 }
2323
2324 if (context->timer_slack_nsec != NSEC_INFINITY)
2325 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2326 *exit_status = EXIT_TIMERSLACK;
2327 return -errno;
2328 }
2329
2330 if (context->personality != PERSONALITY_INVALID)
2331 if (personality(context->personality) < 0) {
2332 *exit_status = EXIT_PERSONALITY;
2333 return -errno;
2334 }
2335
2336 if (context->utmp_id)
2337 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
2338 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2339 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2340 USER_PROCESS,
2341 username ? "root" : context->user);
2342
2343 if (context->user && is_terminal_input(context->std_input)) {
2344 r = chown_terminal(STDIN_FILENO, uid);
2345 if (r < 0) {
2346 *exit_status = EXIT_STDIN;
2347 return r;
2348 }
2349 }
2350
2351 /* If delegation is enabled we'll pass ownership of the cgroup
2352 * (but only in systemd's own controller hierarchy!) to the
2353 * user of the new process. */
2354 if (params->cgroup_path && context->user && params->cgroup_delegate) {
2355 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2356 if (r < 0) {
2357 *exit_status = EXIT_CGROUP;
2358 return r;
2359 }
2360
2361
2362 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2363 if (r < 0) {
2364 *exit_status = EXIT_CGROUP;
2365 return r;
2366 }
2367 }
2368
2369 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
2370 r = setup_runtime_directory(context, params, uid, gid);
2371 if (r < 0) {
2372 *exit_status = EXIT_RUNTIME_DIRECTORY;
2373 return r;
2374 }
2375 }
2376
2377 r = build_environment(
2378 unit,
2379 context,
2380 params,
2381 n_fds,
2382 home,
2383 username,
2384 shell,
2385 journal_stream_dev,
2386 journal_stream_ino,
2387 &our_env);
2388 if (r < 0) {
2389 *exit_status = EXIT_MEMORY;
2390 return r;
2391 }
2392
2393 r = build_pass_environment(context, &pass_env);
2394 if (r < 0) {
2395 *exit_status = EXIT_MEMORY;
2396 return r;
2397 }
2398
2399 accum_env = strv_env_merge(5,
2400 params->environment,
2401 our_env,
2402 pass_env,
2403 context->environment,
2404 files_env,
2405 NULL);
2406 if (!accum_env) {
2407 *exit_status = EXIT_MEMORY;
2408 return -ENOMEM;
2409 }
2410 accum_env = strv_env_clean(accum_env);
2411
2412 (void) umask(context->umask);
2413
2414 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2415 r = setup_smack(context, command);
2416 if (r < 0) {
2417 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2418 return r;
2419 }
2420
2421 if (context->pam_name && username) {
2422 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2423 if (r < 0) {
2424 *exit_status = EXIT_PAM;
2425 return r;
2426 }
2427 }
2428 }
2429
2430 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2431 r = setup_netns(runtime->netns_storage_socket);
2432 if (r < 0) {
2433 *exit_status = EXIT_NETWORK;
2434 return r;
2435 }
2436 }
2437
2438 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2439 if (needs_mount_namespace) {
2440 _cleanup_free_ char **rw = NULL;
2441 char *tmp = NULL, *var = NULL;
2442
2443 /* The runtime struct only contains the parent
2444 * of the private /tmp, which is
2445 * non-accessible to world users. Inside of it
2446 * there's a /tmp that is sticky, and that's
2447 * the one we want to use here. */
2448
2449 if (context->private_tmp && runtime) {
2450 if (runtime->tmp_dir)
2451 tmp = strjoina(runtime->tmp_dir, "/tmp");
2452 if (runtime->var_tmp_dir)
2453 var = strjoina(runtime->var_tmp_dir, "/tmp");
2454 }
2455
2456 r = compile_read_write_paths(context, params, &rw);
2457 if (r < 0) {
2458 *exit_status = EXIT_NAMESPACE;
2459 return r;
2460 }
2461
2462 r = setup_namespace(
2463 (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
2464 rw,
2465 context->read_only_paths,
2466 context->inaccessible_paths,
2467 tmp,
2468 var,
2469 context->private_devices,
2470 context->protect_kernel_tunables,
2471 context->protect_control_groups,
2472 context->protect_home,
2473 context->protect_system,
2474 context->mount_flags);
2475
2476 /* If we couldn't set up the namespace this is
2477 * probably due to a missing capability. In this case,
2478 * silently proceeed. */
2479 if (r == -EPERM || r == -EACCES) {
2480 log_open();
2481 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2482 log_close();
2483 } else if (r < 0) {
2484 *exit_status = EXIT_NAMESPACE;
2485 return r;
2486 }
2487 }
2488
2489 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2490 r = enforce_groups(context, username, gid);
2491 if (r < 0) {
2492 *exit_status = EXIT_GROUP;
2493 return r;
2494 }
2495 }
2496
2497 if (context->working_directory_home)
2498 wd = home;
2499 else if (context->working_directory)
2500 wd = context->working_directory;
2501 else
2502 wd = "/";
2503
2504 if (params->flags & EXEC_APPLY_CHROOT) {
2505 if (!needs_mount_namespace && context->root_directory)
2506 if (chroot(context->root_directory) < 0) {
2507 *exit_status = EXIT_CHROOT;
2508 return -errno;
2509 }
2510
2511 if (chdir(wd) < 0 &&
2512 !context->working_directory_missing_ok) {
2513 *exit_status = EXIT_CHDIR;
2514 return -errno;
2515 }
2516 } else {
2517 const char *d;
2518
2519 d = strjoina(strempty(context->root_directory), "/", strempty(wd));
2520 if (chdir(d) < 0 &&
2521 !context->working_directory_missing_ok) {
2522 *exit_status = EXIT_CHDIR;
2523 return -errno;
2524 }
2525 }
2526
2527 #ifdef HAVE_SELINUX
2528 if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
2529 mac_selinux_use() &&
2530 params->selinux_context_net &&
2531 socket_fd >= 0 &&
2532 !command->privileged) {
2533
2534 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2535 if (r < 0) {
2536 *exit_status = EXIT_SELINUX_CONTEXT;
2537 return r;
2538 }
2539 }
2540 #endif
2541
2542 if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) {
2543 r = setup_private_users(uid, gid);
2544 if (r < 0) {
2545 *exit_status = EXIT_USER;
2546 return r;
2547 }
2548 }
2549
2550 /* We repeat the fd closing here, to make sure that
2551 * nothing is leaked from the PAM modules. Note that
2552 * we are more aggressive this time since socket_fd
2553 * and the netns fds we don't need anymore. The custom
2554 * endpoint fd was needed to upload the policy and can
2555 * now be closed as well. */
2556 r = close_all_fds(fds, n_fds);
2557 if (r >= 0)
2558 r = shift_fds(fds, n_fds);
2559 if (r >= 0)
2560 r = flags_fds(fds, n_fds, context->non_blocking);
2561 if (r < 0) {
2562 *exit_status = EXIT_FDS;
2563 return r;
2564 }
2565
2566 if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2567
2568 int secure_bits = context->secure_bits;
2569
2570 for (i = 0; i < _RLIMIT_MAX; i++) {
2571
2572 if (!context->rlimit[i])
2573 continue;
2574
2575 r = setrlimit_closest(i, context->rlimit[i]);
2576 if (r < 0) {
2577 *exit_status = EXIT_LIMITS;
2578 return r;
2579 }
2580 }
2581
2582 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2583 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2584 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2585 *exit_status = EXIT_LIMITS;
2586 return -errno;
2587 }
2588 }
2589
2590 if (!cap_test_all(context->capability_bounding_set)) {
2591 r = capability_bounding_set_drop(context->capability_bounding_set, false);
2592 if (r < 0) {
2593 *exit_status = EXIT_CAPABILITIES;
2594 return r;
2595 }
2596 }
2597
2598 /* This is done before enforce_user, but ambient set
2599 * does not survive over setresuid() if keep_caps is not set. */
2600 if (context->capability_ambient_set != 0) {
2601 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2602 if (r < 0) {
2603 *exit_status = EXIT_CAPABILITIES;
2604 return r;
2605 }
2606 }
2607
2608 if (context->user) {
2609 r = enforce_user(context, uid);
2610 if (r < 0) {
2611 *exit_status = EXIT_USER;
2612 return r;
2613 }
2614 if (context->capability_ambient_set != 0) {
2615
2616 /* Fix the ambient capabilities after user change. */
2617 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2618 if (r < 0) {
2619 *exit_status = EXIT_CAPABILITIES;
2620 return r;
2621 }
2622
2623 /* If we were asked to change user and ambient capabilities
2624 * were requested, we had to add keep-caps to the securebits
2625 * so that we would maintain the inherited capability set
2626 * through the setresuid(). Make sure that the bit is added
2627 * also to the context secure_bits so that we don't try to
2628 * drop the bit away next. */
2629
2630 secure_bits |= 1<<SECURE_KEEP_CAPS;
2631 }
2632 }
2633
2634 /* PR_GET_SECUREBITS is not privileged, while
2635 * PR_SET_SECUREBITS is. So to suppress
2636 * potential EPERMs we'll try not to call
2637 * PR_SET_SECUREBITS unless necessary. */
2638 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2639 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
2640 *exit_status = EXIT_SECUREBITS;
2641 return -errno;
2642 }
2643
2644 if (context_has_no_new_privileges(context))
2645 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2646 *exit_status = EXIT_NO_NEW_PRIVILEGES;
2647 return -errno;
2648 }
2649
2650 #ifdef HAVE_SECCOMP
2651 if (context_has_address_families(context)) {
2652 r = apply_address_families(unit, context);
2653 if (r < 0) {
2654 *exit_status = EXIT_ADDRESS_FAMILIES;
2655 return r;
2656 }
2657 }
2658
2659 if (context->memory_deny_write_execute) {
2660 r = apply_memory_deny_write_execute(unit, context);
2661 if (r < 0) {
2662 *exit_status = EXIT_SECCOMP;
2663 return r;
2664 }
2665 }
2666
2667 if (context->restrict_realtime) {
2668 r = apply_restrict_realtime(unit, context);
2669 if (r < 0) {
2670 *exit_status = EXIT_SECCOMP;
2671 return r;
2672 }
2673 }
2674
2675 if (context->protect_kernel_tunables) {
2676 r = apply_protect_sysctl(unit, context);
2677 if (r < 0) {
2678 *exit_status = EXIT_SECCOMP;
2679 return r;
2680 }
2681 }
2682
2683 if (context->private_devices) {
2684 r = apply_private_devices(unit, context);
2685 if (r < 0) {
2686 *exit_status = EXIT_SECCOMP;
2687 return r;
2688 }
2689 }
2690
2691 if (context_has_syscall_filters(context)) {
2692 r = apply_seccomp(unit, context);
2693 if (r < 0) {
2694 *exit_status = EXIT_SECCOMP;
2695 return r;
2696 }
2697 }
2698 #endif
2699
2700 #ifdef HAVE_SELINUX
2701 if (mac_selinux_use()) {
2702 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2703
2704 if (exec_context) {
2705 r = setexeccon(exec_context);
2706 if (r < 0) {
2707 *exit_status = EXIT_SELINUX_CONTEXT;
2708 return r;
2709 }
2710 }
2711 }
2712 #endif
2713
2714 #ifdef HAVE_APPARMOR
2715 if (context->apparmor_profile && mac_apparmor_use()) {
2716 r = aa_change_onexec(context->apparmor_profile);
2717 if (r < 0 && !context->apparmor_profile_ignore) {
2718 *exit_status = EXIT_APPARMOR_PROFILE;
2719 return -errno;
2720 }
2721 }
2722 #endif
2723 }
2724
2725 final_argv = replace_env_argv(argv, accum_env);
2726 if (!final_argv) {
2727 *exit_status = EXIT_MEMORY;
2728 return -ENOMEM;
2729 }
2730
2731 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
2732 _cleanup_free_ char *line;
2733
2734 line = exec_command_line(final_argv);
2735 if (line) {
2736 log_open();
2737 log_struct(LOG_DEBUG,
2738 LOG_UNIT_ID(unit),
2739 "EXECUTABLE=%s", command->path,
2740 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
2741 NULL);
2742 log_close();
2743 }
2744 }
2745
2746 execve(command->path, final_argv, accum_env);
2747 *exit_status = EXIT_EXEC;
2748 return -errno;
2749 }
2750
2751 int exec_spawn(Unit *unit,
2752 ExecCommand *command,
2753 const ExecContext *context,
2754 const ExecParameters *params,
2755 ExecRuntime *runtime,
2756 DynamicCreds *dcreds,
2757 pid_t *ret) {
2758
2759 _cleanup_strv_free_ char **files_env = NULL;
2760 int *fds = NULL; unsigned n_fds = 0;
2761 _cleanup_free_ char *line = NULL;
2762 int socket_fd, r;
2763 char **argv;
2764 pid_t pid;
2765
2766 assert(unit);
2767 assert(command);
2768 assert(context);
2769 assert(ret);
2770 assert(params);
2771 assert(params->fds || params->n_fds <= 0);
2772
2773 if (context->std_input == EXEC_INPUT_SOCKET ||
2774 context->std_output == EXEC_OUTPUT_SOCKET ||
2775 context->std_error == EXEC_OUTPUT_SOCKET) {
2776
2777 if (params->n_fds != 1) {
2778 log_unit_error(unit, "Got more than one socket.");
2779 return -EINVAL;
2780 }
2781
2782 socket_fd = params->fds[0];
2783 } else {
2784 socket_fd = -1;
2785 fds = params->fds;
2786 n_fds = params->n_fds;
2787 }
2788
2789 r = exec_context_load_environment(unit, context, &files_env);
2790 if (r < 0)
2791 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
2792
2793 argv = params->argv ?: command->argv;
2794 line = exec_command_line(argv);
2795 if (!line)
2796 return log_oom();
2797
2798 log_struct(LOG_DEBUG,
2799 LOG_UNIT_ID(unit),
2800 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
2801 "EXECUTABLE=%s", command->path,
2802 NULL);
2803 pid = fork();
2804 if (pid < 0)
2805 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
2806
2807 if (pid == 0) {
2808 int exit_status;
2809
2810 r = exec_child(unit,
2811 command,
2812 context,
2813 params,
2814 runtime,
2815 dcreds,
2816 argv,
2817 socket_fd,
2818 fds, n_fds,
2819 files_env,
2820 unit->manager->user_lookup_fds[1],
2821 &exit_status);
2822 if (r < 0) {
2823 log_open();
2824 log_struct_errno(LOG_ERR, r,
2825 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
2826 LOG_UNIT_ID(unit),
2827 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
2828 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
2829 command->path),
2830 "EXECUTABLE=%s", command->path,
2831 NULL);
2832 }
2833
2834 _exit(exit_status);
2835 }
2836
2837 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
2838
2839 /* We add the new process to the cgroup both in the child (so
2840 * that we can be sure that no user code is ever executed
2841 * outside of the cgroup) and in the parent (so that we can be
2842 * sure that when we kill the cgroup the process will be
2843 * killed too). */
2844 if (params->cgroup_path)
2845 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2846
2847 exec_status_start(&command->exec_status, pid);
2848
2849 *ret = pid;
2850 return 0;
2851 }
2852
2853 void exec_context_init(ExecContext *c) {
2854 assert(c);
2855
2856 c->umask = 0022;
2857 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
2858 c->cpu_sched_policy = SCHED_OTHER;
2859 c->syslog_priority = LOG_DAEMON|LOG_INFO;
2860 c->syslog_level_prefix = true;
2861 c->ignore_sigpipe = true;
2862 c->timer_slack_nsec = NSEC_INFINITY;
2863 c->personality = PERSONALITY_INVALID;
2864 c->runtime_directory_mode = 0755;
2865 c->capability_bounding_set = CAP_ALL;
2866 }
2867
2868 void exec_context_done(ExecContext *c) {
2869 unsigned l;
2870
2871 assert(c);
2872
2873 c->environment = strv_free(c->environment);
2874 c->environment_files = strv_free(c->environment_files);
2875 c->pass_environment = strv_free(c->pass_environment);
2876
2877 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
2878 c->rlimit[l] = mfree(c->rlimit[l]);
2879
2880 c->working_directory = mfree(c->working_directory);
2881 c->root_directory = mfree(c->root_directory);
2882 c->tty_path = mfree(c->tty_path);
2883 c->syslog_identifier = mfree(c->syslog_identifier);
2884 c->user = mfree(c->user);
2885 c->group = mfree(c->group);
2886
2887 c->supplementary_groups = strv_free(c->supplementary_groups);
2888
2889 c->pam_name = mfree(c->pam_name);
2890
2891 c->read_only_paths = strv_free(c->read_only_paths);
2892 c->read_write_paths = strv_free(c->read_write_paths);
2893 c->inaccessible_paths = strv_free(c->inaccessible_paths);
2894
2895 if (c->cpuset)
2896 CPU_FREE(c->cpuset);
2897
2898 c->utmp_id = mfree(c->utmp_id);
2899 c->selinux_context = mfree(c->selinux_context);
2900 c->apparmor_profile = mfree(c->apparmor_profile);
2901
2902 c->syscall_filter = set_free(c->syscall_filter);
2903 c->syscall_archs = set_free(c->syscall_archs);
2904 c->address_families = set_free(c->address_families);
2905
2906 c->runtime_directory = strv_free(c->runtime_directory);
2907 }
2908
2909 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2910 char **i;
2911
2912 assert(c);
2913
2914 if (!runtime_prefix)
2915 return 0;
2916
2917 STRV_FOREACH(i, c->runtime_directory) {
2918 _cleanup_free_ char *p;
2919
2920 p = strjoin(runtime_prefix, "/", *i, NULL);
2921 if (!p)
2922 return -ENOMEM;
2923
2924 /* We execute this synchronously, since we need to be
2925 * sure this is gone when we start the service
2926 * next. */
2927 (void) rm_rf(p, REMOVE_ROOT);
2928 }
2929
2930 return 0;
2931 }
2932
2933 void exec_command_done(ExecCommand *c) {
2934 assert(c);
2935
2936 c->path = mfree(c->path);
2937
2938 c->argv = strv_free(c->argv);
2939 }
2940
2941 void exec_command_done_array(ExecCommand *c, unsigned n) {
2942 unsigned i;
2943
2944 for (i = 0; i < n; i++)
2945 exec_command_done(c+i);
2946 }
2947
2948 ExecCommand* exec_command_free_list(ExecCommand *c) {
2949 ExecCommand *i;
2950
2951 while ((i = c)) {
2952 LIST_REMOVE(command, c, i);
2953 exec_command_done(i);
2954 free(i);
2955 }
2956
2957 return NULL;
2958 }
2959
2960 void exec_command_free_array(ExecCommand **c, unsigned n) {
2961 unsigned i;
2962
2963 for (i = 0; i < n; i++)
2964 c[i] = exec_command_free_list(c[i]);
2965 }
2966
2967 typedef struct InvalidEnvInfo {
2968 Unit *unit;
2969 const char *path;
2970 } InvalidEnvInfo;
2971
2972 static void invalid_env(const char *p, void *userdata) {
2973 InvalidEnvInfo *info = userdata;
2974
2975 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2976 }
2977
2978 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2979 char **i, **r = NULL;
2980
2981 assert(c);
2982 assert(l);
2983
2984 STRV_FOREACH(i, c->environment_files) {
2985 char *fn;
2986 int k;
2987 bool ignore = false;
2988 char **p;
2989 _cleanup_globfree_ glob_t pglob = {};
2990 int count, n;
2991
2992 fn = *i;
2993
2994 if (fn[0] == '-') {
2995 ignore = true;
2996 fn++;
2997 }
2998
2999 if (!path_is_absolute(fn)) {
3000 if (ignore)
3001 continue;
3002
3003 strv_free(r);
3004 return -EINVAL;
3005 }
3006
3007 /* Filename supports globbing, take all matching files */
3008 errno = 0;
3009 if (glob(fn, 0, NULL, &pglob) != 0) {
3010 if (ignore)
3011 continue;
3012
3013 strv_free(r);
3014 return errno > 0 ? -errno : -EINVAL;
3015 }
3016 count = pglob.gl_pathc;
3017 if (count == 0) {
3018 if (ignore)
3019 continue;
3020
3021 strv_free(r);
3022 return -EINVAL;
3023 }
3024 for (n = 0; n < count; n++) {
3025 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3026 if (k < 0) {
3027 if (ignore)
3028 continue;
3029
3030 strv_free(r);
3031 return k;
3032 }
3033 /* Log invalid environment variables with filename */
3034 if (p) {
3035 InvalidEnvInfo info = {
3036 .unit = unit,
3037 .path = pglob.gl_pathv[n]
3038 };
3039
3040 p = strv_env_clean_with_callback(p, invalid_env, &info);
3041 }
3042
3043 if (r == NULL)
3044 r = p;
3045 else {
3046 char **m;
3047
3048 m = strv_env_merge(2, r, p);
3049 strv_free(r);
3050 strv_free(p);
3051 if (!m)
3052 return -ENOMEM;
3053
3054 r = m;
3055 }
3056 }
3057 }
3058
3059 *l = r;
3060
3061 return 0;
3062 }
3063
3064 static bool tty_may_match_dev_console(const char *tty) {
3065 _cleanup_free_ char *active = NULL;
3066 char *console;
3067
3068 if (!tty)
3069 return true;
3070
3071 if (startswith(tty, "/dev/"))
3072 tty += 5;
3073
3074 /* trivial identity? */
3075 if (streq(tty, "console"))
3076 return true;
3077
3078 console = resolve_dev_console(&active);
3079 /* if we could not resolve, assume it may */
3080 if (!console)
3081 return true;
3082
3083 /* "tty0" means the active VC, so it may be the same sometimes */
3084 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3085 }
3086
3087 bool exec_context_may_touch_console(ExecContext *ec) {
3088
3089 return (ec->tty_reset ||
3090 ec->tty_vhangup ||
3091 ec->tty_vt_disallocate ||
3092 is_terminal_input(ec->std_input) ||
3093 is_terminal_output(ec->std_output) ||
3094 is_terminal_output(ec->std_error)) &&
3095 tty_may_match_dev_console(exec_context_tty_path(ec));
3096 }
3097
3098 static void strv_fprintf(FILE *f, char **l) {
3099 char **g;
3100
3101 assert(f);
3102
3103 STRV_FOREACH(g, l)
3104 fprintf(f, " %s", *g);
3105 }
3106
3107 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3108 char **e, **d;
3109 unsigned i;
3110
3111 assert(c);
3112 assert(f);
3113
3114 prefix = strempty(prefix);
3115
3116 fprintf(f,
3117 "%sUMask: %04o\n"
3118 "%sWorkingDirectory: %s\n"
3119 "%sRootDirectory: %s\n"
3120 "%sNonBlocking: %s\n"
3121 "%sPrivateTmp: %s\n"
3122 "%sPrivateDevices: %s\n"
3123 "%sProtectKernelTunables: %s\n"
3124 "%sProtectControlGroups: %s\n"
3125 "%sPrivateNetwork: %s\n"
3126 "%sPrivateUsers: %s\n"
3127 "%sProtectHome: %s\n"
3128 "%sProtectSystem: %s\n"
3129 "%sIgnoreSIGPIPE: %s\n"
3130 "%sMemoryDenyWriteExecute: %s\n"
3131 "%sRestrictRealtime: %s\n",
3132 prefix, c->umask,
3133 prefix, c->working_directory ? c->working_directory : "/",
3134 prefix, c->root_directory ? c->root_directory : "/",
3135 prefix, yes_no(c->non_blocking),
3136 prefix, yes_no(c->private_tmp),
3137 prefix, yes_no(c->private_devices),
3138 prefix, yes_no(c->protect_kernel_tunables),
3139 prefix, yes_no(c->protect_control_groups),
3140 prefix, yes_no(c->private_network),
3141 prefix, yes_no(c->private_users),
3142 prefix, protect_home_to_string(c->protect_home),
3143 prefix, protect_system_to_string(c->protect_system),
3144 prefix, yes_no(c->ignore_sigpipe),
3145 prefix, yes_no(c->memory_deny_write_execute),
3146 prefix, yes_no(c->restrict_realtime));
3147
3148 STRV_FOREACH(e, c->environment)
3149 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3150
3151 STRV_FOREACH(e, c->environment_files)
3152 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3153
3154 STRV_FOREACH(e, c->pass_environment)
3155 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3156
3157 fprintf(f, "%sRuntimeDirectoryMode: %04o\n", prefix, c->runtime_directory_mode);
3158
3159 STRV_FOREACH(d, c->runtime_directory)
3160 fprintf(f, "%sRuntimeDirectory: %s\n", prefix, *d);
3161
3162 if (c->nice_set)
3163 fprintf(f,
3164 "%sNice: %i\n",
3165 prefix, c->nice);
3166
3167 if (c->oom_score_adjust_set)
3168 fprintf(f,
3169 "%sOOMScoreAdjust: %i\n",
3170 prefix, c->oom_score_adjust);
3171
3172 for (i = 0; i < RLIM_NLIMITS; i++)
3173 if (c->rlimit[i]) {
3174 fprintf(f, "%s%s: " RLIM_FMT "\n",
3175 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3176 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3177 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3178 }
3179
3180 if (c->ioprio_set) {
3181 _cleanup_free_ char *class_str = NULL;
3182
3183 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3184 fprintf(f,
3185 "%sIOSchedulingClass: %s\n"
3186 "%sIOPriority: %i\n",
3187 prefix, strna(class_str),
3188 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
3189 }
3190
3191 if (c->cpu_sched_set) {
3192 _cleanup_free_ char *policy_str = NULL;
3193
3194 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3195 fprintf(f,
3196 "%sCPUSchedulingPolicy: %s\n"
3197 "%sCPUSchedulingPriority: %i\n"
3198 "%sCPUSchedulingResetOnFork: %s\n",
3199 prefix, strna(policy_str),
3200 prefix, c->cpu_sched_priority,
3201 prefix, yes_no(c->cpu_sched_reset_on_fork));
3202 }
3203
3204 if (c->cpuset) {
3205 fprintf(f, "%sCPUAffinity:", prefix);
3206 for (i = 0; i < c->cpuset_ncpus; i++)
3207 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3208 fprintf(f, " %u", i);
3209 fputs("\n", f);
3210 }
3211
3212 if (c->timer_slack_nsec != NSEC_INFINITY)
3213 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3214
3215 fprintf(f,
3216 "%sStandardInput: %s\n"
3217 "%sStandardOutput: %s\n"
3218 "%sStandardError: %s\n",
3219 prefix, exec_input_to_string(c->std_input),
3220 prefix, exec_output_to_string(c->std_output),
3221 prefix, exec_output_to_string(c->std_error));
3222
3223 if (c->tty_path)
3224 fprintf(f,
3225 "%sTTYPath: %s\n"
3226 "%sTTYReset: %s\n"
3227 "%sTTYVHangup: %s\n"
3228 "%sTTYVTDisallocate: %s\n",
3229 prefix, c->tty_path,
3230 prefix, yes_no(c->tty_reset),
3231 prefix, yes_no(c->tty_vhangup),
3232 prefix, yes_no(c->tty_vt_disallocate));
3233
3234 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
3235 c->std_output == EXEC_OUTPUT_KMSG ||
3236 c->std_output == EXEC_OUTPUT_JOURNAL ||
3237 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3238 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3239 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
3240 c->std_error == EXEC_OUTPUT_SYSLOG ||
3241 c->std_error == EXEC_OUTPUT_KMSG ||
3242 c->std_error == EXEC_OUTPUT_JOURNAL ||
3243 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3244 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3245 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
3246
3247 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3248
3249 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3250 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3251
3252 fprintf(f,
3253 "%sSyslogFacility: %s\n"
3254 "%sSyslogLevel: %s\n",
3255 prefix, strna(fac_str),
3256 prefix, strna(lvl_str));
3257 }
3258
3259 if (c->secure_bits)
3260 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
3261 prefix,
3262 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
3263 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
3264 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
3265 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
3266 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
3267 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
3268
3269 if (c->capability_bounding_set != CAP_ALL) {
3270 unsigned long l;
3271 fprintf(f, "%sCapabilityBoundingSet:", prefix);
3272
3273 for (l = 0; l <= cap_last_cap(); l++)
3274 if (c->capability_bounding_set & (UINT64_C(1) << l))
3275 fprintf(f, " %s", strna(capability_to_name(l)));
3276
3277 fputs("\n", f);
3278 }
3279
3280 if (c->capability_ambient_set != 0) {
3281 unsigned long l;
3282 fprintf(f, "%sAmbientCapabilities:", prefix);
3283
3284 for (l = 0; l <= cap_last_cap(); l++)
3285 if (c->capability_ambient_set & (UINT64_C(1) << l))
3286 fprintf(f, " %s", strna(capability_to_name(l)));
3287
3288 fputs("\n", f);
3289 }
3290
3291 if (c->user)
3292 fprintf(f, "%sUser: %s\n", prefix, c->user);
3293 if (c->group)
3294 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3295
3296 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3297
3298 if (strv_length(c->supplementary_groups) > 0) {
3299 fprintf(f, "%sSupplementaryGroups:", prefix);
3300 strv_fprintf(f, c->supplementary_groups);
3301 fputs("\n", f);
3302 }
3303
3304 if (c->pam_name)
3305 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3306
3307 if (strv_length(c->read_write_paths) > 0) {
3308 fprintf(f, "%sReadWritePaths:", prefix);
3309 strv_fprintf(f, c->read_write_paths);
3310 fputs("\n", f);
3311 }
3312
3313 if (strv_length(c->read_only_paths) > 0) {
3314 fprintf(f, "%sReadOnlyPaths:", prefix);
3315 strv_fprintf(f, c->read_only_paths);
3316 fputs("\n", f);
3317 }
3318
3319 if (strv_length(c->inaccessible_paths) > 0) {
3320 fprintf(f, "%sInaccessiblePaths:", prefix);
3321 strv_fprintf(f, c->inaccessible_paths);
3322 fputs("\n", f);
3323 }
3324
3325 if (c->utmp_id)
3326 fprintf(f,
3327 "%sUtmpIdentifier: %s\n",
3328 prefix, c->utmp_id);
3329
3330 if (c->selinux_context)
3331 fprintf(f,
3332 "%sSELinuxContext: %s%s\n",
3333 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3334
3335 if (c->personality != PERSONALITY_INVALID)
3336 fprintf(f,
3337 "%sPersonality: %s\n",
3338 prefix, strna(personality_to_string(c->personality)));
3339
3340 if (c->syscall_filter) {
3341 #ifdef HAVE_SECCOMP
3342 Iterator j;
3343 void *id;
3344 bool first = true;
3345 #endif
3346
3347 fprintf(f,
3348 "%sSystemCallFilter: ",
3349 prefix);
3350
3351 if (!c->syscall_whitelist)
3352 fputc('~', f);
3353
3354 #ifdef HAVE_SECCOMP
3355 SET_FOREACH(id, c->syscall_filter, j) {
3356 _cleanup_free_ char *name = NULL;
3357
3358 if (first)
3359 first = false;
3360 else
3361 fputc(' ', f);
3362
3363 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3364 fputs(strna(name), f);
3365 }
3366 #endif
3367
3368 fputc('\n', f);
3369 }
3370
3371 if (c->syscall_archs) {
3372 #ifdef HAVE_SECCOMP
3373 Iterator j;
3374 void *id;
3375 #endif
3376
3377 fprintf(f,
3378 "%sSystemCallArchitectures:",
3379 prefix);
3380
3381 #ifdef HAVE_SECCOMP
3382 SET_FOREACH(id, c->syscall_archs, j)
3383 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3384 #endif
3385 fputc('\n', f);
3386 }
3387
3388 if (c->syscall_errno > 0)
3389 fprintf(f,
3390 "%sSystemCallErrorNumber: %s\n",
3391 prefix, strna(errno_to_name(c->syscall_errno)));
3392
3393 if (c->apparmor_profile)
3394 fprintf(f,
3395 "%sAppArmorProfile: %s%s\n",
3396 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3397 }
3398
3399 bool exec_context_maintains_privileges(ExecContext *c) {
3400 assert(c);
3401
3402 /* Returns true if the process forked off would run under
3403 * an unchanged UID or as root. */
3404
3405 if (!c->user)
3406 return true;
3407
3408 if (streq(c->user, "root") || streq(c->user, "0"))
3409 return true;
3410
3411 return false;
3412 }
3413
3414 void exec_status_start(ExecStatus *s, pid_t pid) {
3415 assert(s);
3416
3417 zero(*s);
3418 s->pid = pid;
3419 dual_timestamp_get(&s->start_timestamp);
3420 }
3421
3422 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
3423 assert(s);
3424
3425 if (s->pid && s->pid != pid)
3426 zero(*s);
3427
3428 s->pid = pid;
3429 dual_timestamp_get(&s->exit_timestamp);
3430
3431 s->code = code;
3432 s->status = status;
3433
3434 if (context) {
3435 if (context->utmp_id)
3436 utmp_put_dead_process(context->utmp_id, pid, code, status);
3437
3438 exec_context_tty_reset(context, NULL);
3439 }
3440 }
3441
3442 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3443 char buf[FORMAT_TIMESTAMP_MAX];
3444
3445 assert(s);
3446 assert(f);
3447
3448 if (s->pid <= 0)
3449 return;
3450
3451 prefix = strempty(prefix);
3452
3453 fprintf(f,
3454 "%sPID: "PID_FMT"\n",
3455 prefix, s->pid);
3456
3457 if (dual_timestamp_is_set(&s->start_timestamp))
3458 fprintf(f,
3459 "%sStart Timestamp: %s\n",
3460 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
3461
3462 if (dual_timestamp_is_set(&s->exit_timestamp))
3463 fprintf(f,
3464 "%sExit Timestamp: %s\n"
3465 "%sExit Code: %s\n"
3466 "%sExit Status: %i\n",
3467 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
3468 prefix, sigchld_code_to_string(s->code),
3469 prefix, s->status);
3470 }
3471
3472 char *exec_command_line(char **argv) {
3473 size_t k;
3474 char *n, *p, **a;
3475 bool first = true;
3476
3477 assert(argv);
3478
3479 k = 1;
3480 STRV_FOREACH(a, argv)
3481 k += strlen(*a)+3;
3482
3483 if (!(n = new(char, k)))
3484 return NULL;
3485
3486 p = n;
3487 STRV_FOREACH(a, argv) {
3488
3489 if (!first)
3490 *(p++) = ' ';
3491 else
3492 first = false;
3493
3494 if (strpbrk(*a, WHITESPACE)) {
3495 *(p++) = '\'';
3496 p = stpcpy(p, *a);
3497 *(p++) = '\'';
3498 } else
3499 p = stpcpy(p, *a);
3500
3501 }
3502
3503 *p = 0;
3504
3505 /* FIXME: this doesn't really handle arguments that have
3506 * spaces and ticks in them */
3507
3508 return n;
3509 }
3510
3511 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
3512 _cleanup_free_ char *cmd = NULL;
3513 const char *prefix2;
3514
3515 assert(c);
3516 assert(f);
3517
3518 prefix = strempty(prefix);
3519 prefix2 = strjoina(prefix, "\t");
3520
3521 cmd = exec_command_line(c->argv);
3522 fprintf(f,
3523 "%sCommand Line: %s\n",
3524 prefix, cmd ? cmd : strerror(ENOMEM));
3525
3526 exec_status_dump(&c->exec_status, f, prefix2);
3527 }
3528
3529 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
3530 assert(f);
3531
3532 prefix = strempty(prefix);
3533
3534 LIST_FOREACH(command, c, c)
3535 exec_command_dump(c, f, prefix);
3536 }
3537
3538 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
3539 ExecCommand *end;
3540
3541 assert(l);
3542 assert(e);
3543
3544 if (*l) {
3545 /* It's kind of important, that we keep the order here */
3546 LIST_FIND_TAIL(command, *l, end);
3547 LIST_INSERT_AFTER(command, *l, end, e);
3548 } else
3549 *l = e;
3550 }
3551
3552 int exec_command_set(ExecCommand *c, const char *path, ...) {
3553 va_list ap;
3554 char **l, *p;
3555
3556 assert(c);
3557 assert(path);
3558
3559 va_start(ap, path);
3560 l = strv_new_ap(path, ap);
3561 va_end(ap);
3562
3563 if (!l)
3564 return -ENOMEM;
3565
3566 p = strdup(path);
3567 if (!p) {
3568 strv_free(l);
3569 return -ENOMEM;
3570 }
3571
3572 free(c->path);
3573 c->path = p;
3574
3575 strv_free(c->argv);
3576 c->argv = l;
3577
3578 return 0;
3579 }
3580
3581 int exec_command_append(ExecCommand *c, const char *path, ...) {
3582 _cleanup_strv_free_ char **l = NULL;
3583 va_list ap;
3584 int r;
3585
3586 assert(c);
3587 assert(path);
3588
3589 va_start(ap, path);
3590 l = strv_new_ap(path, ap);
3591 va_end(ap);
3592
3593 if (!l)
3594 return -ENOMEM;
3595
3596 r = strv_extend_strv(&c->argv, l, false);
3597 if (r < 0)
3598 return r;
3599
3600 return 0;
3601 }
3602
3603
3604 static int exec_runtime_allocate(ExecRuntime **rt) {
3605
3606 if (*rt)
3607 return 0;
3608
3609 *rt = new0(ExecRuntime, 1);
3610 if (!*rt)
3611 return -ENOMEM;
3612
3613 (*rt)->n_ref = 1;
3614 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
3615
3616 return 0;
3617 }
3618
3619 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
3620 int r;
3621
3622 assert(rt);
3623 assert(c);
3624 assert(id);
3625
3626 if (*rt)
3627 return 1;
3628
3629 if (!c->private_network && !c->private_tmp)
3630 return 0;
3631
3632 r = exec_runtime_allocate(rt);
3633 if (r < 0)
3634 return r;
3635
3636 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
3637 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
3638 return -errno;
3639 }
3640
3641 if (c->private_tmp && !(*rt)->tmp_dir) {
3642 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
3643 if (r < 0)
3644 return r;
3645 }
3646
3647 return 1;
3648 }
3649
3650 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
3651 assert(r);
3652 assert(r->n_ref > 0);
3653
3654 r->n_ref++;
3655 return r;
3656 }
3657
3658 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
3659
3660 if (!r)
3661 return NULL;
3662
3663 assert(r->n_ref > 0);
3664
3665 r->n_ref--;
3666 if (r->n_ref > 0)
3667 return NULL;
3668
3669 free(r->tmp_dir);
3670 free(r->var_tmp_dir);
3671 safe_close_pair(r->netns_storage_socket);
3672 free(r);
3673
3674 return NULL;
3675 }
3676
3677 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
3678 assert(u);
3679 assert(f);
3680 assert(fds);
3681
3682 if (!rt)
3683 return 0;
3684
3685 if (rt->tmp_dir)
3686 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
3687
3688 if (rt->var_tmp_dir)
3689 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
3690
3691 if (rt->netns_storage_socket[0] >= 0) {
3692 int copy;
3693
3694 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
3695 if (copy < 0)
3696 return copy;
3697
3698 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
3699 }
3700
3701 if (rt->netns_storage_socket[1] >= 0) {
3702 int copy;
3703
3704 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
3705 if (copy < 0)
3706 return copy;
3707
3708 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
3709 }
3710
3711 return 0;
3712 }
3713
3714 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
3715 int r;
3716
3717 assert(rt);
3718 assert(key);
3719 assert(value);
3720
3721 if (streq(key, "tmp-dir")) {
3722 char *copy;
3723
3724 r = exec_runtime_allocate(rt);
3725 if (r < 0)
3726 return log_oom();
3727
3728 copy = strdup(value);
3729 if (!copy)
3730 return log_oom();
3731
3732 free((*rt)->tmp_dir);
3733 (*rt)->tmp_dir = copy;
3734
3735 } else if (streq(key, "var-tmp-dir")) {
3736 char *copy;
3737
3738 r = exec_runtime_allocate(rt);
3739 if (r < 0)
3740 return log_oom();
3741
3742 copy = strdup(value);
3743 if (!copy)
3744 return log_oom();
3745
3746 free((*rt)->var_tmp_dir);
3747 (*rt)->var_tmp_dir = copy;
3748
3749 } else if (streq(key, "netns-socket-0")) {
3750 int fd;
3751
3752 r = exec_runtime_allocate(rt);
3753 if (r < 0)
3754 return log_oom();
3755
3756 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
3757 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
3758 else {
3759 safe_close((*rt)->netns_storage_socket[0]);
3760 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
3761 }
3762 } else if (streq(key, "netns-socket-1")) {
3763 int fd;
3764
3765 r = exec_runtime_allocate(rt);
3766 if (r < 0)
3767 return log_oom();
3768
3769 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
3770 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
3771 else {
3772 safe_close((*rt)->netns_storage_socket[1]);
3773 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
3774 }
3775 } else
3776 return 0;
3777
3778 return 1;
3779 }
3780
3781 static void *remove_tmpdir_thread(void *p) {
3782 _cleanup_free_ char *path = p;
3783
3784 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
3785 return NULL;
3786 }
3787
3788 void exec_runtime_destroy(ExecRuntime *rt) {
3789 int r;
3790
3791 if (!rt)
3792 return;
3793
3794 /* If there are multiple users of this, let's leave the stuff around */
3795 if (rt->n_ref > 1)
3796 return;
3797
3798 if (rt->tmp_dir) {
3799 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
3800
3801 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
3802 if (r < 0) {
3803 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
3804 free(rt->tmp_dir);
3805 }
3806
3807 rt->tmp_dir = NULL;
3808 }
3809
3810 if (rt->var_tmp_dir) {
3811 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
3812
3813 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
3814 if (r < 0) {
3815 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
3816 free(rt->var_tmp_dir);
3817 }
3818
3819 rt->var_tmp_dir = NULL;
3820 }
3821
3822 safe_close_pair(rt->netns_storage_socket);
3823 }
3824
3825 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
3826 [EXEC_INPUT_NULL] = "null",
3827 [EXEC_INPUT_TTY] = "tty",
3828 [EXEC_INPUT_TTY_FORCE] = "tty-force",
3829 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
3830 [EXEC_INPUT_SOCKET] = "socket"
3831 };
3832
3833 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
3834
3835 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
3836 [EXEC_OUTPUT_INHERIT] = "inherit",
3837 [EXEC_OUTPUT_NULL] = "null",
3838 [EXEC_OUTPUT_TTY] = "tty",
3839 [EXEC_OUTPUT_SYSLOG] = "syslog",
3840 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
3841 [EXEC_OUTPUT_KMSG] = "kmsg",
3842 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
3843 [EXEC_OUTPUT_JOURNAL] = "journal",
3844 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
3845 [EXEC_OUTPUT_SOCKET] = "socket"
3846 };
3847
3848 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
3849
3850 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
3851 [EXEC_UTMP_INIT] = "init",
3852 [EXEC_UTMP_LOGIN] = "login",
3853 [EXEC_UTMP_USER] = "user",
3854 };
3855
3856 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);