]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #6917 from keszybz/restore-some-tests
[thirdparty/systemd.git] / src / core / execute.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <glob.h>
23 #include <grp.h>
24 #include <poll.h>
25 #include <signal.h>
26 #include <string.h>
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
29 #include <sys/mman.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
32 #include <sys/shm.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/un.h>
37 #include <unistd.h>
38 #include <utmpx.h>
39
40 #ifdef HAVE_PAM
41 #include <security/pam_appl.h>
42 #endif
43
44 #ifdef HAVE_SELINUX
45 #include <selinux/selinux.h>
46 #endif
47
48 #ifdef HAVE_SECCOMP
49 #include <seccomp.h>
50 #endif
51
52 #ifdef HAVE_APPARMOR
53 #include <sys/apparmor.h>
54 #endif
55
56 #include "sd-messages.h"
57
58 #include "af-list.h"
59 #include "alloc-util.h"
60 #ifdef HAVE_APPARMOR
61 #include "apparmor-util.h"
62 #endif
63 #include "async.h"
64 #include "barrier.h"
65 #include "cap-list.h"
66 #include "capability-util.h"
67 #include "def.h"
68 #include "env-util.h"
69 #include "errno-list.h"
70 #include "execute.h"
71 #include "exit-status.h"
72 #include "fd-util.h"
73 #include "fileio.h"
74 #include "format-util.h"
75 #include "fs-util.h"
76 #include "glob-util.h"
77 #include "io-util.h"
78 #include "ioprio.h"
79 #include "log.h"
80 #include "macro.h"
81 #include "missing.h"
82 #include "mkdir.h"
83 #include "namespace.h"
84 #include "parse-util.h"
85 #include "path-util.h"
86 #include "process-util.h"
87 #include "rlimit-util.h"
88 #include "rm-rf.h"
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92 #include "securebits.h"
93 #include "securebits-util.h"
94 #include "selinux-util.h"
95 #include "signal-util.h"
96 #include "smack-util.h"
97 #include "special.h"
98 #include "string-table.h"
99 #include "string-util.h"
100 #include "strv.h"
101 #include "syslog-util.h"
102 #include "terminal-util.h"
103 #include "unit.h"
104 #include "user-util.h"
105 #include "util.h"
106 #include "utmp-wtmp.h"
107
108 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
109 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
110
111 /* This assumes there is a 'tty' group */
112 #define TTY_MODE 0620
113
114 #define SNDBUF_SIZE (8*1024*1024)
115
/* Moves the n_fds descriptors listed in fds[] so that entry i ends up as descriptor number i+3.
 *
 * The array is updated in place with the new descriptor numbers. Returns 0 on success (also when there is
 * nothing to do) or a negative errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first, retry_at;

        if (n_fds <= 0)
                return 0;

        /* Careful: the caller's array is rewritten! */

        assert(fds);

        first = 0;
        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, moved;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we wanted was still taken? Then remember the first such index, so
                         * that we can make another pass starting from there. */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
160
/* Adjusts the file descriptor flags of all fds we are going to pass on.
 *
 * The first n_socket_fds entries of fds[] get O_NONBLOCK set or cleared according to 'nonblock' (O_NONBLOCK
 * only matters for socket activation); FD_CLOEXEC is cleared on every entry. Returns 0 on success, or the
 * negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total, idx;

        total = n_storage_fds + n_socket_fds;
        if (total <= 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is always turned off: the whole point is handing these fds to our
                 * children. */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
194 static const char *exec_context_tty_path(const ExecContext *context) {
195 assert(context);
196
197 if (context->stdio_as_fds)
198 return NULL;
199
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204 }
205
206 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
209 assert(context);
210
211 path = exec_context_tty_path(context);
212
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
219
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
229 }
230
231 static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236 }
237
238 static bool is_terminal_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244 }
245
246 static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250 }
251
252 static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256 }
257
258 static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
273 }
274
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and installs it as descriptor number nfd.
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right number straight away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(null_fd);
        return ret;
}
292
293 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
294 static const union sockaddr_union sa = {
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
302 if (gid_is_valid(gid)) {
303 oldgid = getgid();
304
305 if (setegid(gid) < 0)
306 return -errno;
307 }
308
309 if (uid_is_valid(uid)) {
310 olduid = getuid();
311
312 if (seteuid(uid) < 0) {
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
323 if (uid_is_valid(uid))
324 (void) seteuid(olduid);
325
326 restore_gid:
327 if (gid_is_valid(gid))
328 (void) setegid(oldgid);
329
330 return r;
331 }
332
333 static int connect_logger_as(
334 Unit *unit,
335 const ExecContext *context,
336 const ExecParameters *params,
337 ExecOutput output,
338 const char *ident,
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
343 int fd, r;
344
345 assert(context);
346 assert(params);
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
350
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
353 return -errno;
354
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
358
359 if (shutdown(fd, SHUT_RD) < 0) {
360 safe_close(fd);
361 return -errno;
362 }
363
364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
365
366 dprintf(fd,
367 "%s\n"
368 "%s\n"
369 "%i\n"
370 "%i\n"
371 "%i\n"
372 "%i\n"
373 "%i\n",
374 context->syslog_identifier ?: ident,
375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
378 is_syslog_output(output),
379 is_kmsg_output(output),
380 is_terminal_output(output));
381
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
387
388 return r;
389 }
/* Opens the terminal 'path' with the given mode (plus O_NOCTTY) and installs it as descriptor number nfd.
 * Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Got the right number straight away? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(tty_fd);
        return ret;
}
408
409 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
413
414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
415 return EXEC_INPUT_NULL;
416
417 return std_input;
418 }
419
420 static int fixup_output(ExecOutput std_output, int socket_fd) {
421
422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
423 return EXEC_OUTPUT_INHERIT;
424
425 return std_output;
426 }
427
428 static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
431 int socket_fd,
432 int named_iofds[3]) {
433
434 ExecInput i;
435
436 assert(context);
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
449
450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
451
452 switch (i) {
453
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
461
462 fd = acquire_terminal(exec_context_tty_path(context),
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
466 USEC_INFINITY);
467 if (fd < 0)
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
472 safe_close(fd);
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
486 default:
487 assert_not_reached("Unknown input type");
488 }
489 }
490
491 static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
497 int named_iofds[3],
498 const char *ident,
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
503
504 ExecOutput o;
505 ExecInput i;
506 int r;
507
508 assert(unit);
509 assert(context);
510 assert(params);
511 assert(ident);
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
514
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
531 o = fixup_output(context->std_output, socket_fd);
532
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
536
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
551
552 o = e;
553
554 } else if (o == EXEC_OUTPUT_INHERIT) {
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
558
559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
560 if (i != EXEC_INPUT_NULL)
561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
562
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
565 return fileno;
566
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
569 }
570
571 switch (o) {
572
573 case EXEC_OUTPUT_NULL:
574 return open_null_as(O_WRONLY, fileno);
575
576 case EXEC_OUTPUT_TTY:
577 if (is_terminal_input(i))
578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
579
580 /* We don't reset the terminal if this is just about output */
581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
582
583 case EXEC_OUTPUT_SYSLOG:
584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
585 case EXEC_OUTPUT_KMSG:
586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
590 if (r < 0) {
591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
592 r = open_null_as(O_WRONLY, fileno);
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
598 * services to detect whether they are connected to the journal or not.
599 *
600 * If both stdout and stderr are connected to a stream then let's make sure to store the data
601 * about STDERR as that's usually the best way to do logging. */
602
603 if (fstat(fileno, &st) >= 0 &&
604 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
605 *journal_stream_dev = st.st_dev;
606 *journal_stream_ino = st.st_ino;
607 }
608 }
609 return r;
610
611 case EXEC_OUTPUT_SOCKET:
612 assert(socket_fd >= 0);
613 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
614
615 case EXEC_OUTPUT_NAMED_FD:
616 (void) fd_nonblock(named_iofds[fileno], false);
617 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
618
619 default:
620 assert_not_reached("Unknown error type");
621 }
622 }
623
624 static int chown_terminal(int fd, uid_t uid) {
625 struct stat st;
626
627 assert(fd >= 0);
628
629 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
630 if (isatty(fd) < 1)
631 return 0;
632
633 /* This might fail. What matters are the results. */
634 (void) fchown(fd, uid, -1);
635 (void) fchmod(fd, TTY_MODE);
636
637 if (fstat(fd, &st) < 0)
638 return -errno;
639
640 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
641 return -EPERM;
642
643 return 0;
644 }
645
646 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
647 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
648 int r;
649
650 assert(_saved_stdin);
651 assert(_saved_stdout);
652
653 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
654 if (saved_stdin < 0)
655 return -errno;
656
657 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
658 if (saved_stdout < 0)
659 return -errno;
660
661 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
662 if (fd < 0)
663 return fd;
664
665 r = chown_terminal(fd, getuid());
666 if (r < 0)
667 return r;
668
669 r = reset_terminal_fd(fd, true);
670 if (r < 0)
671 return r;
672
673 if (dup2(fd, STDIN_FILENO) < 0)
674 return -errno;
675
676 if (dup2(fd, STDOUT_FILENO) < 0)
677 return -errno;
678
679 if (fd >= 2)
680 safe_close(fd);
681 fd = -1;
682
683 *_saved_stdin = saved_stdin;
684 *_saved_stdout = saved_stdout;
685
686 saved_stdin = saved_stdout = -1;
687
688 return 0;
689 }
690
691 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
692 assert(err < 0);
693
694 if (err == -ETIMEDOUT)
695 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
696 else {
697 errno = -err;
698 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
699 }
700 }
701
702 static void write_confirm_error(int err, const char *vc, const Unit *u) {
703 _cleanup_close_ int fd = -1;
704
705 assert(vc);
706
707 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
708 if (fd < 0)
709 return;
710
711 write_confirm_error_fd(err, fd, u);
712 }
713
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout back in place and closes
 * the saved copies. Returns 0 on success, otherwise the negative errno-style value of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* The copies are closed regardless of whether restoring worked */
        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
735
/* The possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* do not execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* do not execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1,  /* execute the command */
};
741
742 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
743 int saved_stdout = -1, saved_stdin = -1, r;
744 _cleanup_free_ char *e = NULL;
745 char c;
746
747 /* For any internal errors, assume a positive response. */
748 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
749 if (r < 0) {
750 write_confirm_error(r, vc, u);
751 return CONFIRM_EXECUTE;
752 }
753
754 /* confirm_spawn might have been disabled while we were sleeping. */
755 if (manager_is_confirm_spawn_disabled(u->manager)) {
756 r = 1;
757 goto restore_stdio;
758 }
759
760 e = ellipsize(cmdline, 60, 100);
761 if (!e) {
762 log_oom();
763 r = CONFIRM_EXECUTE;
764 goto restore_stdio;
765 }
766
767 for (;;) {
768 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
769 if (r < 0) {
770 write_confirm_error_fd(r, STDOUT_FILENO, u);
771 r = CONFIRM_EXECUTE;
772 goto restore_stdio;
773 }
774
775 switch (c) {
776 case 'c':
777 printf("Resuming normal execution.\n");
778 manager_disable_confirm_spawn();
779 r = 1;
780 break;
781 case 'D':
782 unit_dump(u, stdout, " ");
783 continue; /* ask again */
784 case 'f':
785 printf("Failing execution.\n");
786 r = CONFIRM_PRETEND_FAILURE;
787 break;
788 case 'h':
789 printf(" c - continue, proceed without asking anymore\n"
790 " D - dump, show the state of the unit\n"
791 " f - fail, don't execute the command and pretend it failed\n"
792 " h - help\n"
793 " i - info, show a short summary of the unit\n"
794 " j - jobs, show jobs that are in progress\n"
795 " s - skip, don't execute the command and pretend it succeeded\n"
796 " y - yes, execute the command\n");
797 continue; /* ask again */
798 case 'i':
799 printf(" Description: %s\n"
800 " Unit: %s\n"
801 " Command: %s\n",
802 u->id, u->description, cmdline);
803 continue; /* ask again */
804 case 'j':
805 manager_dump_jobs(u->manager, stdout, " ");
806 continue; /* ask again */
807 case 'n':
808 /* 'n' was removed in favor of 'f'. */
809 printf("Didn't understand 'n', did you mean 'f'?\n");
810 continue; /* ask again */
811 case 's':
812 printf("Skipping execution.\n");
813 r = CONFIRM_PRETEND_SUCCESS;
814 break;
815 case 'y':
816 r = CONFIRM_EXECUTE;
817 break;
818 default:
819 assert_not_reached("Unhandled choice");
820 }
821 break;
822 }
823
824 restore_stdio:
825 restore_confirm_stdio(&saved_stdin, &saved_stdout);
826 return r;
827 }
828
829 static int get_fixed_user(const ExecContext *c, const char **user,
830 uid_t *uid, gid_t *gid,
831 const char **home, const char **shell) {
832 int r;
833 const char *name;
834
835 assert(c);
836
837 if (!c->user)
838 return 0;
839
840 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
841 * (i.e. are "/" or "/bin/nologin"). */
842
843 name = c->user;
844 r = get_user_creds_clean(&name, uid, gid, home, shell);
845 if (r < 0)
846 return r;
847
848 *user = name;
849 return 0;
850 }
851
852 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
853 int r;
854 const char *name;
855
856 assert(c);
857
858 if (!c->group)
859 return 0;
860
861 name = c->group;
862 r = get_group_creds(&name, gid);
863 if (r < 0)
864 return r;
865
866 *group = name;
867 return 0;
868 }
869
870 static int get_supplementary_groups(const ExecContext *c, const char *user,
871 const char *group, gid_t gid,
872 gid_t **supplementary_gids, int *ngids) {
873 char **i;
874 int r, k = 0;
875 int ngroups_max;
876 bool keep_groups = false;
877 gid_t *groups = NULL;
878 _cleanup_free_ gid_t *l_gids = NULL;
879
880 assert(c);
881
882 /*
883 * If user is given, then lookup GID and supplementary groups list.
884 * We avoid NSS lookups for gid=0. Also we have to initialize groups
885 * here and as early as possible so we keep the list of supplementary
886 * groups of the caller.
887 */
888 if (user && gid_is_valid(gid) && gid != 0) {
889 /* First step, initialize groups from /etc/groups */
890 if (initgroups(user, gid) < 0)
891 return -errno;
892
893 keep_groups = true;
894 }
895
896 if (!c->supplementary_groups)
897 return 0;
898
899 /*
900 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
901 * be positive, otherwise fail.
902 */
903 errno = 0;
904 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
905 if (ngroups_max <= 0) {
906 if (errno > 0)
907 return -errno;
908 else
909 return -EOPNOTSUPP; /* For all other values */
910 }
911
912 l_gids = new(gid_t, ngroups_max);
913 if (!l_gids)
914 return -ENOMEM;
915
916 if (keep_groups) {
917 /*
918 * Lookup the list of groups that the user belongs to, we
919 * avoid NSS lookups here too for gid=0.
920 */
921 k = ngroups_max;
922 if (getgrouplist(user, gid, l_gids, &k) < 0)
923 return -EINVAL;
924 } else
925 k = 0;
926
927 STRV_FOREACH(i, c->supplementary_groups) {
928 const char *g;
929
930 if (k >= ngroups_max)
931 return -E2BIG;
932
933 g = *i;
934 r = get_group_creds(&g, l_gids+k);
935 if (r < 0)
936 return r;
937
938 k++;
939 }
940
941 /*
942 * Sets ngids to zero to drop all supplementary groups, happens
943 * when we are under root and SupplementaryGroups= is empty.
944 */
945 if (k == 0) {
946 *ngids = 0;
947 return 0;
948 }
949
950 /* Otherwise get the final list of supplementary groups */
951 groups = memdup(l_gids, sizeof(gid_t) * k);
952 if (!groups)
953 return -ENOMEM;
954
955 *supplementary_gids = groups;
956 *ngids = k;
957
958 groups = NULL;
959
960 return 0;
961 }
962
963 static int enforce_groups(const ExecContext *context, gid_t gid,
964 gid_t *supplementary_gids, int ngids) {
965 int r;
966
967 assert(context);
968
969 /* Handle SupplementaryGroups= even if it is empty */
970 if (context->supplementary_groups) {
971 r = maybe_setgroups(ngids, supplementary_gids);
972 if (r < 0)
973 return r;
974 }
975
976 if (gid_is_valid(gid)) {
977 /* Then set our gids */
978 if (setresgid(gid, gid, gid) < 0)
979 return -errno;
980 }
981
982 return 0;
983 }
984
985 static int enforce_user(const ExecContext *context, uid_t uid) {
986 assert(context);
987
988 if (!uid_is_valid(uid))
989 return 0;
990
991 /* Sets (but doesn't look up) the uid and make sure we keep the
992 * capabilities while doing so. */
993
994 if (context->capability_ambient_set != 0) {
995
996 /* First step: If we need to keep capabilities but
997 * drop privileges we need to make sure we keep our
998 * caps, while we drop privileges. */
999 if (uid != 0) {
1000 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1001
1002 if (prctl(PR_GET_SECUREBITS) != sb)
1003 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004 return -errno;
1005 }
1006 }
1007
1008 /* Second step: actually set the uids */
1009 if (setresuid(uid, uid, uid) < 0)
1010 return -errno;
1011
1012 /* At this point we should have all necessary capabilities but
1013 are otherwise a normal user. However, the caps might got
1014 corrupted due to the setresuid() so we need clean them up
1015 later. This is done outside of this call. */
1016
1017 return 0;
1018 }
1019
1020 #ifdef HAVE_PAM
1021
1022 static int null_conv(
1023 int num_msg,
1024 const struct pam_message **msg,
1025 struct pam_response **resp,
1026 void *appdata_ptr) {
1027
1028 /* We don't support conversations */
1029
1030 return PAM_CONV_ERR;
1031 }
1032
1033 #endif
1034
/* Opens a PAM session for service 'name' and user 'user', and forks off a helper process that closes the
 * session again once the calling process is gone.
 *
 * 'tty', if non-NULL, is registered as PAM_TTY. The variables in *env are passed to PAM, and on success
 * *env is replaced by the environment list PAM returns. 'fds'/'n_fds' are the descriptors that get closed
 * in the helper process. 'uid'/'gid' are the credentials the helper process drops to.
 *
 * Returns 0 on success, a negative errno-style value on failure (-EPERM for PAM errors, since those do not
 * map to errno). If built without HAVE_PAM this is a NOP returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* Don't leave SIGTERM blocked in the caller if there's no child to make use of it */
                assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment by what PAM gave us */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1251
/* Renames the current process to the file name part of 'path', wrapped in parentheses. If the name is
 * longer than 8 characters only its last 8 characters are used; if it is empty "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + at most 8 characters + ')' + NUL */
        const char *base;
        size_t n;

        /* The result has to fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the end of the process name is usually the more interesting
                 * part, since the beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1282
1283 static bool context_has_address_families(const ExecContext *c) {
1284 assert(c);
1285
1286 return c->address_families_whitelist ||
1287 !set_isempty(c->address_families);
1288 }
1289
1290 static bool context_has_syscall_filters(const ExecContext *c) {
1291 assert(c);
1292
1293 return c->syscall_whitelist ||
1294 !set_isempty(c->syscall_filter);
1295 }
1296
1297 static bool context_has_no_new_privileges(const ExecContext *c) {
1298 assert(c);
1299
1300 if (c->no_new_privileges)
1301 return true;
1302
1303 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1304 return false;
1305
1306 /* We need NNP if we have any form of seccomp and are unprivileged */
1307 return context_has_address_families(c) ||
1308 c->memory_deny_write_execute ||
1309 c->restrict_realtime ||
1310 exec_context_restrict_namespaces_set(c) ||
1311 c->protect_kernel_tunables ||
1312 c->protect_kernel_modules ||
1313 c->private_devices ||
1314 context_has_syscall_filters(c) ||
1315 !set_isempty(c->syscall_archs) ||
1316 c->lock_personality;
1317 }
1318
1319 #ifdef HAVE_SECCOMP
1320
1321 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1322
1323 if (is_seccomp_available())
1324 return false;
1325
1326 log_open();
1327 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1328 log_close();
1329 return true;
1330 }
1331
1332 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1333 uint32_t negative_action, default_action, action;
1334 int r;
1335
1336 assert(u);
1337 assert(c);
1338
1339 if (!context_has_syscall_filters(c))
1340 return 0;
1341
1342 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1343 return 0;
1344
1345 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1346
1347 if (c->syscall_whitelist) {
1348 default_action = negative_action;
1349 action = SCMP_ACT_ALLOW;
1350 } else {
1351 default_action = SCMP_ACT_ALLOW;
1352 action = negative_action;
1353 }
1354
1355 if (needs_ambient_hack) {
1356 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1357 if (r < 0)
1358 return r;
1359 }
1360
1361 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1362 }
1363
1364 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1365 assert(u);
1366 assert(c);
1367
1368 if (set_isempty(c->syscall_archs))
1369 return 0;
1370
1371 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1372 return 0;
1373
1374 return seccomp_restrict_archs(c->syscall_archs);
1375 }
1376
1377 static int apply_address_families(const Unit* u, const ExecContext *c) {
1378 assert(u);
1379 assert(c);
1380
1381 if (!context_has_address_families(c))
1382 return 0;
1383
1384 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1385 return 0;
1386
1387 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1388 }
1389
1390 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1391 assert(u);
1392 assert(c);
1393
1394 if (!c->memory_deny_write_execute)
1395 return 0;
1396
1397 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1398 return 0;
1399
1400 return seccomp_memory_deny_write_execute();
1401 }
1402
1403 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1404 assert(u);
1405 assert(c);
1406
1407 if (!c->restrict_realtime)
1408 return 0;
1409
1410 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1411 return 0;
1412
1413 return seccomp_restrict_realtime();
1414 }
1415
1416 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1417 assert(u);
1418 assert(c);
1419
1420 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1421 * let's protect even those systems where this is left on in the kernel. */
1422
1423 if (!c->protect_kernel_tunables)
1424 return 0;
1425
1426 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1427 return 0;
1428
1429 return seccomp_protect_sysctl();
1430 }
1431
1432 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1433 assert(u);
1434 assert(c);
1435
1436 /* Turn off module syscalls on ProtectKernelModules=yes */
1437
1438 if (!c->protect_kernel_modules)
1439 return 0;
1440
1441 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1442 return 0;
1443
1444 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1445 }
1446
1447 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1448 assert(u);
1449 assert(c);
1450
1451 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1452
1453 if (!c->private_devices)
1454 return 0;
1455
1456 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1457 return 0;
1458
1459 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1460 }
1461
1462 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1463 assert(u);
1464 assert(c);
1465
1466 if (!exec_context_restrict_namespaces_set(c))
1467 return 0;
1468
1469 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1470 return 0;
1471
1472 return seccomp_restrict_namespaces(c->restrict_namespaces);
1473 }
1474
1475 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1476 unsigned long personality;
1477 int r;
1478
1479 assert(u);
1480 assert(c);
1481
1482 if (!c->lock_personality)
1483 return 0;
1484
1485 if (skip_seccomp_unavailable(u, "LockPersonality="))
1486 return 0;
1487
1488 personality = c->personality;
1489
1490 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1491 if (personality == PERSONALITY_INVALID) {
1492
1493 r = opinionated_personality(&personality);
1494 if (r < 0)
1495 return r;
1496 }
1497
1498 return seccomp_lock_personality(personality);
1499 }
1500
1501 #endif
1502
1503 static void do_idle_pipe_dance(int idle_pipe[4]) {
1504 assert(idle_pipe);
1505
1506 idle_pipe[1] = safe_close(idle_pipe[1]);
1507 idle_pipe[2] = safe_close(idle_pipe[2]);
1508
1509 if (idle_pipe[0] >= 0) {
1510 int r;
1511
1512 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1513
1514 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1515 ssize_t n;
1516
1517 /* Signal systemd that we are bored and want to continue. */
1518 n = write(idle_pipe[3], "x", 1);
1519 if (n > 0)
1520 /* Wait for systemd to react to the signal above. */
1521 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1522 }
1523
1524 idle_pipe[0] = safe_close(idle_pipe[0]);
1525
1526 }
1527
1528 idle_pipe[3] = safe_close(idle_pipe[3]);
1529 }
1530
1531 static int build_environment(
1532 Unit *u,
1533 const ExecContext *c,
1534 const ExecParameters *p,
1535 unsigned n_fds,
1536 const char *home,
1537 const char *username,
1538 const char *shell,
1539 dev_t journal_stream_dev,
1540 ino_t journal_stream_ino,
1541 char ***ret) {
1542
1543 _cleanup_strv_free_ char **our_env = NULL;
1544 unsigned n_env = 0;
1545 char *x;
1546
1547 assert(u);
1548 assert(c);
1549 assert(ret);
1550
1551 our_env = new0(char*, 14);
1552 if (!our_env)
1553 return -ENOMEM;
1554
1555 if (n_fds > 0) {
1556 _cleanup_free_ char *joined = NULL;
1557
1558 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
1561
1562 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1563 return -ENOMEM;
1564 our_env[n_env++] = x;
1565
1566 joined = strv_join(p->fd_names, ":");
1567 if (!joined)
1568 return -ENOMEM;
1569
1570 x = strjoin("LISTEN_FDNAMES=", joined);
1571 if (!x)
1572 return -ENOMEM;
1573 our_env[n_env++] = x;
1574 }
1575
1576 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1577 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580
1581 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1582 return -ENOMEM;
1583 our_env[n_env++] = x;
1584 }
1585
1586 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1587 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1588 * check the database directly. */
1589 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1590 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1591 if (!x)
1592 return -ENOMEM;
1593 our_env[n_env++] = x;
1594 }
1595
1596 if (home) {
1597 x = strappend("HOME=", home);
1598 if (!x)
1599 return -ENOMEM;
1600 our_env[n_env++] = x;
1601 }
1602
1603 if (username) {
1604 x = strappend("LOGNAME=", username);
1605 if (!x)
1606 return -ENOMEM;
1607 our_env[n_env++] = x;
1608
1609 x = strappend("USER=", username);
1610 if (!x)
1611 return -ENOMEM;
1612 our_env[n_env++] = x;
1613 }
1614
1615 if (shell) {
1616 x = strappend("SHELL=", shell);
1617 if (!x)
1618 return -ENOMEM;
1619 our_env[n_env++] = x;
1620 }
1621
1622 if (!sd_id128_is_null(u->invocation_id)) {
1623 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1624 return -ENOMEM;
1625
1626 our_env[n_env++] = x;
1627 }
1628
1629 if (exec_context_needs_term(c)) {
1630 const char *tty_path, *term = NULL;
1631
1632 tty_path = exec_context_tty_path(c);
1633
1634 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1635 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1636 * passes to PID 1 ends up all the way in the console login shown. */
1637
1638 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1639 term = getenv("TERM");
1640 if (!term)
1641 term = default_term_for_tty(tty_path);
1642
1643 x = strappend("TERM=", term);
1644 if (!x)
1645 return -ENOMEM;
1646 our_env[n_env++] = x;
1647 }
1648
1649 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1650 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1651 return -ENOMEM;
1652
1653 our_env[n_env++] = x;
1654 }
1655
1656 our_env[n_env++] = NULL;
1657 assert(n_env <= 12);
1658
1659 *ret = our_env;
1660 our_env = NULL;
1661
1662 return 0;
1663 }
1664
1665 static int build_pass_environment(const ExecContext *c, char ***ret) {
1666 _cleanup_strv_free_ char **pass_env = NULL;
1667 size_t n_env = 0, n_bufsize = 0;
1668 char **i;
1669
1670 STRV_FOREACH(i, c->pass_environment) {
1671 _cleanup_free_ char *x = NULL;
1672 char *v;
1673
1674 v = getenv(*i);
1675 if (!v)
1676 continue;
1677 x = strjoin(*i, "=", v);
1678 if (!x)
1679 return -ENOMEM;
1680
1681 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1682 return -ENOMEM;
1683
1684 pass_env[n_env++] = x;
1685 pass_env[n_env] = NULL;
1686 x = NULL;
1687 }
1688
1689 *ret = pass_env;
1690 pass_env = NULL;
1691
1692 return 0;
1693 }
1694
1695 static bool exec_needs_mount_namespace(
1696 const ExecContext *context,
1697 const ExecParameters *params,
1698 ExecRuntime *runtime) {
1699
1700 assert(context);
1701 assert(params);
1702
1703 if (context->root_image)
1704 return true;
1705
1706 if (!strv_isempty(context->read_write_paths) ||
1707 !strv_isempty(context->read_only_paths) ||
1708 !strv_isempty(context->inaccessible_paths))
1709 return true;
1710
1711 if (context->n_bind_mounts > 0)
1712 return true;
1713
1714 if (context->mount_flags != 0)
1715 return true;
1716
1717 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1718 return true;
1719
1720 if (context->private_devices ||
1721 context->protect_system != PROTECT_SYSTEM_NO ||
1722 context->protect_home != PROTECT_HOME_NO ||
1723 context->protect_kernel_tunables ||
1724 context->protect_kernel_modules ||
1725 context->protect_control_groups)
1726 return true;
1727
1728 if (context->mount_apivfs && (context->root_image || context->root_directory))
1729 return true;
1730
1731 return false;
1732 }
1733
1734 static int setup_private_users(uid_t uid, gid_t gid) {
1735 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1736 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1737 _cleanup_close_ int unshare_ready_fd = -1;
1738 _cleanup_(sigkill_waitp) pid_t pid = 0;
1739 uint64_t c = 1;
1740 siginfo_t si;
1741 ssize_t n;
1742 int r;
1743
1744 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1745 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1746 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1747 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1748 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1749 * continues execution normally. */
1750
1751 if (uid != 0 && uid_is_valid(uid)) {
1752 r = asprintf(&uid_map,
1753 "0 0 1\n" /* Map root → root */
1754 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1755 uid, uid);
1756 if (r < 0)
1757 return -ENOMEM;
1758 } else {
1759 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1760 if (!uid_map)
1761 return -ENOMEM;
1762 }
1763
1764 if (gid != 0 && gid_is_valid(gid)) {
1765 r = asprintf(&gid_map,
1766 "0 0 1\n" /* Map root → root */
1767 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1768 gid, gid);
1769 if (r < 0)
1770 return -ENOMEM;
1771 } else {
1772 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1773 if (!gid_map)
1774 return -ENOMEM;
1775 }
1776
1777 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1778 * namespace. */
1779 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1780 if (unshare_ready_fd < 0)
1781 return -errno;
1782
1783 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1784 * failed. */
1785 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1786 return -errno;
1787
1788 pid = fork();
1789 if (pid < 0)
1790 return -errno;
1791
1792 if (pid == 0) {
1793 _cleanup_close_ int fd = -1;
1794 const char *a;
1795 pid_t ppid;
1796
1797 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1798 * here, after the parent opened its own user namespace. */
1799
1800 ppid = getppid();
1801 errno_pipe[0] = safe_close(errno_pipe[0]);
1802
1803 /* Wait until the parent unshared the user namespace */
1804 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1805 r = -errno;
1806 goto child_fail;
1807 }
1808
1809 /* Disable the setgroups() system call in the child user namespace, for good. */
1810 a = procfs_file_alloca(ppid, "setgroups");
1811 fd = open(a, O_WRONLY|O_CLOEXEC);
1812 if (fd < 0) {
1813 if (errno != ENOENT) {
1814 r = -errno;
1815 goto child_fail;
1816 }
1817
1818 /* If the file is missing the kernel is too old, let's continue anyway. */
1819 } else {
1820 if (write(fd, "deny\n", 5) < 0) {
1821 r = -errno;
1822 goto child_fail;
1823 }
1824
1825 fd = safe_close(fd);
1826 }
1827
1828 /* First write the GID map */
1829 a = procfs_file_alloca(ppid, "gid_map");
1830 fd = open(a, O_WRONLY|O_CLOEXEC);
1831 if (fd < 0) {
1832 r = -errno;
1833 goto child_fail;
1834 }
1835 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1836 r = -errno;
1837 goto child_fail;
1838 }
1839 fd = safe_close(fd);
1840
1841 /* The write the UID map */
1842 a = procfs_file_alloca(ppid, "uid_map");
1843 fd = open(a, O_WRONLY|O_CLOEXEC);
1844 if (fd < 0) {
1845 r = -errno;
1846 goto child_fail;
1847 }
1848 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1849 r = -errno;
1850 goto child_fail;
1851 }
1852
1853 _exit(EXIT_SUCCESS);
1854
1855 child_fail:
1856 (void) write(errno_pipe[1], &r, sizeof(r));
1857 _exit(EXIT_FAILURE);
1858 }
1859
1860 errno_pipe[1] = safe_close(errno_pipe[1]);
1861
1862 if (unshare(CLONE_NEWUSER) < 0)
1863 return -errno;
1864
1865 /* Let the child know that the namespace is ready now */
1866 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1867 return -errno;
1868
1869 /* Try to read an error code from the child */
1870 n = read(errno_pipe[0], &r, sizeof(r));
1871 if (n < 0)
1872 return -errno;
1873 if (n == sizeof(r)) { /* an error code was sent to us */
1874 if (r < 0)
1875 return r;
1876 return -EIO;
1877 }
1878 if (n != 0) /* on success we should have read 0 bytes */
1879 return -EIO;
1880
1881 r = wait_for_terminate(pid, &si);
1882 if (r < 0)
1883 return r;
1884 pid = 0;
1885
1886 /* If something strange happened with the child, let's consider this fatal, too */
1887 if (si.si_code != CLD_EXITED || si.si_status != 0)
1888 return -EIO;
1889
1890 return 0;
1891 }
1892
1893 static int setup_exec_directory(
1894 const ExecContext *context,
1895 const ExecParameters *params,
1896 uid_t uid,
1897 gid_t gid,
1898 ExecDirectoryType type,
1899 int *exit_status) {
1900
1901 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1902 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1903 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1904 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1905 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1906 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1907 };
1908 char **rt;
1909 int r;
1910
1911 assert(context);
1912 assert(params);
1913 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1914 assert(exit_status);
1915
1916 if (!params->prefix[type])
1917 return 0;
1918
1919 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1920 if (!uid_is_valid(uid))
1921 uid = 0;
1922 if (!gid_is_valid(gid))
1923 gid = 0;
1924 }
1925
1926 STRV_FOREACH(rt, context->directories[type].paths) {
1927 _cleanup_free_ char *p;
1928
1929 p = strjoin(params->prefix[type], "/", *rt);
1930 if (!p) {
1931 r = -ENOMEM;
1932 goto fail;
1933 }
1934
1935 r = mkdir_parents_label(p, 0755);
1936 if (r < 0)
1937 goto fail;
1938
1939 r = mkdir_p_label(p, context->directories[type].mode);
1940 if (r < 0)
1941 goto fail;
1942
1943 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1944 * a service, and shall not be writable. */
1945 if (type == EXEC_DIRECTORY_CONFIGURATION)
1946 continue;
1947
1948 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
1949 if (r < 0)
1950 goto fail;
1951 }
1952
1953 return 0;
1954
1955 fail:
1956 *exit_status = exit_status_table[type];
1957
1958 return r;
1959 }
1960
1961 static int setup_smack(
1962 const ExecContext *context,
1963 const ExecCommand *command) {
1964
1965 int r;
1966
1967 assert(context);
1968 assert(command);
1969
1970 if (context->smack_process_label) {
1971 r = mac_smack_apply_pid(0, context->smack_process_label);
1972 if (r < 0)
1973 return r;
1974 }
1975 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1976 else {
1977 _cleanup_free_ char *exec_label = NULL;
1978
1979 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1980 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1981 return r;
1982
1983 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1984 if (r < 0)
1985 return r;
1986 }
1987 #endif
1988
1989 return 0;
1990 }
1991
1992 static int compile_read_write_paths(
1993 const ExecContext *context,
1994 const ExecParameters *params,
1995 char ***ret) {
1996
1997 _cleanup_strv_free_ char **l = NULL;
1998 char **rt;
1999 ExecDirectoryType i;
2000
2001 /* Compile the list of writable paths. This is the combination of
2002 * the explicitly configured paths, plus all runtime directories. */
2003
2004 if (strv_isempty(context->read_write_paths)) {
2005 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2006 if (!strv_isempty(context->directories[i].paths))
2007 break;
2008
2009 if (i == _EXEC_DIRECTORY_MAX) {
2010 *ret = NULL; /* NOP if neither is set */
2011 return 0;
2012 }
2013 }
2014
2015 l = strv_copy(context->read_write_paths);
2016 if (!l)
2017 return -ENOMEM;
2018
2019 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2020 if (!params->prefix[i])
2021 continue;
2022
2023 STRV_FOREACH(rt, context->directories[i].paths) {
2024 char *s;
2025
2026 s = strjoin(params->prefix[i], "/", *rt);
2027 if (!s)
2028 return -ENOMEM;
2029
2030 if (strv_consume(&l, s) < 0)
2031 return -ENOMEM;
2032 }
2033 }
2034
2035 *ret = l;
2036 l = NULL;
2037
2038 return 0;
2039 }
2040
2041 static int apply_mount_namespace(
2042 Unit *u,
2043 ExecCommand *command,
2044 const ExecContext *context,
2045 const ExecParameters *params,
2046 ExecRuntime *runtime) {
2047
2048 _cleanup_strv_free_ char **rw = NULL;
2049 char *tmp = NULL, *var = NULL;
2050 const char *root_dir = NULL, *root_image = NULL;
2051 NameSpaceInfo ns_info = {
2052 .ignore_protect_paths = false,
2053 .private_dev = context->private_devices,
2054 .protect_control_groups = context->protect_control_groups,
2055 .protect_kernel_tunables = context->protect_kernel_tunables,
2056 .protect_kernel_modules = context->protect_kernel_modules,
2057 .mount_apivfs = context->mount_apivfs,
2058 };
2059 bool needs_sandboxing;
2060 int r;
2061
2062 assert(context);
2063
2064 /* The runtime struct only contains the parent of the private /tmp,
2065 * which is non-accessible to world users. Inside of it there's a /tmp
2066 * that is sticky, and that's the one we want to use here. */
2067
2068 if (context->private_tmp && runtime) {
2069 if (runtime->tmp_dir)
2070 tmp = strjoina(runtime->tmp_dir, "/tmp");
2071 if (runtime->var_tmp_dir)
2072 var = strjoina(runtime->var_tmp_dir, "/tmp");
2073 }
2074
2075 r = compile_read_write_paths(context, params, &rw);
2076 if (r < 0)
2077 return r;
2078
2079 if (params->flags & EXEC_APPLY_CHROOT) {
2080 root_image = context->root_image;
2081
2082 if (!root_image)
2083 root_dir = context->root_directory;
2084 }
2085
2086 /*
2087 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2088 * sandbox info, otherwise enforce it, don't ignore protected paths and
2089 * fail if we are enable to apply the sandbox inside the mount namespace.
2090 */
2091 if (!context->dynamic_user && root_dir)
2092 ns_info.ignore_protect_paths = true;
2093
2094 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2095
2096 r = setup_namespace(root_dir, root_image,
2097 &ns_info, rw,
2098 needs_sandboxing ? context->read_only_paths : NULL,
2099 needs_sandboxing ? context->inaccessible_paths : NULL,
2100 context->bind_mounts,
2101 context->n_bind_mounts,
2102 tmp,
2103 var,
2104 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2105 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2106 context->mount_flags,
2107 DISSECT_IMAGE_DISCARD_ON_LOOP);
2108
2109 /* If we couldn't set up the namespace this is probably due to a
2110 * missing capability. In this case, silently proceeed. */
2111 if (IN_SET(r, -EPERM, -EACCES)) {
2112 log_open();
2113 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2114 log_close();
2115 r = 0;
2116 }
2117
2118 return r;
2119 }
2120
2121 static int apply_working_directory(
2122 const ExecContext *context,
2123 const ExecParameters *params,
2124 const char *home,
2125 const bool needs_mount_ns,
2126 int *exit_status) {
2127
2128 const char *d, *wd;
2129
2130 assert(context);
2131 assert(exit_status);
2132
2133 if (context->working_directory_home) {
2134
2135 if (!home) {
2136 *exit_status = EXIT_CHDIR;
2137 return -ENXIO;
2138 }
2139
2140 wd = home;
2141
2142 } else if (context->working_directory)
2143 wd = context->working_directory;
2144 else
2145 wd = "/";
2146
2147 if (params->flags & EXEC_APPLY_CHROOT) {
2148 if (!needs_mount_ns && context->root_directory)
2149 if (chroot(context->root_directory) < 0) {
2150 *exit_status = EXIT_CHROOT;
2151 return -errno;
2152 }
2153
2154 d = wd;
2155 } else
2156 d = prefix_roota(context->root_directory, wd);
2157
2158 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2159 *exit_status = EXIT_CHDIR;
2160 return -errno;
2161 }
2162
2163 return 0;
2164 }
2165
2166 static int setup_keyring(
2167 Unit *u,
2168 const ExecContext *context,
2169 const ExecParameters *p,
2170 uid_t uid, gid_t gid) {
2171
2172 key_serial_t keyring;
2173 int r;
2174
2175 assert(u);
2176 assert(context);
2177 assert(p);
2178
2179 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2180 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2181 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2182 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2183 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2184 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2185
2186 if (!(p->flags & EXEC_NEW_KEYRING))
2187 return 0;
2188
2189 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2190 return 0;
2191
2192 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2193 if (keyring == -1) {
2194 if (errno == ENOSYS)
2195 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2196 else if (IN_SET(errno, EACCES, EPERM))
2197 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2198 else if (errno == EDQUOT)
2199 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2200 else
2201 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2202
2203 return 0;
2204 }
2205
2206 /* Populate they keyring with the invocation ID by default. */
2207 if (!sd_id128_is_null(u->invocation_id)) {
2208 key_serial_t key;
2209
2210 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2211 if (key == -1)
2212 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2213 else {
2214 if (keyctl(KEYCTL_SETPERM, key,
2215 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2216 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2217 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2218 }
2219 }
2220
2221 /* And now, make the keyring owned by the service's user */
2222 if (uid_is_valid(uid) || gid_is_valid(gid))
2223 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2224 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2225
2226 /* When requested link the user keyring into the session keyring. */
2227 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2228 uid_t saved_uid;
2229 gid_t saved_gid;
2230
2231 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2232 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2233 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2234
2235 saved_uid = getuid();
2236 saved_gid = getgid();
2237
2238 if (gid_is_valid(gid) && gid != saved_gid) {
2239 if (setregid(gid, -1) < 0)
2240 return log_error_errno(errno, "Failed to change GID for user keyring: %m");
2241 }
2242
2243 if (uid_is_valid(uid) && uid != saved_uid) {
2244 if (setreuid(uid, -1) < 0) {
2245 (void) setregid(saved_gid, -1);
2246 return log_error_errno(errno, "Failed to change UID for user keyring: %m");
2247 }
2248 }
2249
2250 if (keyctl(KEYCTL_LINK,
2251 KEY_SPEC_USER_KEYRING,
2252 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2253
2254 r = -errno;
2255
2256 (void) setreuid(saved_uid, -1);
2257 (void) setregid(saved_gid, -1);
2258
2259 return log_error_errno(r, "Failed to link user keyring into session keyring: %m");
2260 }
2261
2262 if (uid_is_valid(uid) && uid != saved_uid) {
2263 if (setreuid(saved_uid, -1) < 0) {
2264 (void) setregid(saved_gid, -1);
2265 return log_error_errno(errno, "Failed to change UID back for user keyring: %m");
2266 }
2267 }
2268
2269 if (gid_is_valid(gid) && gid != saved_gid) {
2270 if (setregid(saved_gid, -1) < 0)
2271 return log_error_errno(errno, "Failed to change GID back for user keyring: %m");
2272 }
2273 }
2274
2275 return 0;
2276 }
2277
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', advancing the
 * element counter *n for each one added. A NULL pair is ignored. The caller must make sure the
 * array has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2290
2291 static int close_remaining_fds(
2292 const ExecParameters *params,
2293 ExecRuntime *runtime,
2294 DynamicCreds *dcreds,
2295 int user_lookup_fd,
2296 int socket_fd,
2297 int *fds, unsigned n_fds) {
2298
2299 unsigned n_dont_close = 0;
2300 int dont_close[n_fds + 12];
2301
2302 assert(params);
2303
2304 if (params->stdin_fd >= 0)
2305 dont_close[n_dont_close++] = params->stdin_fd;
2306 if (params->stdout_fd >= 0)
2307 dont_close[n_dont_close++] = params->stdout_fd;
2308 if (params->stderr_fd >= 0)
2309 dont_close[n_dont_close++] = params->stderr_fd;
2310
2311 if (socket_fd >= 0)
2312 dont_close[n_dont_close++] = socket_fd;
2313 if (n_fds > 0) {
2314 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2315 n_dont_close += n_fds;
2316 }
2317
2318 if (runtime)
2319 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2320
2321 if (dcreds) {
2322 if (dcreds->user)
2323 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2324 if (dcreds->group)
2325 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2326 }
2327
2328 if (user_lookup_fd >= 0)
2329 dont_close[n_dont_close++] = user_lookup_fd;
2330
2331 return close_all_fds(dont_close, n_dont_close);
2332 }
2333
2334 static int send_user_lookup(
2335 Unit *unit,
2336 int user_lookup_fd,
2337 uid_t uid,
2338 gid_t gid) {
2339
2340 assert(unit);
2341
2342 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2343 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2344 * specified. */
2345
2346 if (user_lookup_fd < 0)
2347 return 0;
2348
2349 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2350 return 0;
2351
2352 if (writev(user_lookup_fd,
2353 (struct iovec[]) {
2354 { .iov_base = &uid, .iov_len = sizeof(uid) },
2355 { .iov_base = &gid, .iov_len = sizeof(gid) },
2356 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2357 return -errno;
2358
2359 return 0;
2360 }
2361
2362 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2363 int r;
2364
2365 assert(c);
2366 assert(home);
2367 assert(buf);
2368
2369 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2370
2371 if (*home)
2372 return 0;
2373
2374 if (!c->working_directory_home)
2375 return 0;
2376
2377 if (uid == 0) {
2378 /* Hardcode /root as home directory for UID 0 */
2379 *home = "/root";
2380 return 1;
2381 }
2382
2383 r = get_home_dir(buf);
2384 if (r < 0)
2385 return r;
2386
2387 *home = *buf;
2388 return 1;
2389 }
2390
2391 static int exec_child(
2392 Unit *unit,
2393 ExecCommand *command,
2394 const ExecContext *context,
2395 const ExecParameters *params,
2396 ExecRuntime *runtime,
2397 DynamicCreds *dcreds,
2398 char **argv,
2399 int socket_fd,
2400 int named_iofds[3],
2401 int *fds,
2402 unsigned n_storage_fds,
2403 unsigned n_socket_fds,
2404 char **files_env,
2405 int user_lookup_fd,
2406 int *exit_status,
2407 char **error_message) {
2408
2409 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2410 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2411 _cleanup_free_ gid_t *supplementary_gids = NULL;
2412 const char *username = NULL, *groupname = NULL;
2413 const char *home = NULL, *shell = NULL;
2414 dev_t journal_stream_dev = 0;
2415 ino_t journal_stream_ino = 0;
2416 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2417 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2418 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2419 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2420 #ifdef HAVE_SELINUX
2421 bool use_selinux = false;
2422 #endif
2423 #ifdef HAVE_SMACK
2424 bool use_smack = false;
2425 #endif
2426 #ifdef HAVE_APPARMOR
2427 bool use_apparmor = false;
2428 #endif
2429 uid_t uid = UID_INVALID;
2430 gid_t gid = GID_INVALID;
2431 int i, r, ngids = 0;
2432 unsigned n_fds;
2433 ExecDirectoryType dt;
2434 int secure_bits;
2435
2436 assert(unit);
2437 assert(command);
2438 assert(context);
2439 assert(params);
2440 assert(exit_status);
2441 assert(error_message);
2442 /* We don't always set error_message, hence it must be initialized */
2443 assert(*error_message == NULL);
2444
2445 rename_process_from_path(command->path);
2446
2447 /* We reset exactly these signals, since they are the
2448 * only ones we set to SIG_IGN in the main daemon. All
2449 * others we leave untouched because we set them to
2450 * SIG_DFL or a valid handler initially, both of which
2451 * will be demoted to SIG_DFL. */
2452 (void) default_signals(SIGNALS_CRASH_HANDLER,
2453 SIGNALS_IGNORE, -1);
2454
2455 if (context->ignore_sigpipe)
2456 (void) ignore_signals(SIGPIPE, -1);
2457
2458 r = reset_signal_mask();
2459 if (r < 0) {
2460 *exit_status = EXIT_SIGNAL_MASK;
2461 *error_message = strdup("Failed to set process signal mask");
2462 /* If strdup fails, here and below, we will just print the generic error message. */
2463 return r;
2464 }
2465
2466 if (params->idle_pipe)
2467 do_idle_pipe_dance(params->idle_pipe);
2468
2469 /* Close sockets very early to make sure we don't
2470 * block init reexecution because it cannot bind its
2471 * sockets */
2472
2473 log_forget_fds();
2474
2475 n_fds = n_storage_fds + n_socket_fds;
2476 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2477 if (r < 0) {
2478 *exit_status = EXIT_FDS;
2479 *error_message = strdup("Failed to close unwanted file descriptors");
2480 return r;
2481 }
2482
2483 if (!context->same_pgrp)
2484 if (setsid() < 0) {
2485 *exit_status = EXIT_SETSID;
2486 *error_message = strdup("Failed to create new process session");
2487 return -errno;
2488 }
2489
2490 exec_context_tty_reset(context, params);
2491
2492 if (unit_shall_confirm_spawn(unit)) {
2493 const char *vc = params->confirm_spawn;
2494 _cleanup_free_ char *cmdline = NULL;
2495
2496 cmdline = exec_command_line(argv);
2497 if (!cmdline) {
2498 *exit_status = EXIT_MEMORY;
2499 return -ENOMEM;
2500 }
2501
2502 r = ask_for_confirmation(vc, unit, cmdline);
2503 if (r != CONFIRM_EXECUTE) {
2504 if (r == CONFIRM_PRETEND_SUCCESS) {
2505 *exit_status = EXIT_SUCCESS;
2506 return 0;
2507 }
2508 *exit_status = EXIT_CONFIRM;
2509 *error_message = strdup("Execution cancelled by the user");
2510 return -ECANCELED;
2511 }
2512 }
2513
2514 if (context->dynamic_user && dcreds) {
2515
2516 /* Make sure we bypass our own NSS module for any NSS checks */
2517 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2518 *exit_status = EXIT_USER;
2519 *error_message = strdup("Failed to update environment");
2520 return -errno;
2521 }
2522
2523 r = dynamic_creds_realize(dcreds, &uid, &gid);
2524 if (r < 0) {
2525 *exit_status = EXIT_USER;
2526 *error_message = strdup("Failed to update dynamic user credentials");
2527 return r;
2528 }
2529
2530 if (!uid_is_valid(uid)) {
2531 *exit_status = EXIT_USER;
2532 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2533 /* If asprintf fails, here and below, we will just print the generic error message. */
2534 return -ESRCH;
2535 }
2536
2537 if (!gid_is_valid(gid)) {
2538 *exit_status = EXIT_USER;
2539 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
2540 return -ESRCH;
2541 }
2542
2543 if (dcreds->user)
2544 username = dcreds->user->name;
2545
2546 } else {
2547 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2548 if (r < 0) {
2549 *exit_status = EXIT_USER;
2550 *error_message = strdup("Failed to determine user credentials");
2551 return r;
2552 }
2553
2554 r = get_fixed_group(context, &groupname, &gid);
2555 if (r < 0) {
2556 *exit_status = EXIT_GROUP;
2557 *error_message = strdup("Failed to determine group credentials");
2558 return r;
2559 }
2560 }
2561
2562 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2563 r = get_supplementary_groups(context, username, groupname, gid,
2564 &supplementary_gids, &ngids);
2565 if (r < 0) {
2566 *exit_status = EXIT_GROUP;
2567 *error_message = strdup("Failed to determine supplementary groups");
2568 return r;
2569 }
2570
2571 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2572 if (r < 0) {
2573 *exit_status = EXIT_USER;
2574 *error_message = strdup("Failed to send user credentials to PID1");
2575 return r;
2576 }
2577
2578 user_lookup_fd = safe_close(user_lookup_fd);
2579
2580 r = acquire_home(context, uid, &home, &home_buffer);
2581 if (r < 0) {
2582 *exit_status = EXIT_CHDIR;
2583 *error_message = strdup("Failed to determine $HOME for user");
2584 return r;
2585 }
2586
2587 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2588 * must sure to drop O_NONBLOCK */
2589 if (socket_fd >= 0)
2590 (void) fd_nonblock(socket_fd, false);
2591
2592 r = setup_input(context, params, socket_fd, named_iofds);
2593 if (r < 0) {
2594 *exit_status = EXIT_STDIN;
2595 *error_message = strdup("Failed to set up standard input");
2596 return r;
2597 }
2598
2599 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2600 if (r < 0) {
2601 *exit_status = EXIT_STDOUT;
2602 *error_message = strdup("Failed to set up standard output");
2603 return r;
2604 }
2605
2606 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2607 if (r < 0) {
2608 *exit_status = EXIT_STDERR;
2609 *error_message = strdup("Failed to set up standard error output");
2610 return r;
2611 }
2612
2613 if (params->cgroup_path) {
2614 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2615 if (r < 0) {
2616 *exit_status = EXIT_CGROUP;
2617 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
2618 return r;
2619 }
2620 }
2621
2622 if (context->oom_score_adjust_set) {
2623 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2624
2625 /* When we can't make this change due to EPERM, then
2626 * let's silently skip over it. User namespaces
2627 * prohibit write access to this file, and we
2628 * shouldn't trip up over that. */
2629
2630 sprintf(t, "%i", context->oom_score_adjust);
2631 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2632 if (r == -EPERM || r == -EACCES) {
2633 log_open();
2634 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2635 log_close();
2636 } else if (r < 0) {
2637 *exit_status = EXIT_OOM_ADJUST;
2638 *error_message = strdup("Failed to adjust OOM setting");
2639 return -errno;
2640 }
2641 }
2642
2643 if (context->nice_set)
2644 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2645 *exit_status = EXIT_NICE;
2646 *error_message = strdup("Failed to set up process scheduling priority (nice level)");
2647 return -errno;
2648 }
2649
2650 if (context->cpu_sched_set) {
2651 struct sched_param param = {
2652 .sched_priority = context->cpu_sched_priority,
2653 };
2654
2655 r = sched_setscheduler(0,
2656 context->cpu_sched_policy |
2657 (context->cpu_sched_reset_on_fork ?
2658 SCHED_RESET_ON_FORK : 0),
2659 &param);
2660 if (r < 0) {
2661 *exit_status = EXIT_SETSCHEDULER;
2662 *error_message = strdup("Failed to set up CPU scheduling");
2663 return -errno;
2664 }
2665 }
2666
2667 if (context->cpuset)
2668 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2669 *exit_status = EXIT_CPUAFFINITY;
2670 *error_message = strdup("Failed to set up CPU affinity");
2671 return -errno;
2672 }
2673
2674 if (context->ioprio_set)
2675 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2676 *exit_status = EXIT_IOPRIO;
2677 *error_message = strdup("Failed to set up IO scheduling priority");
2678 return -errno;
2679 }
2680
2681 if (context->timer_slack_nsec != NSEC_INFINITY)
2682 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2683 *exit_status = EXIT_TIMERSLACK;
2684 *error_message = strdup("Failed to set up timer slack");
2685 return -errno;
2686 }
2687
2688 if (context->personality != PERSONALITY_INVALID) {
2689 r = safe_personality(context->personality);
2690 if (r < 0) {
2691 *exit_status = EXIT_PERSONALITY;
2692 *error_message = strdup("Failed to set up execution domain (personality)");
2693 return r;
2694 }
2695 }
2696
2697 if (context->utmp_id)
2698 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2699 context->tty_path,
2700 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2701 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2702 USER_PROCESS,
2703 username);
2704
2705 if (context->user) {
2706 r = chown_terminal(STDIN_FILENO, uid);
2707 if (r < 0) {
2708 *exit_status = EXIT_STDIN;
2709 *error_message = strdup("Failed to change ownership of terminal");
2710 return r;
2711 }
2712 }
2713
2714 /* If delegation is enabled we'll pass ownership of the cgroup
2715 * (but only in systemd's own controller hierarchy!) to the
2716 * user of the new process. */
2717 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2718 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2719 if (r < 0) {
2720 *exit_status = EXIT_CGROUP;
2721 *error_message = strdup("Failed to adjust control group access");
2722 return r;
2723 }
2724
2725
2726 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2727 if (r < 0) {
2728 *exit_status = EXIT_CGROUP;
2729 *error_message = strdup("Failed to adjust control group access");
2730 return r;
2731 }
2732 }
2733
2734 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
2735 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2736 if (r < 0) {
2737 *error_message = strdup("Failed to set up special execution directory");
2738 return r;
2739 }
2740 }
2741
2742 r = build_environment(
2743 unit,
2744 context,
2745 params,
2746 n_fds,
2747 home,
2748 username,
2749 shell,
2750 journal_stream_dev,
2751 journal_stream_ino,
2752 &our_env);
2753 if (r < 0) {
2754 *exit_status = EXIT_MEMORY;
2755 return r;
2756 }
2757
2758 r = build_pass_environment(context, &pass_env);
2759 if (r < 0) {
2760 *exit_status = EXIT_MEMORY;
2761 return r;
2762 }
2763
2764 accum_env = strv_env_merge(5,
2765 params->environment,
2766 our_env,
2767 pass_env,
2768 context->environment,
2769 files_env,
2770 NULL);
2771 if (!accum_env) {
2772 *exit_status = EXIT_MEMORY;
2773 return -ENOMEM;
2774 }
2775 accum_env = strv_env_clean(accum_env);
2776
2777 (void) umask(context->umask);
2778
2779 r = setup_keyring(unit, context, params, uid, gid);
2780 if (r < 0) {
2781 *exit_status = EXIT_KEYRING;
2782 *error_message = strdup("Failed to set up kernel keyring");
2783 return r;
2784 }
2785
2786 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2787 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2788
2789 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2790 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
2791
2792 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2793 if (needs_ambient_hack)
2794 needs_setuid = false;
2795 else
2796 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2797
2798 if (needs_sandboxing) {
2799 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2800 * present. The actual MAC context application will happen later, as late as possible, to avoid
2801 * impacting our own code paths. */
2802
2803 #ifdef HAVE_SELINUX
2804 use_selinux = mac_selinux_use();
2805 #endif
2806 #ifdef HAVE_SMACK
2807 use_smack = mac_smack_use();
2808 #endif
2809 #ifdef HAVE_APPARMOR
2810 use_apparmor = mac_apparmor_use();
2811 #endif
2812 }
2813
2814 if (needs_setuid) {
2815 if (context->pam_name && username) {
2816 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2817 if (r < 0) {
2818 *exit_status = EXIT_PAM;
2819 *error_message = strdup("Failed to set up PAM session");
2820 return r;
2821 }
2822 }
2823 }
2824
2825 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2826 r = setup_netns(runtime->netns_storage_socket);
2827 if (r < 0) {
2828 *exit_status = EXIT_NETWORK;
2829 *error_message = strdup("Failed to set up network namespacing");
2830 return r;
2831 }
2832 }
2833
2834 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2835 if (needs_mount_namespace) {
2836 r = apply_mount_namespace(unit, command, context, params, runtime);
2837 if (r < 0) {
2838 *exit_status = EXIT_NAMESPACE;
2839 *error_message = strdup("Failed to set up mount namespacing");
2840 return r;
2841 }
2842 }
2843
2844 /* Apply just after mount namespace setup */
2845 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2846 if (r < 0) {
2847 *error_message = strdup("Changing to the requested working directory failed");
2848 return r;
2849 }
2850
2851 /* Drop groups as early as possbile */
2852 if (needs_setuid) {
2853 r = enforce_groups(context, gid, supplementary_gids, ngids);
2854 if (r < 0) {
2855 *error_message = strdup("Changing group credentials failed");
2856 *exit_status = EXIT_GROUP;
2857 return r;
2858 }
2859 }
2860
2861 if (needs_sandboxing) {
2862 #ifdef HAVE_SELINUX
2863 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
2864 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2865 if (r < 0) {
2866 *error_message = strdup("Failed to determine SELinux context");
2867 *exit_status = EXIT_SELINUX_CONTEXT;
2868 return r;
2869 }
2870 }
2871 #endif
2872
2873 if (context->private_users) {
2874 r = setup_private_users(uid, gid);
2875 if (r < 0) {
2876 *error_message = strdup("Failed to set up user namespacing");
2877 *exit_status = EXIT_USER;
2878 return r;
2879 }
2880 }
2881 }
2882
2883 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2884 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2885 * was needed to upload the policy and can now be closed as well. */
2886 r = close_all_fds(fds, n_fds);
2887 if (r >= 0)
2888 r = shift_fds(fds, n_fds);
2889 if (r >= 0)
2890 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
2891 if (r < 0) {
2892 *error_message = strdup("Failed to adjust passed file descriptors");
2893 *exit_status = EXIT_FDS;
2894 return r;
2895 }
2896
2897 secure_bits = context->secure_bits;
2898
2899 if (needs_sandboxing) {
2900 uint64_t bset;
2901
2902 for (i = 0; i < _RLIMIT_MAX; i++) {
2903
2904 if (!context->rlimit[i])
2905 continue;
2906
2907 r = setrlimit_closest(i, context->rlimit[i]);
2908 if (r < 0) {
2909 *error_message = strdup("Failed to adjust resource limits");
2910 *exit_status = EXIT_LIMITS;
2911 return r;
2912 }
2913 }
2914
2915 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2916 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2917 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2918 *error_message = strdup("Failed to adjust RLIMIT_RTPRIO resource limit");
2919 *exit_status = EXIT_LIMITS;
2920 return -errno;
2921 }
2922 }
2923
2924 bset = context->capability_bounding_set;
2925 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2926 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2927 * instead of us doing that */
2928 if (needs_ambient_hack)
2929 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2930 (UINT64_C(1) << CAP_SETUID) |
2931 (UINT64_C(1) << CAP_SETGID);
2932
2933 if (!cap_test_all(bset)) {
2934 r = capability_bounding_set_drop(bset, false);
2935 if (r < 0) {
2936 *exit_status = EXIT_CAPABILITIES;
2937 *error_message = strdup("Failed to drop capabilities");
2938 return r;
2939 }
2940 }
2941
2942 /* This is done before enforce_user, but ambient set
2943 * does not survive over setresuid() if keep_caps is not set. */
2944 if (!needs_ambient_hack &&
2945 context->capability_ambient_set != 0) {
2946 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2947 if (r < 0) {
2948 *exit_status = EXIT_CAPABILITIES;
2949 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
2950 return r;
2951 }
2952 }
2953 }
2954
2955 if (needs_setuid) {
2956 if (context->user) {
2957 r = enforce_user(context, uid);
2958 if (r < 0) {
2959 *exit_status = EXIT_USER;
2960 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
2961 return r;
2962 }
2963
2964 if (!needs_ambient_hack &&
2965 context->capability_ambient_set != 0) {
2966
2967 /* Fix the ambient capabilities after user change. */
2968 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2969 if (r < 0) {
2970 *exit_status = EXIT_CAPABILITIES;
2971 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
2972 return r;
2973 }
2974
2975 /* If we were asked to change user and ambient capabilities
2976 * were requested, we had to add keep-caps to the securebits
2977 * so that we would maintain the inherited capability set
2978 * through the setresuid(). Make sure that the bit is added
2979 * also to the context secure_bits so that we don't try to
2980 * drop the bit away next. */
2981
2982 secure_bits |= 1<<SECURE_KEEP_CAPS;
2983 }
2984 }
2985 }
2986
2987 if (needs_sandboxing) {
2988 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2989 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2990 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2991 * are restricted. */
2992
2993 #ifdef HAVE_SELINUX
2994 if (use_selinux) {
2995 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2996
2997 if (exec_context) {
2998 r = setexeccon(exec_context);
2999 if (r < 0) {
3000 *exit_status = EXIT_SELINUX_CONTEXT;
3001 (void) asprintf(error_message, "Failed to change SELinux context to %s", exec_context);
3002 return r;
3003 }
3004 }
3005 }
3006 #endif
3007
3008 #ifdef HAVE_SMACK
3009 if (use_smack) {
3010 r = setup_smack(context, command);
3011 if (r < 0) {
3012 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3013 *error_message = strdup("Failed to set SMACK process label");
3014 return r;
3015 }
3016 }
3017 #endif
3018
3019 #ifdef HAVE_APPARMOR
3020 if (use_apparmor && context->apparmor_profile) {
3021 r = aa_change_onexec(context->apparmor_profile);
3022 if (r < 0 && !context->apparmor_profile_ignore) {
3023 *exit_status = EXIT_APPARMOR_PROFILE;
3024 (void) asprintf(error_message,
3025 "Failed to prepare AppArmor profile change to %s",
3026 context->apparmor_profile);
3027 return -errno;
3028 }
3029 }
3030 #endif
3031
3032 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3033 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
3034 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3035 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
3036 *exit_status = EXIT_SECUREBITS;
3037 *error_message = strdup("Failed to set process secure bits");
3038 return -errno;
3039 }
3040
3041 if (context_has_no_new_privileges(context))
3042 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
3043 *exit_status = EXIT_NO_NEW_PRIVILEGES;
3044 *error_message = strdup("Failed to disable new privileges");
3045 return -errno;
3046 }
3047
3048 #ifdef HAVE_SECCOMP
3049 r = apply_address_families(unit, context);
3050 if (r < 0) {
3051 *exit_status = EXIT_ADDRESS_FAMILIES;
3052 *error_message = strdup("Failed to restrict address families");
3053 return r;
3054 }
3055
3056 r = apply_memory_deny_write_execute(unit, context);
3057 if (r < 0) {
3058 *exit_status = EXIT_SECCOMP;
3059 *error_message = strdup("Failed to disable writing to executable memory");
3060 return r;
3061 }
3062
3063 r = apply_restrict_realtime(unit, context);
3064 if (r < 0) {
3065 *exit_status = EXIT_SECCOMP;
3066 *error_message = strdup("Failed to apply realtime restrictions");
3067 return r;
3068 }
3069
3070 r = apply_restrict_namespaces(unit, context);
3071 if (r < 0) {
3072 *exit_status = EXIT_SECCOMP;
3073 *error_message = strdup("Failed to apply namespace restrictions");
3074 return r;
3075 }
3076
3077 r = apply_protect_sysctl(unit, context);
3078 if (r < 0) {
3079 *exit_status = EXIT_SECCOMP;
3080 *error_message = strdup("Failed to apply sysctl restrictions");
3081 return r;
3082 }
3083
3084 r = apply_protect_kernel_modules(unit, context);
3085 if (r < 0) {
3086 *exit_status = EXIT_SECCOMP;
3087 *error_message = strdup("Failed to apply module loading restrictions");
3088 return r;
3089 }
3090
3091 r = apply_private_devices(unit, context);
3092 if (r < 0) {
3093 *exit_status = EXIT_SECCOMP;
3094 *error_message = strdup("Failed to set up private devices");
3095 return r;
3096 }
3097
3098 r = apply_syscall_archs(unit, context);
3099 if (r < 0) {
3100 *exit_status = EXIT_SECCOMP;
3101 *error_message = strdup("Failed to apply syscall architecture restrictions");
3102 return r;
3103 }
3104
3105 r = apply_lock_personality(unit, context);
3106 if (r < 0) {
3107 *exit_status = EXIT_SECCOMP;
3108 *error_message = strdup("Failed to lock personalities");
3109 return r;
3110 }
3111
3112 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3113 * by the filter as little as possible. */
3114 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3115 if (r < 0) {
3116 *exit_status = EXIT_SECCOMP;
3117 *error_message = strdup("Failed to apply system call filters");
3118 return r;
3119 }
3120 #endif
3121 }
3122
3123 if (!strv_isempty(context->unset_environment)) {
3124 char **ee = NULL;
3125
3126 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3127 if (!ee) {
3128 *exit_status = EXIT_MEMORY;
3129 return -ENOMEM;
3130 }
3131
3132 strv_free(accum_env);
3133 accum_env = ee;
3134 }
3135
3136 final_argv = replace_env_argv(argv, accum_env);
3137 if (!final_argv) {
3138 *exit_status = EXIT_MEMORY;
3139 *error_message = strdup("Failed to prepare process arguments");
3140 return -ENOMEM;
3141 }
3142
3143 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3144 _cleanup_free_ char *line;
3145
3146 line = exec_command_line(final_argv);
3147 if (line) {
3148 log_open();
3149 log_struct(LOG_DEBUG,
3150 "EXECUTABLE=%s", command->path,
3151 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3152 LOG_UNIT_ID(unit),
3153 NULL);
3154 log_close();
3155 }
3156 }
3157
3158 execve(command->path, final_argv, accum_env);
3159 *exit_status = EXIT_EXEC;
3160 return -errno;
3161 }
3162
3163 int exec_spawn(Unit *unit,
3164 ExecCommand *command,
3165 const ExecContext *context,
3166 const ExecParameters *params,
3167 ExecRuntime *runtime,
3168 DynamicCreds *dcreds,
3169 pid_t *ret) {
3170
3171 _cleanup_strv_free_ char **files_env = NULL;
3172 int *fds = NULL;
3173 unsigned n_storage_fds = 0, n_socket_fds = 0;
3174 _cleanup_free_ char *line = NULL;
3175 int socket_fd, r;
3176 int named_iofds[3] = { -1, -1, -1 };
3177 char **argv;
3178 pid_t pid;
3179
3180 assert(unit);
3181 assert(command);
3182 assert(context);
3183 assert(ret);
3184 assert(params);
3185 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3186
3187 if (context->std_input == EXEC_INPUT_SOCKET ||
3188 context->std_output == EXEC_OUTPUT_SOCKET ||
3189 context->std_error == EXEC_OUTPUT_SOCKET) {
3190
3191 if (params->n_socket_fds > 1) {
3192 log_unit_error(unit, "Got more than one socket.");
3193 return -EINVAL;
3194 }
3195
3196 if (params->n_socket_fds == 0) {
3197 log_unit_error(unit, "Got no socket.");
3198 return -EINVAL;
3199 }
3200
3201 socket_fd = params->fds[0];
3202 } else {
3203 socket_fd = -1;
3204 fds = params->fds;
3205 n_storage_fds = params->n_storage_fds;
3206 n_socket_fds = params->n_socket_fds;
3207 }
3208
3209 r = exec_context_named_iofds(unit, context, params, named_iofds);
3210 if (r < 0)
3211 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3212
3213 r = exec_context_load_environment(unit, context, &files_env);
3214 if (r < 0)
3215 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3216
3217 argv = params->argv ?: command->argv;
3218 line = exec_command_line(argv);
3219 if (!line)
3220 return log_oom();
3221
3222 log_struct(LOG_DEBUG,
3223 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3224 "EXECUTABLE=%s", command->path,
3225 LOG_UNIT_ID(unit),
3226 NULL);
3227 pid = fork();
3228 if (pid < 0)
3229 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3230
3231 if (pid == 0) {
3232 int exit_status;
3233 _cleanup_free_ char *error_message = NULL;
3234
3235 r = exec_child(unit,
3236 command,
3237 context,
3238 params,
3239 runtime,
3240 dcreds,
3241 argv,
3242 socket_fd,
3243 named_iofds,
3244 fds,
3245 n_storage_fds,
3246 n_socket_fds,
3247 files_env,
3248 unit->manager->user_lookup_fds[1],
3249 &exit_status,
3250 &error_message);
3251 if (r < 0) {
3252 log_open();
3253 if (error_message)
3254 log_struct_errno(LOG_ERR, r,
3255 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3256 LOG_UNIT_ID(unit),
3257 LOG_UNIT_MESSAGE(unit, "%s: %m",
3258 error_message),
3259 "EXECUTABLE=%s", command->path,
3260 NULL);
3261 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
3262 log_struct_errno(LOG_INFO, r,
3263 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3264 LOG_UNIT_ID(unit),
3265 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3266 command->path),
3267 "EXECUTABLE=%s", command->path,
3268 NULL);
3269 else
3270 log_struct_errno(LOG_ERR, r,
3271 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3272 LOG_UNIT_ID(unit),
3273 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3274 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3275 command->path),
3276 "EXECUTABLE=%s", command->path,
3277 NULL);
3278 }
3279
3280 _exit(exit_status);
3281 }
3282
3283 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3284
3285 /* We add the new process to the cgroup both in the child (so
3286 * that we can be sure that no user code is ever executed
3287 * outside of the cgroup) and in the parent (so that we can be
3288 * sure that when we kill the cgroup the process will be
3289 * killed too). */
3290 if (params->cgroup_path)
3291 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3292
3293 exec_status_start(&command->exec_status, pid);
3294
3295 *ret = pid;
3296 return 0;
3297 }
3298
3299 void exec_context_init(ExecContext *c) {
3300 ExecDirectoryType i;
3301
3302 assert(c);
3303
3304 c->umask = 0022;
3305 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3306 c->cpu_sched_policy = SCHED_OTHER;
3307 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3308 c->syslog_level_prefix = true;
3309 c->ignore_sigpipe = true;
3310 c->timer_slack_nsec = NSEC_INFINITY;
3311 c->personality = PERSONALITY_INVALID;
3312 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3313 c->directories[i].mode = 0755;
3314 c->capability_bounding_set = CAP_ALL;
3315 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3316 }
3317
3318 void exec_context_done(ExecContext *c) {
3319 unsigned l;
3320 ExecDirectoryType i;
3321
3322 assert(c);
3323
3324 c->environment = strv_free(c->environment);
3325 c->environment_files = strv_free(c->environment_files);
3326 c->pass_environment = strv_free(c->pass_environment);
3327 c->unset_environment = strv_free(c->unset_environment);
3328
3329 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3330 c->rlimit[l] = mfree(c->rlimit[l]);
3331
3332 for (l = 0; l < 3; l++)
3333 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3334
3335 c->working_directory = mfree(c->working_directory);
3336 c->root_directory = mfree(c->root_directory);
3337 c->root_image = mfree(c->root_image);
3338 c->tty_path = mfree(c->tty_path);
3339 c->syslog_identifier = mfree(c->syslog_identifier);
3340 c->user = mfree(c->user);
3341 c->group = mfree(c->group);
3342
3343 c->supplementary_groups = strv_free(c->supplementary_groups);
3344
3345 c->pam_name = mfree(c->pam_name);
3346
3347 c->read_only_paths = strv_free(c->read_only_paths);
3348 c->read_write_paths = strv_free(c->read_write_paths);
3349 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3350
3351 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3352
3353 if (c->cpuset)
3354 CPU_FREE(c->cpuset);
3355
3356 c->utmp_id = mfree(c->utmp_id);
3357 c->selinux_context = mfree(c->selinux_context);
3358 c->apparmor_profile = mfree(c->apparmor_profile);
3359 c->smack_process_label = mfree(c->smack_process_label);
3360
3361 c->syscall_filter = set_free(c->syscall_filter);
3362 c->syscall_archs = set_free(c->syscall_archs);
3363 c->address_families = set_free(c->address_families);
3364
3365 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3366 c->directories[i].paths = strv_free(c->directories[i].paths);
3367 }
3368
3369 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3370 char **i;
3371
3372 assert(c);
3373
3374 if (!runtime_prefix)
3375 return 0;
3376
3377 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3378 _cleanup_free_ char *p;
3379
3380 p = strjoin(runtime_prefix, "/", *i);
3381 if (!p)
3382 return -ENOMEM;
3383
3384 /* We execute this synchronously, since we need to be
3385 * sure this is gone when we start the service
3386 * next. */
3387 (void) rm_rf(p, REMOVE_ROOT);
3388 }
3389
3390 return 0;
3391 }
3392
3393 void exec_command_done(ExecCommand *c) {
3394 assert(c);
3395
3396 c->path = mfree(c->path);
3397
3398 c->argv = strv_free(c->argv);
3399 }
3400
3401 void exec_command_done_array(ExecCommand *c, unsigned n) {
3402 unsigned i;
3403
3404 for (i = 0; i < n; i++)
3405 exec_command_done(c+i);
3406 }
3407
3408 ExecCommand* exec_command_free_list(ExecCommand *c) {
3409 ExecCommand *i;
3410
3411 while ((i = c)) {
3412 LIST_REMOVE(command, c, i);
3413 exec_command_done(i);
3414 free(i);
3415 }
3416
3417 return NULL;
3418 }
3419
3420 void exec_command_free_array(ExecCommand **c, unsigned n) {
3421 unsigned i;
3422
3423 for (i = 0; i < n; i++)
3424 c[i] = exec_command_free_list(c[i]);
3425 }
3426
3427 typedef struct InvalidEnvInfo {
3428 Unit *unit;
3429 const char *path;
3430 } InvalidEnvInfo;
3431
3432 static void invalid_env(const char *p, void *userdata) {
3433 InvalidEnvInfo *info = userdata;
3434
3435 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3436 }
3437
3438 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3439 assert(c);
3440
3441 switch (fd_index) {
3442 case STDIN_FILENO:
3443 if (c->std_input != EXEC_INPUT_NAMED_FD)
3444 return NULL;
3445 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3446 case STDOUT_FILENO:
3447 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3448 return NULL;
3449 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3450 case STDERR_FILENO:
3451 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3452 return NULL;
3453 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3454 default:
3455 return NULL;
3456 }
3457 }
3458
3459 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3460 unsigned i, targets;
3461 const char* stdio_fdname[3];
3462 unsigned n_fds;
3463
3464 assert(c);
3465 assert(p);
3466
3467 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3468 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3469 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3470
3471 for (i = 0; i < 3; i++)
3472 stdio_fdname[i] = exec_context_fdname(c, i);
3473
3474 n_fds = p->n_storage_fds + p->n_socket_fds;
3475
3476 for (i = 0; i < n_fds && targets > 0; i++)
3477 if (named_iofds[STDIN_FILENO] < 0 &&
3478 c->std_input == EXEC_INPUT_NAMED_FD &&
3479 stdio_fdname[STDIN_FILENO] &&
3480 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3481
3482 named_iofds[STDIN_FILENO] = p->fds[i];
3483 targets--;
3484
3485 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3486 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3487 stdio_fdname[STDOUT_FILENO] &&
3488 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3489
3490 named_iofds[STDOUT_FILENO] = p->fds[i];
3491 targets--;
3492
3493 } else if (named_iofds[STDERR_FILENO] < 0 &&
3494 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3495 stdio_fdname[STDERR_FILENO] &&
3496 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3497
3498 named_iofds[STDERR_FILENO] = p->fds[i];
3499 targets--;
3500 }
3501
3502 return targets == 0 ? 0 : -ENOENT;
3503 }
3504
3505 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3506 char **i, **r = NULL;
3507
3508 assert(c);
3509 assert(l);
3510
3511 STRV_FOREACH(i, c->environment_files) {
3512 char *fn;
3513 int k;
3514 unsigned n;
3515 bool ignore = false;
3516 char **p;
3517 _cleanup_globfree_ glob_t pglob = {};
3518
3519 fn = *i;
3520
3521 if (fn[0] == '-') {
3522 ignore = true;
3523 fn++;
3524 }
3525
3526 if (!path_is_absolute(fn)) {
3527 if (ignore)
3528 continue;
3529
3530 strv_free(r);
3531 return -EINVAL;
3532 }
3533
3534 /* Filename supports globbing, take all matching files */
3535 k = safe_glob(fn, 0, &pglob);
3536 if (k < 0) {
3537 if (ignore)
3538 continue;
3539
3540 strv_free(r);
3541 return k;
3542 }
3543
3544 /* When we don't match anything, -ENOENT should be returned */
3545 assert(pglob.gl_pathc > 0);
3546
3547 for (n = 0; n < pglob.gl_pathc; n++) {
3548 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3549 if (k < 0) {
3550 if (ignore)
3551 continue;
3552
3553 strv_free(r);
3554 return k;
3555 }
3556 /* Log invalid environment variables with filename */
3557 if (p) {
3558 InvalidEnvInfo info = {
3559 .unit = unit,
3560 .path = pglob.gl_pathv[n]
3561 };
3562
3563 p = strv_env_clean_with_callback(p, invalid_env, &info);
3564 }
3565
3566 if (r == NULL)
3567 r = p;
3568 else {
3569 char **m;
3570
3571 m = strv_env_merge(2, r, p);
3572 strv_free(r);
3573 strv_free(p);
3574 if (!m)
3575 return -ENOMEM;
3576
3577 r = m;
3578 }
3579 }
3580 }
3581
3582 *l = r;
3583
3584 return 0;
3585 }
3586
3587 static bool tty_may_match_dev_console(const char *tty) {
3588 _cleanup_free_ char *active = NULL;
3589 char *console;
3590
3591 if (!tty)
3592 return true;
3593
3594 tty = skip_dev_prefix(tty);
3595
3596 /* trivial identity? */
3597 if (streq(tty, "console"))
3598 return true;
3599
3600 console = resolve_dev_console(&active);
3601 /* if we could not resolve, assume it may */
3602 if (!console)
3603 return true;
3604
3605 /* "tty0" means the active VC, so it may be the same sometimes */
3606 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3607 }
3608
3609 bool exec_context_may_touch_console(ExecContext *ec) {
3610
3611 return (ec->tty_reset ||
3612 ec->tty_vhangup ||
3613 ec->tty_vt_disallocate ||
3614 is_terminal_input(ec->std_input) ||
3615 is_terminal_output(ec->std_output) ||
3616 is_terminal_output(ec->std_error)) &&
3617 tty_may_match_dev_console(exec_context_tty_path(ec));
3618 }
3619
/* Writes every string of the NULL-terminated vector 'l' to 'f', each preceded by a single space.
 * A NULL vector writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++) {
                fputc(' ', f);
                fputs(*item, f);
        }
}
3628
3629 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3630 char **e, **d;
3631 unsigned i;
3632 ExecDirectoryType dt;
3633 int r;
3634
3635 assert(c);
3636 assert(f);
3637
3638 prefix = strempty(prefix);
3639
3640 fprintf(f,
3641 "%sUMask: %04o\n"
3642 "%sWorkingDirectory: %s\n"
3643 "%sRootDirectory: %s\n"
3644 "%sNonBlocking: %s\n"
3645 "%sPrivateTmp: %s\n"
3646 "%sPrivateDevices: %s\n"
3647 "%sProtectKernelTunables: %s\n"
3648 "%sProtectKernelModules: %s\n"
3649 "%sProtectControlGroups: %s\n"
3650 "%sPrivateNetwork: %s\n"
3651 "%sPrivateUsers: %s\n"
3652 "%sProtectHome: %s\n"
3653 "%sProtectSystem: %s\n"
3654 "%sMountAPIVFS: %s\n"
3655 "%sIgnoreSIGPIPE: %s\n"
3656 "%sMemoryDenyWriteExecute: %s\n"
3657 "%sRestrictRealtime: %s\n"
3658 "%sKeyringMode: %s\n",
3659 prefix, c->umask,
3660 prefix, c->working_directory ? c->working_directory : "/",
3661 prefix, c->root_directory ? c->root_directory : "/",
3662 prefix, yes_no(c->non_blocking),
3663 prefix, yes_no(c->private_tmp),
3664 prefix, yes_no(c->private_devices),
3665 prefix, yes_no(c->protect_kernel_tunables),
3666 prefix, yes_no(c->protect_kernel_modules),
3667 prefix, yes_no(c->protect_control_groups),
3668 prefix, yes_no(c->private_network),
3669 prefix, yes_no(c->private_users),
3670 prefix, protect_home_to_string(c->protect_home),
3671 prefix, protect_system_to_string(c->protect_system),
3672 prefix, yes_no(c->mount_apivfs),
3673 prefix, yes_no(c->ignore_sigpipe),
3674 prefix, yes_no(c->memory_deny_write_execute),
3675 prefix, yes_no(c->restrict_realtime),
3676 prefix, exec_keyring_mode_to_string(c->keyring_mode));
3677
3678 if (c->root_image)
3679 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3680
3681 STRV_FOREACH(e, c->environment)
3682 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3683
3684 STRV_FOREACH(e, c->environment_files)
3685 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3686
3687 STRV_FOREACH(e, c->pass_environment)
3688 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3689
3690 STRV_FOREACH(e, c->unset_environment)
3691 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3692
3693 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3694
3695 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3696 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3697
3698 STRV_FOREACH(d, c->directories[dt].paths)
3699 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3700 }
3701
3702 if (c->nice_set)
3703 fprintf(f,
3704 "%sNice: %i\n",
3705 prefix, c->nice);
3706
3707 if (c->oom_score_adjust_set)
3708 fprintf(f,
3709 "%sOOMScoreAdjust: %i\n",
3710 prefix, c->oom_score_adjust);
3711
3712 for (i = 0; i < RLIM_NLIMITS; i++)
3713 if (c->rlimit[i]) {
3714 fprintf(f, "%s%s: " RLIM_FMT "\n",
3715 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3716 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3717 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3718 }
3719
3720 if (c->ioprio_set) {
3721 _cleanup_free_ char *class_str = NULL;
3722
3723 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3724 if (r >= 0)
3725 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3726
3727 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3728 }
3729
3730 if (c->cpu_sched_set) {
3731 _cleanup_free_ char *policy_str = NULL;
3732
3733 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3734 if (r >= 0)
3735 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3736
3737 fprintf(f,
3738 "%sCPUSchedulingPriority: %i\n"
3739 "%sCPUSchedulingResetOnFork: %s\n",
3740 prefix, c->cpu_sched_priority,
3741 prefix, yes_no(c->cpu_sched_reset_on_fork));
3742 }
3743
3744 if (c->cpuset) {
3745 fprintf(f, "%sCPUAffinity:", prefix);
3746 for (i = 0; i < c->cpuset_ncpus; i++)
3747 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3748 fprintf(f, " %u", i);
3749 fputs("\n", f);
3750 }
3751
3752 if (c->timer_slack_nsec != NSEC_INFINITY)
3753 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3754
3755 fprintf(f,
3756 "%sStandardInput: %s\n"
3757 "%sStandardOutput: %s\n"
3758 "%sStandardError: %s\n",
3759 prefix, exec_input_to_string(c->std_input),
3760 prefix, exec_output_to_string(c->std_output),
3761 prefix, exec_output_to_string(c->std_error));
3762
3763 if (c->tty_path)
3764 fprintf(f,
3765 "%sTTYPath: %s\n"
3766 "%sTTYReset: %s\n"
3767 "%sTTYVHangup: %s\n"
3768 "%sTTYVTDisallocate: %s\n",
3769 prefix, c->tty_path,
3770 prefix, yes_no(c->tty_reset),
3771 prefix, yes_no(c->tty_vhangup),
3772 prefix, yes_no(c->tty_vt_disallocate));
3773
3774 if (IN_SET(c->std_output,
3775 EXEC_OUTPUT_SYSLOG,
3776 EXEC_OUTPUT_KMSG,
3777 EXEC_OUTPUT_JOURNAL,
3778 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3779 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3780 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3781 IN_SET(c->std_error,
3782 EXEC_OUTPUT_SYSLOG,
3783 EXEC_OUTPUT_KMSG,
3784 EXEC_OUTPUT_JOURNAL,
3785 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3786 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3787 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3788
3789 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3790
3791 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3792 if (r >= 0)
3793 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3794
3795 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3796 if (r >= 0)
3797 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3798 }
3799
3800 if (c->secure_bits) {
3801 _cleanup_free_ char *str = NULL;
3802
3803 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3804 if (r >= 0)
3805 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3806 }
3807
3808 if (c->capability_bounding_set != CAP_ALL) {
3809 _cleanup_free_ char *str = NULL;
3810
3811 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3812 if (r >= 0)
3813 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
3814 }
3815
3816 if (c->capability_ambient_set != 0) {
3817 _cleanup_free_ char *str = NULL;
3818
3819 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3820 if (r >= 0)
3821 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
3822 }
3823
3824 if (c->user)
3825 fprintf(f, "%sUser: %s\n", prefix, c->user);
3826 if (c->group)
3827 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3828
3829 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3830
3831 if (strv_length(c->supplementary_groups) > 0) {
3832 fprintf(f, "%sSupplementaryGroups:", prefix);
3833 strv_fprintf(f, c->supplementary_groups);
3834 fputs("\n", f);
3835 }
3836
3837 if (c->pam_name)
3838 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3839
3840 if (strv_length(c->read_write_paths) > 0) {
3841 fprintf(f, "%sReadWritePaths:", prefix);
3842 strv_fprintf(f, c->read_write_paths);
3843 fputs("\n", f);
3844 }
3845
3846 if (strv_length(c->read_only_paths) > 0) {
3847 fprintf(f, "%sReadOnlyPaths:", prefix);
3848 strv_fprintf(f, c->read_only_paths);
3849 fputs("\n", f);
3850 }
3851
3852 if (strv_length(c->inaccessible_paths) > 0) {
3853 fprintf(f, "%sInaccessiblePaths:", prefix);
3854 strv_fprintf(f, c->inaccessible_paths);
3855 fputs("\n", f);
3856 }
3857
3858 if (c->n_bind_mounts > 0)
3859 for (i = 0; i < c->n_bind_mounts; i++) {
3860 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3861 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3862 c->bind_mounts[i].source,
3863 c->bind_mounts[i].destination,
3864 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3865 }
3866
3867 if (c->utmp_id)
3868 fprintf(f,
3869 "%sUtmpIdentifier: %s\n",
3870 prefix, c->utmp_id);
3871
3872 if (c->selinux_context)
3873 fprintf(f,
3874 "%sSELinuxContext: %s%s\n",
3875 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3876
3877 if (c->apparmor_profile)
3878 fprintf(f,
3879 "%sAppArmorProfile: %s%s\n",
3880 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3881
3882 if (c->smack_process_label)
3883 fprintf(f,
3884 "%sSmackProcessLabel: %s%s\n",
3885 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3886
3887 if (c->personality != PERSONALITY_INVALID)
3888 fprintf(f,
3889 "%sPersonality: %s\n",
3890 prefix, strna(personality_to_string(c->personality)));
3891
3892 fprintf(f,
3893 "%sLockPersonality: %s\n",
3894 prefix, yes_no(c->lock_personality));
3895
3896 if (c->syscall_filter) {
3897 #ifdef HAVE_SECCOMP
3898 Iterator j;
3899 void *id;
3900 bool first = true;
3901 #endif
3902
3903 fprintf(f,
3904 "%sSystemCallFilter: ",
3905 prefix);
3906
3907 if (!c->syscall_whitelist)
3908 fputc('~', f);
3909
3910 #ifdef HAVE_SECCOMP
3911 SET_FOREACH(id, c->syscall_filter, j) {
3912 _cleanup_free_ char *name = NULL;
3913
3914 if (first)
3915 first = false;
3916 else
3917 fputc(' ', f);
3918
3919 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3920 fputs(strna(name), f);
3921 }
3922 #endif
3923
3924 fputc('\n', f);
3925 }
3926
3927 if (c->syscall_archs) {
3928 #ifdef HAVE_SECCOMP
3929 Iterator j;
3930 void *id;
3931 #endif
3932
3933 fprintf(f,
3934 "%sSystemCallArchitectures:",
3935 prefix);
3936
3937 #ifdef HAVE_SECCOMP
3938 SET_FOREACH(id, c->syscall_archs, j)
3939 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3940 #endif
3941 fputc('\n', f);
3942 }
3943
3944 if (exec_context_restrict_namespaces_set(c)) {
3945 _cleanup_free_ char *s = NULL;
3946
3947 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3948 if (r >= 0)
3949 fprintf(f, "%sRestrictNamespaces: %s\n",
3950 prefix, s);
3951 }
3952
3953 if (c->syscall_errno > 0)
3954 fprintf(f,
3955 "%sSystemCallErrorNumber: %s\n",
3956 prefix, strna(errno_to_name(c->syscall_errno)));
3957
3958 if (c->apparmor_profile)
3959 fprintf(f,
3960 "%sAppArmorProfile: %s%s\n",
3961 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3962 }
3963
3964 bool exec_context_maintains_privileges(ExecContext *c) {
3965 assert(c);
3966
3967 /* Returns true if the process forked off would run under
3968 * an unchanged UID or as root. */
3969
3970 if (!c->user)
3971 return true;
3972
3973 if (streq(c->user, "root") || streq(c->user, "0"))
3974 return true;
3975
3976 return false;
3977 }
3978
3979 int exec_context_get_effective_ioprio(ExecContext *c) {
3980 int p;
3981
3982 assert(c);
3983
3984 if (c->ioprio_set)
3985 return c->ioprio;
3986
3987 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3988 if (p < 0)
3989 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3990
3991 return p;
3992 }
3993
3994 void exec_status_start(ExecStatus *s, pid_t pid) {
3995 assert(s);
3996
3997 zero(*s);
3998 s->pid = pid;
3999 dual_timestamp_get(&s->start_timestamp);
4000 }
4001
4002 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
4003 assert(s);
4004
4005 if (s->pid && s->pid != pid)
4006 zero(*s);
4007
4008 s->pid = pid;
4009 dual_timestamp_get(&s->exit_timestamp);
4010
4011 s->code = code;
4012 s->status = status;
4013
4014 if (context) {
4015 if (context->utmp_id)
4016 utmp_put_dead_process(context->utmp_id, pid, code, status);
4017
4018 exec_context_tty_reset(context, NULL);
4019 }
4020 }
4021
4022 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
4023 char buf[FORMAT_TIMESTAMP_MAX];
4024
4025 assert(s);
4026 assert(f);
4027
4028 if (s->pid <= 0)
4029 return;
4030
4031 prefix = strempty(prefix);
4032
4033 fprintf(f,
4034 "%sPID: "PID_FMT"\n",
4035 prefix, s->pid);
4036
4037 if (dual_timestamp_is_set(&s->start_timestamp))
4038 fprintf(f,
4039 "%sStart Timestamp: %s\n",
4040 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
4041
4042 if (dual_timestamp_is_set(&s->exit_timestamp))
4043 fprintf(f,
4044 "%sExit Timestamp: %s\n"
4045 "%sExit Code: %s\n"
4046 "%sExit Status: %i\n",
4047 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
4048 prefix, sigchld_code_to_string(s->code),
4049 prefix, s->status);
4050 }
4051
4052 char *exec_command_line(char **argv) {
4053 size_t k;
4054 char *n, *p, **a;
4055 bool first = true;
4056
4057 assert(argv);
4058
4059 k = 1;
4060 STRV_FOREACH(a, argv)
4061 k += strlen(*a)+3;
4062
4063 n = new(char, k);
4064 if (!n)
4065 return NULL;
4066
4067 p = n;
4068 STRV_FOREACH(a, argv) {
4069
4070 if (!first)
4071 *(p++) = ' ';
4072 else
4073 first = false;
4074
4075 if (strpbrk(*a, WHITESPACE)) {
4076 *(p++) = '\'';
4077 p = stpcpy(p, *a);
4078 *(p++) = '\'';
4079 } else
4080 p = stpcpy(p, *a);
4081
4082 }
4083
4084 *p = 0;
4085
4086 /* FIXME: this doesn't really handle arguments that have
4087 * spaces and ticks in them */
4088
4089 return n;
4090 }
4091
4092 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4093 _cleanup_free_ char *cmd = NULL;
4094 const char *prefix2;
4095
4096 assert(c);
4097 assert(f);
4098
4099 prefix = strempty(prefix);
4100 prefix2 = strjoina(prefix, "\t");
4101
4102 cmd = exec_command_line(c->argv);
4103 fprintf(f,
4104 "%sCommand Line: %s\n",
4105 prefix, cmd ? cmd : strerror(ENOMEM));
4106
4107 exec_status_dump(&c->exec_status, f, prefix2);
4108 }
4109
4110 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4111 assert(f);
4112
4113 prefix = strempty(prefix);
4114
4115 LIST_FOREACH(command, c, c)
4116 exec_command_dump(c, f, prefix);
4117 }
4118
4119 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4120 ExecCommand *end;
4121
4122 assert(l);
4123 assert(e);
4124
4125 if (*l) {
4126 /* It's kind of important, that we keep the order here */
4127 LIST_FIND_TAIL(command, *l, end);
4128 LIST_INSERT_AFTER(command, *l, end, e);
4129 } else
4130 *l = e;
4131 }
4132
4133 int exec_command_set(ExecCommand *c, const char *path, ...) {
4134 va_list ap;
4135 char **l, *p;
4136
4137 assert(c);
4138 assert(path);
4139
4140 va_start(ap, path);
4141 l = strv_new_ap(path, ap);
4142 va_end(ap);
4143
4144 if (!l)
4145 return -ENOMEM;
4146
4147 p = strdup(path);
4148 if (!p) {
4149 strv_free(l);
4150 return -ENOMEM;
4151 }
4152
4153 free(c->path);
4154 c->path = p;
4155
4156 strv_free(c->argv);
4157 c->argv = l;
4158
4159 return 0;
4160 }
4161
4162 int exec_command_append(ExecCommand *c, const char *path, ...) {
4163 _cleanup_strv_free_ char **l = NULL;
4164 va_list ap;
4165 int r;
4166
4167 assert(c);
4168 assert(path);
4169
4170 va_start(ap, path);
4171 l = strv_new_ap(path, ap);
4172 va_end(ap);
4173
4174 if (!l)
4175 return -ENOMEM;
4176
4177 r = strv_extend_strv(&c->argv, l, false);
4178 if (r < 0)
4179 return r;
4180
4181 return 0;
4182 }
4183
4184
4185 static int exec_runtime_allocate(ExecRuntime **rt) {
4186
4187 if (*rt)
4188 return 0;
4189
4190 *rt = new0(ExecRuntime, 1);
4191 if (!*rt)
4192 return -ENOMEM;
4193
4194 (*rt)->n_ref = 1;
4195 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4196
4197 return 0;
4198 }
4199
4200 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4201 int r;
4202
4203 assert(rt);
4204 assert(c);
4205 assert(id);
4206
4207 if (*rt)
4208 return 1;
4209
4210 if (!c->private_network && !c->private_tmp)
4211 return 0;
4212
4213 r = exec_runtime_allocate(rt);
4214 if (r < 0)
4215 return r;
4216
4217 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4218 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4219 return -errno;
4220 }
4221
4222 if (c->private_tmp && !(*rt)->tmp_dir) {
4223 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4224 if (r < 0)
4225 return r;
4226 }
4227
4228 return 1;
4229 }
4230
4231 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4232 assert(r);
4233 assert(r->n_ref > 0);
4234
4235 r->n_ref++;
4236 return r;
4237 }
4238
4239 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4240
4241 if (!r)
4242 return NULL;
4243
4244 assert(r->n_ref > 0);
4245
4246 r->n_ref--;
4247 if (r->n_ref > 0)
4248 return NULL;
4249
4250 free(r->tmp_dir);
4251 free(r->var_tmp_dir);
4252 safe_close_pair(r->netns_storage_socket);
4253 return mfree(r);
4254 }
4255
4256 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4257 assert(u);
4258 assert(f);
4259 assert(fds);
4260
4261 if (!rt)
4262 return 0;
4263
4264 if (rt->tmp_dir)
4265 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4266
4267 if (rt->var_tmp_dir)
4268 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4269
4270 if (rt->netns_storage_socket[0] >= 0) {
4271 int copy;
4272
4273 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4274 if (copy < 0)
4275 return copy;
4276
4277 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4278 }
4279
4280 if (rt->netns_storage_socket[1] >= 0) {
4281 int copy;
4282
4283 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4284 if (copy < 0)
4285 return copy;
4286
4287 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4288 }
4289
4290 return 0;
4291 }
4292
4293 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4294 int r;
4295
4296 assert(rt);
4297 assert(key);
4298 assert(value);
4299
4300 if (streq(key, "tmp-dir")) {
4301 char *copy;
4302
4303 r = exec_runtime_allocate(rt);
4304 if (r < 0)
4305 return log_oom();
4306
4307 copy = strdup(value);
4308 if (!copy)
4309 return log_oom();
4310
4311 free((*rt)->tmp_dir);
4312 (*rt)->tmp_dir = copy;
4313
4314 } else if (streq(key, "var-tmp-dir")) {
4315 char *copy;
4316
4317 r = exec_runtime_allocate(rt);
4318 if (r < 0)
4319 return log_oom();
4320
4321 copy = strdup(value);
4322 if (!copy)
4323 return log_oom();
4324
4325 free((*rt)->var_tmp_dir);
4326 (*rt)->var_tmp_dir = copy;
4327
4328 } else if (streq(key, "netns-socket-0")) {
4329 int fd;
4330
4331 r = exec_runtime_allocate(rt);
4332 if (r < 0)
4333 return log_oom();
4334
4335 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4336 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4337 else {
4338 safe_close((*rt)->netns_storage_socket[0]);
4339 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4340 }
4341 } else if (streq(key, "netns-socket-1")) {
4342 int fd;
4343
4344 r = exec_runtime_allocate(rt);
4345 if (r < 0)
4346 return log_oom();
4347
4348 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4349 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4350 else {
4351 safe_close((*rt)->netns_storage_socket[1]);
4352 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4353 }
4354 } else
4355 return 0;
4356
4357 return 1;
4358 }
4359
4360 static void *remove_tmpdir_thread(void *p) {
4361 _cleanup_free_ char *path = p;
4362
4363 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4364 return NULL;
4365 }
4366
4367 void exec_runtime_destroy(ExecRuntime *rt) {
4368 int r;
4369
4370 if (!rt)
4371 return;
4372
4373 /* If there are multiple users of this, let's leave the stuff around */
4374 if (rt->n_ref > 1)
4375 return;
4376
4377 if (rt->tmp_dir) {
4378 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4379
4380 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4381 if (r < 0) {
4382 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4383 free(rt->tmp_dir);
4384 }
4385
4386 rt->tmp_dir = NULL;
4387 }
4388
4389 if (rt->var_tmp_dir) {
4390 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4391
4392 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4393 if (r < 0) {
4394 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4395 free(rt->var_tmp_dir);
4396 }
4397
4398 rt->var_tmp_dir = NULL;
4399 }
4400
4401 safe_close_pair(rt->netns_storage_socket);
4402 }
4403
4404 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4405 [EXEC_INPUT_NULL] = "null",
4406 [EXEC_INPUT_TTY] = "tty",
4407 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4408 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4409 [EXEC_INPUT_SOCKET] = "socket",
4410 [EXEC_INPUT_NAMED_FD] = "fd",
4411 };
4412
4413 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4414
4415 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4416 [EXEC_OUTPUT_INHERIT] = "inherit",
4417 [EXEC_OUTPUT_NULL] = "null",
4418 [EXEC_OUTPUT_TTY] = "tty",
4419 [EXEC_OUTPUT_SYSLOG] = "syslog",
4420 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4421 [EXEC_OUTPUT_KMSG] = "kmsg",
4422 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4423 [EXEC_OUTPUT_JOURNAL] = "journal",
4424 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4425 [EXEC_OUTPUT_SOCKET] = "socket",
4426 [EXEC_OUTPUT_NAMED_FD] = "fd",
4427 };
4428
4429 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4430
4431 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4432 [EXEC_UTMP_INIT] = "init",
4433 [EXEC_UTMP_LOGIN] = "login",
4434 [EXEC_UTMP_USER] = "user",
4435 };
4436
4437 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4438
4439 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4440 [EXEC_PRESERVE_NO] = "no",
4441 [EXEC_PRESERVE_YES] = "yes",
4442 [EXEC_PRESERVE_RESTART] = "restart",
4443 };
4444
4445 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4446
4447 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4448 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4449 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4450 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4451 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4452 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4453 };
4454
4455 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
4456
4457 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4458 [EXEC_KEYRING_INHERIT] = "inherit",
4459 [EXEC_KEYRING_PRIVATE] = "private",
4460 [EXEC_KEYRING_SHARED] = "shared",
4461 };
4462
4463 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);