]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #6919 from poettering/ebpf-followup
[thirdparty/systemd.git] / src / core / execute.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <glob.h>
23 #include <grp.h>
24 #include <poll.h>
25 #include <signal.h>
26 #include <string.h>
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
29 #include <sys/mman.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
32 #include <sys/shm.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/un.h>
37 #include <unistd.h>
38 #include <utmpx.h>
39
40 #ifdef HAVE_PAM
41 #include <security/pam_appl.h>
42 #endif
43
44 #ifdef HAVE_SELINUX
45 #include <selinux/selinux.h>
46 #endif
47
48 #ifdef HAVE_SECCOMP
49 #include <seccomp.h>
50 #endif
51
52 #ifdef HAVE_APPARMOR
53 #include <sys/apparmor.h>
54 #endif
55
56 #include "sd-messages.h"
57
58 #include "af-list.h"
59 #include "alloc-util.h"
60 #ifdef HAVE_APPARMOR
61 #include "apparmor-util.h"
62 #endif
63 #include "async.h"
64 #include "barrier.h"
65 #include "cap-list.h"
66 #include "capability-util.h"
67 #include "def.h"
68 #include "env-util.h"
69 #include "errno-list.h"
70 #include "execute.h"
71 #include "exit-status.h"
72 #include "fd-util.h"
73 #include "fileio.h"
74 #include "format-util.h"
75 #include "fs-util.h"
76 #include "glob-util.h"
77 #include "io-util.h"
78 #include "ioprio.h"
79 #include "log.h"
80 #include "macro.h"
81 #include "missing.h"
82 #include "mkdir.h"
83 #include "namespace.h"
84 #include "parse-util.h"
85 #include "path-util.h"
86 #include "process-util.h"
87 #include "rlimit-util.h"
88 #include "rm-rf.h"
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92 #include "securebits.h"
93 #include "securebits-util.h"
94 #include "selinux-util.h"
95 #include "signal-util.h"
96 #include "smack-util.h"
97 #include "special.h"
98 #include "string-table.h"
99 #include "string-util.h"
100 #include "strv.h"
101 #include "syslog-util.h"
102 #include "terminal-util.h"
103 #include "unit.h"
104 #include "user-util.h"
105 #include "util.h"
106 #include "utmp-wtmp.h"
107
/* NOTE(review): presumably the two timeouts applied while waiting on the idle pipe before a
 * command is executed; the code using them is not part of this section — confirm. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() sets on (and expects from) a TTY. This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size connect_logger_as() requests for the stream socket connected to the journal */
#define SNDBUF_SIZE (8*1024*1024)
115
/* Moves the passed file descriptors so that fds[i] ends up as fd number i+3, i.e. the array occupies a
 * contiguous range starting right after stdin/stdout/stderr. The array is updated in place with the
 * new fd numbers. Returns 0 on success, or a negative errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted, which might be a higher one
                         * if the slot is still taken by a later entry of the array. */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* Slot was busy: remember the first index this happened for, so that the next
                         * pass starts over from there once the slot got vacated. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
160
/* Prepares the fds that are handed to the child: the first n_socket_fds entries get O_NONBLOCK set or
 * cleared according to 'nonblock', and FD_CLOEXEC is removed from every entry (socket and storage fds
 * alike). Returns 0 on success or the negative error returned by the first failing helper. */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        const unsigned total = n_storage_fds + n_socket_fds;
        unsigned k;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK is only touched for the socket activation fds */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* These fds are supposed to survive exec(), hence always drop FD_CLOEXEC */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
194 static const char *exec_context_tty_path(const ExecContext *context) {
195 assert(context);
196
197 if (context->stdio_as_fds)
198 return NULL;
199
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204 }
205
206 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
209 assert(context);
210
211 path = exec_context_tty_path(context);
212
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
219
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
229 }
230
231 static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236 }
237
238 static bool is_terminal_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244 }
245
246 static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250 }
251
252 static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256 }
257
258 static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
273 }
274
/* Opens /dev/null with the specified open flags (plus O_NOCTTY) and installs it as fd number 'nfd'.
 * Returns nfd on success, a negative errno-style error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, res;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We might have been lucky and got the right fd number right away */
        if (null_fd == nfd)
                return nfd;

        res = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return res;
}
292
293 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
294 static const union sockaddr_union sa = {
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
302 if (gid_is_valid(gid)) {
303 oldgid = getgid();
304
305 if (setegid(gid) < 0)
306 return -errno;
307 }
308
309 if (uid_is_valid(uid)) {
310 olduid = getuid();
311
312 if (seteuid(uid) < 0) {
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
323 if (uid_is_valid(uid))
324 (void) seteuid(olduid);
325
326 restore_gid:
327 if (gid_is_valid(gid))
328 (void) setegid(oldgid);
329
330 return r;
331 }
332
333 static int connect_logger_as(
334 Unit *unit,
335 const ExecContext *context,
336 const ExecParameters *params,
337 ExecOutput output,
338 const char *ident,
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
343 int fd, r;
344
345 assert(context);
346 assert(params);
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
350
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
353 return -errno;
354
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
358
359 if (shutdown(fd, SHUT_RD) < 0) {
360 safe_close(fd);
361 return -errno;
362 }
363
364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
365
366 dprintf(fd,
367 "%s\n"
368 "%s\n"
369 "%i\n"
370 "%i\n"
371 "%i\n"
372 "%i\n"
373 "%i\n",
374 context->syslog_identifier ?: ident,
375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
378 is_syslog_output(output),
379 is_kmsg_output(output),
380 is_terminal_output(output));
381
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
387
388 return r;
389 }
/* Opens the terminal at 'path' and installs it as fd number 'nfd'. Despite its name, 'mode' is OR-ed
 * with O_NOCTTY and handed to open_terminal(), i.e. it carries open flags. Returns nfd on success, a
 * negative errno-style error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, res;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        res = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return res;
}
408
409 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
413
414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
415 return EXEC_INPUT_NULL;
416
417 return std_input;
418 }
419
420 static int fixup_output(ExecOutput std_output, int socket_fd) {
421
422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
423 return EXEC_OUTPUT_INHERIT;
424
425 return std_output;
426 }
427
428 static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
431 int socket_fd,
432 int named_iofds[3]) {
433
434 ExecInput i;
435
436 assert(context);
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
449
450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
451
452 switch (i) {
453
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
461
462 fd = acquire_terminal(exec_context_tty_path(context),
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
466 USEC_INFINITY);
467 if (fd < 0)
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
472 safe_close(fd);
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
486 default:
487 assert_not_reached("Unknown input type");
488 }
489 }
490
/* Sets up the output file descriptor 'fileno' (the code distinguishes STDOUT_FILENO and
 * STDERR_FILENO) according to the execution context and parameters.
 *
 * An explicitly passed stdout_fd/stderr_fd in the parameters takes precedence. Otherwise the (fixed
 * up) output mode of the context decides. For stderr the stderr mode is used, and where possible
 * stdout is simply duplicated.
 *
 * 'ident', 'uid' and 'gid' are passed on to connect_logger_as() for the log stream modes. If the fd
 * got connected to the journal, the device/inode of the stream is stored in *journal_stream_dev and
 * *journal_stream_ino.
 *
 * Returns 'fileno' on success, a negative errno-style error otherwise. */
static int setup_output(
                Unit *unit,
                const ExecContext *context,
                const ExecParameters *params,
                int fileno,
                int socket_fd,
                int named_iofds[3],
                const char *ident,
                uid_t uid,
                gid_t gid,
                dev_t *journal_stream_dev,
                ino_t *journal_stream_ino) {

        ExecOutput o;
        ExecInput i;
        int r;

        assert(unit);
        assert(context);
        assert(params);
        assert(ident);
        assert(journal_stream_dev);
        assert(journal_stream_ino);

        /* Explicitly passed fds override whatever is configured in the context */
        if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {

                if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
                        return -errno;

                return STDOUT_FILENO;
        }

        if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
                if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
                        return -errno;

                return STDERR_FILENO;
        }

        /* Determine the effective input and stdout modes, the decisions below depend on both */
        i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
        o = fixup_output(context->std_output, socket_fd);

        if (fileno == STDERR_FILENO) {
                ExecOutput e;
                e = fixup_output(context->std_error, socket_fd);

                /* This expects the input and output are already set up */

                /* Don't change the stderr file descriptor if we inherit all
                 * the way and are not on a tty */
                if (e == EXEC_OUTPUT_INHERIT &&
                    o == EXEC_OUTPUT_INHERIT &&
                    i == EXEC_INPUT_NULL &&
                    !is_terminal_input(context->std_input) &&
                    getppid () != 1)
                        return fileno;

                /* Duplicate from stdout if possible */
                if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
                        return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;

                /* From here on the stderr mode is what the switch below operates on */
                o = e;

        } else if (o == EXEC_OUTPUT_INHERIT) {
                /* If input got downgraded, inherit the original value */
                if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
                        return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

                /* If the input is connected to anything that's not a /dev/null, inherit that... */
                if (i != EXEC_INPUT_NULL)
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
                if (getppid() != 1)
                        return fileno;

                /* We need to open /dev/null here anew, to get the right access mode. */
                return open_null_as(O_WRONLY, fileno);
        }

        switch (o) {

        case EXEC_OUTPUT_NULL:
                return open_null_as(O_WRONLY, fileno);

        case EXEC_OUTPUT_TTY:
                /* If stdin is a terminal already, reuse it for output */
                if (is_terminal_input(i))
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* We don't reset the terminal if this is just about output */
                return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

        case EXEC_OUTPUT_SYSLOG:
        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
        case EXEC_OUTPUT_KMSG:
        case EXEC_OUTPUT_KMSG_AND_CONSOLE:
        case EXEC_OUTPUT_JOURNAL:
        case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
                r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
                if (r < 0) {
                        /* Not being able to reach the journal is not fatal: warn and fall back to /dev/null */
                        log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
                        r = open_null_as(O_WRONLY, fileno);
                } else {
                        struct stat st;

                        /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
                         * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
                         * services to detect whether they are connected to the journal or not.
                         *
                         * If both stdout and stderr are connected to a stream then let's make sure to store the data
                         * about STDERR as that's usually the best way to do logging. */

                        if (fstat(fileno, &st) >= 0 &&
                            (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
                                *journal_stream_dev = st.st_dev;
                                *journal_stream_ino = st.st_ino;
                        }
                }
                return r;

        case EXEC_OUTPUT_SOCKET:
                assert(socket_fd >= 0);
                return dup2(socket_fd, fileno) < 0 ? -errno : fileno;

        case EXEC_OUTPUT_NAMED_FD:
                /* Make the named fd blocking before installing it, ignoring failures */
                (void) fd_nonblock(named_iofds[fileno], false);
                return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;

        default:
                assert_not_reached("Unknown error type");
        }
}
623
624 static int chown_terminal(int fd, uid_t uid) {
625 struct stat st;
626
627 assert(fd >= 0);
628
629 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
630 if (isatty(fd) < 1)
631 return 0;
632
633 /* This might fail. What matters are the results. */
634 (void) fchown(fd, uid, -1);
635 (void) fchmod(fd, TTY_MODE);
636
637 if (fstat(fd, &st) < 0)
638 return -errno;
639
640 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
641 return -EPERM;
642
643 return 0;
644 }
645
646 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
647 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
648 int r;
649
650 assert(_saved_stdin);
651 assert(_saved_stdout);
652
653 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
654 if (saved_stdin < 0)
655 return -errno;
656
657 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
658 if (saved_stdout < 0)
659 return -errno;
660
661 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
662 if (fd < 0)
663 return fd;
664
665 r = chown_terminal(fd, getuid());
666 if (r < 0)
667 return r;
668
669 r = reset_terminal_fd(fd, true);
670 if (r < 0)
671 return r;
672
673 if (dup2(fd, STDIN_FILENO) < 0)
674 return -errno;
675
676 if (dup2(fd, STDOUT_FILENO) < 0)
677 return -errno;
678
679 if (fd >= 2)
680 safe_close(fd);
681 fd = -1;
682
683 *_saved_stdin = saved_stdin;
684 *_saved_stdout = saved_stdout;
685
686 saved_stdin = saved_stdout = -1;
687
688 return 0;
689 }
690
691 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
692 assert(err < 0);
693
694 if (err == -ETIMEDOUT)
695 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
696 else {
697 errno = -err;
698 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
699 }
700 }
701
702 static void write_confirm_error(int err, const char *vc, const Unit *u) {
703 _cleanup_close_ int fd = -1;
704
705 assert(vc);
706
707 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
708 if (fd < 0)
709 return;
710
711 write_confirm_error_fd(err, fd, u);
712 }
713
/* Counterpart of setup_confirm_stdio(): releases the terminal, reinstalls the saved stdin/stdout
 * (where valid) and closes the saved copies, resetting them to the value safe_close() returns.
 * Returns 0 on success, or the negative errno-style error of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
735
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* 'f': don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS = 0,  /* 's': don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE = 1,          /* 'y': execute the command */
};
741
742 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
743 int saved_stdout = -1, saved_stdin = -1, r;
744 _cleanup_free_ char *e = NULL;
745 char c;
746
747 /* For any internal errors, assume a positive response. */
748 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
749 if (r < 0) {
750 write_confirm_error(r, vc, u);
751 return CONFIRM_EXECUTE;
752 }
753
754 /* confirm_spawn might have been disabled while we were sleeping. */
755 if (manager_is_confirm_spawn_disabled(u->manager)) {
756 r = 1;
757 goto restore_stdio;
758 }
759
760 e = ellipsize(cmdline, 60, 100);
761 if (!e) {
762 log_oom();
763 r = CONFIRM_EXECUTE;
764 goto restore_stdio;
765 }
766
767 for (;;) {
768 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
769 if (r < 0) {
770 write_confirm_error_fd(r, STDOUT_FILENO, u);
771 r = CONFIRM_EXECUTE;
772 goto restore_stdio;
773 }
774
775 switch (c) {
776 case 'c':
777 printf("Resuming normal execution.\n");
778 manager_disable_confirm_spawn();
779 r = 1;
780 break;
781 case 'D':
782 unit_dump(u, stdout, " ");
783 continue; /* ask again */
784 case 'f':
785 printf("Failing execution.\n");
786 r = CONFIRM_PRETEND_FAILURE;
787 break;
788 case 'h':
789 printf(" c - continue, proceed without asking anymore\n"
790 " D - dump, show the state of the unit\n"
791 " f - fail, don't execute the command and pretend it failed\n"
792 " h - help\n"
793 " i - info, show a short summary of the unit\n"
794 " j - jobs, show jobs that are in progress\n"
795 " s - skip, don't execute the command and pretend it succeeded\n"
796 " y - yes, execute the command\n");
797 continue; /* ask again */
798 case 'i':
799 printf(" Description: %s\n"
800 " Unit: %s\n"
801 " Command: %s\n",
802 u->id, u->description, cmdline);
803 continue; /* ask again */
804 case 'j':
805 manager_dump_jobs(u->manager, stdout, " ");
806 continue; /* ask again */
807 case 'n':
808 /* 'n' was removed in favor of 'f'. */
809 printf("Didn't understand 'n', did you mean 'f'?\n");
810 continue; /* ask again */
811 case 's':
812 printf("Skipping execution.\n");
813 r = CONFIRM_PRETEND_SUCCESS;
814 break;
815 case 'y':
816 r = CONFIRM_EXECUTE;
817 break;
818 default:
819 assert_not_reached("Unhandled choice");
820 }
821 break;
822 }
823
824 restore_stdio:
825 restore_confirm_stdio(&saved_stdin, &saved_stdout);
826 return r;
827 }
828
829 static int get_fixed_user(const ExecContext *c, const char **user,
830 uid_t *uid, gid_t *gid,
831 const char **home, const char **shell) {
832 int r;
833 const char *name;
834
835 assert(c);
836
837 if (!c->user)
838 return 0;
839
840 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
841 * (i.e. are "/" or "/bin/nologin"). */
842
843 name = c->user;
844 r = get_user_creds_clean(&name, uid, gid, home, shell);
845 if (r < 0)
846 return r;
847
848 *user = name;
849 return 0;
850 }
851
852 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
853 int r;
854 const char *name;
855
856 assert(c);
857
858 if (!c->group)
859 return 0;
860
861 name = c->group;
862 r = get_group_creds(&name, gid);
863 if (r < 0)
864 return r;
865
866 *group = name;
867 return 0;
868 }
869
870 static int get_supplementary_groups(const ExecContext *c, const char *user,
871 const char *group, gid_t gid,
872 gid_t **supplementary_gids, int *ngids) {
873 char **i;
874 int r, k = 0;
875 int ngroups_max;
876 bool keep_groups = false;
877 gid_t *groups = NULL;
878 _cleanup_free_ gid_t *l_gids = NULL;
879
880 assert(c);
881
882 /*
883 * If user is given, then lookup GID and supplementary groups list.
884 * We avoid NSS lookups for gid=0. Also we have to initialize groups
885 * here and as early as possible so we keep the list of supplementary
886 * groups of the caller.
887 */
888 if (user && gid_is_valid(gid) && gid != 0) {
889 /* First step, initialize groups from /etc/groups */
890 if (initgroups(user, gid) < 0)
891 return -errno;
892
893 keep_groups = true;
894 }
895
896 if (!c->supplementary_groups)
897 return 0;
898
899 /*
900 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
901 * be positive, otherwise fail.
902 */
903 errno = 0;
904 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
905 if (ngroups_max <= 0) {
906 if (errno > 0)
907 return -errno;
908 else
909 return -EOPNOTSUPP; /* For all other values */
910 }
911
912 l_gids = new(gid_t, ngroups_max);
913 if (!l_gids)
914 return -ENOMEM;
915
916 if (keep_groups) {
917 /*
918 * Lookup the list of groups that the user belongs to, we
919 * avoid NSS lookups here too for gid=0.
920 */
921 k = ngroups_max;
922 if (getgrouplist(user, gid, l_gids, &k) < 0)
923 return -EINVAL;
924 } else
925 k = 0;
926
927 STRV_FOREACH(i, c->supplementary_groups) {
928 const char *g;
929
930 if (k >= ngroups_max)
931 return -E2BIG;
932
933 g = *i;
934 r = get_group_creds(&g, l_gids+k);
935 if (r < 0)
936 return r;
937
938 k++;
939 }
940
941 /*
942 * Sets ngids to zero to drop all supplementary groups, happens
943 * when we are under root and SupplementaryGroups= is empty.
944 */
945 if (k == 0) {
946 *ngids = 0;
947 return 0;
948 }
949
950 /* Otherwise get the final list of supplementary groups */
951 groups = memdup(l_gids, sizeof(gid_t) * k);
952 if (!groups)
953 return -ENOMEM;
954
955 *supplementary_gids = groups;
956 *ngids = k;
957
958 groups = NULL;
959
960 return 0;
961 }
962
963 static int enforce_groups(const ExecContext *context, gid_t gid,
964 gid_t *supplementary_gids, int ngids) {
965 int r;
966
967 assert(context);
968
969 /* Handle SupplementaryGroups= even if it is empty */
970 if (context->supplementary_groups) {
971 r = maybe_setgroups(ngids, supplementary_gids);
972 if (r < 0)
973 return r;
974 }
975
976 if (gid_is_valid(gid)) {
977 /* Then set our gids */
978 if (setresgid(gid, gid, gid) < 0)
979 return -errno;
980 }
981
982 return 0;
983 }
984
985 static int enforce_user(const ExecContext *context, uid_t uid) {
986 assert(context);
987
988 if (!uid_is_valid(uid))
989 return 0;
990
991 /* Sets (but doesn't look up) the uid and make sure we keep the
992 * capabilities while doing so. */
993
994 if (context->capability_ambient_set != 0) {
995
996 /* First step: If we need to keep capabilities but
997 * drop privileges we need to make sure we keep our
998 * caps, while we drop privileges. */
999 if (uid != 0) {
1000 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1001
1002 if (prctl(PR_GET_SECUREBITS) != sb)
1003 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1004 return -errno;
1005 }
1006 }
1007
1008 /* Second step: actually set the uids */
1009 if (setresuid(uid, uid, uid) < 0)
1010 return -errno;
1011
1012 /* At this point we should have all necessary capabilities but
1013 are otherwise a normal user. However, the caps might got
1014 corrupted due to the setresuid() so we need clean them up
1015 later. This is done outside of this call. */
1016
1017 return 0;
1018 }
1019
1020 #ifdef HAVE_PAM
1021
1022 static int null_conv(
1023 int num_msg,
1024 const struct pam_message **msg,
1025 struct pam_response **resp,
1026 void *appdata_ptr) {
1027
1028 /* We don't support conversations */
1029
1030 return PAM_CONV_ERR;
1031 }
1032
1033 #endif
1034
/* Opens a PAM session for 'user' using the PAM service 'name', and forks off a helper process that
 * closes the session again later.
 *
 * 'tty', if non-NULL, is registered as PAM_TTY. The variables in *env are exported into the PAM
 * environment before the session is opened; on success *env is freed and replaced by the
 * environment list PAM returns. 'fds'/'n_fds' are the file descriptors the helper process closes
 * for itself, and 'uid'/'gid' the credentials it drops privileges to.
 *
 * Returns 0 on success (and unconditionally if compiled without PAM support). On failure returns
 * -EPERM for PAM errors, or a negative errno-style error otherwise. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Export our environment to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        /* Remember our PID, so that the child can detect whether we died in the meantime */
        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        /* Sleep until SIGTERM arrives, retrying if we get interrupted */
                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment by the one PAM handed us */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        /* Tear down whatever part of the PAM transaction was set up already */
        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1251
/* Renames the process after the last component of 'path', wrapped in parentheses. Only the last 8
 * characters of the name are used, so that the result is at most 10 characters long (i.e. the length
 * of "/sbin/init") and looks pretty in /bin/ps. If the path has no usable last component "(...)" is
 * used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11]; /* '(' + up to 8 characters + ')' + NUL */
        const char *base;
        size_t n;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail rather than the head: it is usually the more interesting part,
                 * since the beginning might just be "systemd-". */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
1282
1283 static bool context_has_address_families(const ExecContext *c) {
1284 assert(c);
1285
1286 return c->address_families_whitelist ||
1287 !set_isempty(c->address_families);
1288 }
1289
1290 static bool context_has_syscall_filters(const ExecContext *c) {
1291 assert(c);
1292
1293 return c->syscall_whitelist ||
1294 !set_isempty(c->syscall_filter);
1295 }
1296
1297 static bool context_has_no_new_privileges(const ExecContext *c) {
1298 assert(c);
1299
1300 if (c->no_new_privileges)
1301 return true;
1302
1303 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1304 return false;
1305
1306 /* We need NNP if we have any form of seccomp and are unprivileged */
1307 return context_has_address_families(c) ||
1308 c->memory_deny_write_execute ||
1309 c->restrict_realtime ||
1310 exec_context_restrict_namespaces_set(c) ||
1311 c->protect_kernel_tunables ||
1312 c->protect_kernel_modules ||
1313 c->private_devices ||
1314 context_has_syscall_filters(c) ||
1315 !set_isempty(c->syscall_archs) ||
1316 c->lock_personality;
1317 }
1318
1319 #ifdef HAVE_SECCOMP
1320
1321 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1322
1323 if (is_seccomp_available())
1324 return false;
1325
1326 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1327 return true;
1328 }
1329
1330 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1331 uint32_t negative_action, default_action, action;
1332 int r;
1333
1334 assert(u);
1335 assert(c);
1336
1337 if (!context_has_syscall_filters(c))
1338 return 0;
1339
1340 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1341 return 0;
1342
1343 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1344
1345 if (c->syscall_whitelist) {
1346 default_action = negative_action;
1347 action = SCMP_ACT_ALLOW;
1348 } else {
1349 default_action = SCMP_ACT_ALLOW;
1350 action = negative_action;
1351 }
1352
1353 if (needs_ambient_hack) {
1354 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1355 if (r < 0)
1356 return r;
1357 }
1358
1359 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1360 }
1361
1362 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1363 assert(u);
1364 assert(c);
1365
1366 if (set_isempty(c->syscall_archs))
1367 return 0;
1368
1369 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1370 return 0;
1371
1372 return seccomp_restrict_archs(c->syscall_archs);
1373 }
1374
1375 static int apply_address_families(const Unit* u, const ExecContext *c) {
1376 assert(u);
1377 assert(c);
1378
1379 if (!context_has_address_families(c))
1380 return 0;
1381
1382 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1383 return 0;
1384
1385 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1386 }
1387
1388 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1389 assert(u);
1390 assert(c);
1391
1392 if (!c->memory_deny_write_execute)
1393 return 0;
1394
1395 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1396 return 0;
1397
1398 return seccomp_memory_deny_write_execute();
1399 }
1400
1401 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1402 assert(u);
1403 assert(c);
1404
1405 if (!c->restrict_realtime)
1406 return 0;
1407
1408 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1409 return 0;
1410
1411 return seccomp_restrict_realtime();
1412 }
1413
1414 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1415 assert(u);
1416 assert(c);
1417
1418 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1419 * let's protect even those systems where this is left on in the kernel. */
1420
1421 if (!c->protect_kernel_tunables)
1422 return 0;
1423
1424 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1425 return 0;
1426
1427 return seccomp_protect_sysctl();
1428 }
1429
1430 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1431 assert(u);
1432 assert(c);
1433
1434 /* Turn off module syscalls on ProtectKernelModules=yes */
1435
1436 if (!c->protect_kernel_modules)
1437 return 0;
1438
1439 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1440 return 0;
1441
1442 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1443 }
1444
1445 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1446 assert(u);
1447 assert(c);
1448
1449 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1450
1451 if (!c->private_devices)
1452 return 0;
1453
1454 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1455 return 0;
1456
1457 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1458 }
1459
1460 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1461 assert(u);
1462 assert(c);
1463
1464 if (!exec_context_restrict_namespaces_set(c))
1465 return 0;
1466
1467 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1468 return 0;
1469
1470 return seccomp_restrict_namespaces(c->restrict_namespaces);
1471 }
1472
1473 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1474 unsigned long personality;
1475 int r;
1476
1477 assert(u);
1478 assert(c);
1479
1480 if (!c->lock_personality)
1481 return 0;
1482
1483 if (skip_seccomp_unavailable(u, "LockPersonality="))
1484 return 0;
1485
1486 personality = c->personality;
1487
1488 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1489 if (personality == PERSONALITY_INVALID) {
1490
1491 r = opinionated_personality(&personality);
1492 if (r < 0)
1493 return r;
1494 }
1495
1496 return seccomp_lock_personality(personality);
1497 }
1498
1499 #endif
1500
1501 static void do_idle_pipe_dance(int idle_pipe[4]) {
1502 assert(idle_pipe);
1503
1504 idle_pipe[1] = safe_close(idle_pipe[1]);
1505 idle_pipe[2] = safe_close(idle_pipe[2]);
1506
1507 if (idle_pipe[0] >= 0) {
1508 int r;
1509
1510 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1511
1512 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1513 ssize_t n;
1514
1515 /* Signal systemd that we are bored and want to continue. */
1516 n = write(idle_pipe[3], "x", 1);
1517 if (n > 0)
1518 /* Wait for systemd to react to the signal above. */
1519 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1520 }
1521
1522 idle_pipe[0] = safe_close(idle_pipe[0]);
1523
1524 }
1525
1526 idle_pipe[3] = safe_close(idle_pipe[3]);
1527 }
1528
1529 static int build_environment(
1530 Unit *u,
1531 const ExecContext *c,
1532 const ExecParameters *p,
1533 unsigned n_fds,
1534 const char *home,
1535 const char *username,
1536 const char *shell,
1537 dev_t journal_stream_dev,
1538 ino_t journal_stream_ino,
1539 char ***ret) {
1540
1541 _cleanup_strv_free_ char **our_env = NULL;
1542 unsigned n_env = 0;
1543 char *x;
1544
1545 assert(u);
1546 assert(c);
1547 assert(ret);
1548
1549 our_env = new0(char*, 14);
1550 if (!our_env)
1551 return -ENOMEM;
1552
1553 if (n_fds > 0) {
1554 _cleanup_free_ char *joined = NULL;
1555
1556 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1557 return -ENOMEM;
1558 our_env[n_env++] = x;
1559
1560 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1561 return -ENOMEM;
1562 our_env[n_env++] = x;
1563
1564 joined = strv_join(p->fd_names, ":");
1565 if (!joined)
1566 return -ENOMEM;
1567
1568 x = strjoin("LISTEN_FDNAMES=", joined);
1569 if (!x)
1570 return -ENOMEM;
1571 our_env[n_env++] = x;
1572 }
1573
1574 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1575 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1576 return -ENOMEM;
1577 our_env[n_env++] = x;
1578
1579 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1580 return -ENOMEM;
1581 our_env[n_env++] = x;
1582 }
1583
1584 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1585 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1586 * check the database directly. */
1587 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1588 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1589 if (!x)
1590 return -ENOMEM;
1591 our_env[n_env++] = x;
1592 }
1593
1594 if (home) {
1595 x = strappend("HOME=", home);
1596 if (!x)
1597 return -ENOMEM;
1598 our_env[n_env++] = x;
1599 }
1600
1601 if (username) {
1602 x = strappend("LOGNAME=", username);
1603 if (!x)
1604 return -ENOMEM;
1605 our_env[n_env++] = x;
1606
1607 x = strappend("USER=", username);
1608 if (!x)
1609 return -ENOMEM;
1610 our_env[n_env++] = x;
1611 }
1612
1613 if (shell) {
1614 x = strappend("SHELL=", shell);
1615 if (!x)
1616 return -ENOMEM;
1617 our_env[n_env++] = x;
1618 }
1619
1620 if (!sd_id128_is_null(u->invocation_id)) {
1621 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1622 return -ENOMEM;
1623
1624 our_env[n_env++] = x;
1625 }
1626
1627 if (exec_context_needs_term(c)) {
1628 const char *tty_path, *term = NULL;
1629
1630 tty_path = exec_context_tty_path(c);
1631
1632 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1633 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1634 * passes to PID 1 ends up all the way in the console login shown. */
1635
1636 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1637 term = getenv("TERM");
1638 if (!term)
1639 term = default_term_for_tty(tty_path);
1640
1641 x = strappend("TERM=", term);
1642 if (!x)
1643 return -ENOMEM;
1644 our_env[n_env++] = x;
1645 }
1646
1647 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1648 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1649 return -ENOMEM;
1650
1651 our_env[n_env++] = x;
1652 }
1653
1654 our_env[n_env++] = NULL;
1655 assert(n_env <= 12);
1656
1657 *ret = our_env;
1658 our_env = NULL;
1659
1660 return 0;
1661 }
1662
1663 static int build_pass_environment(const ExecContext *c, char ***ret) {
1664 _cleanup_strv_free_ char **pass_env = NULL;
1665 size_t n_env = 0, n_bufsize = 0;
1666 char **i;
1667
1668 STRV_FOREACH(i, c->pass_environment) {
1669 _cleanup_free_ char *x = NULL;
1670 char *v;
1671
1672 v = getenv(*i);
1673 if (!v)
1674 continue;
1675 x = strjoin(*i, "=", v);
1676 if (!x)
1677 return -ENOMEM;
1678
1679 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1680 return -ENOMEM;
1681
1682 pass_env[n_env++] = x;
1683 pass_env[n_env] = NULL;
1684 x = NULL;
1685 }
1686
1687 *ret = pass_env;
1688 pass_env = NULL;
1689
1690 return 0;
1691 }
1692
1693 static bool exec_needs_mount_namespace(
1694 const ExecContext *context,
1695 const ExecParameters *params,
1696 ExecRuntime *runtime) {
1697
1698 assert(context);
1699 assert(params);
1700
1701 if (context->root_image)
1702 return true;
1703
1704 if (!strv_isempty(context->read_write_paths) ||
1705 !strv_isempty(context->read_only_paths) ||
1706 !strv_isempty(context->inaccessible_paths))
1707 return true;
1708
1709 if (context->n_bind_mounts > 0)
1710 return true;
1711
1712 if (context->mount_flags != 0)
1713 return true;
1714
1715 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1716 return true;
1717
1718 if (context->private_devices ||
1719 context->protect_system != PROTECT_SYSTEM_NO ||
1720 context->protect_home != PROTECT_HOME_NO ||
1721 context->protect_kernel_tunables ||
1722 context->protect_kernel_modules ||
1723 context->protect_control_groups)
1724 return true;
1725
1726 if (context->mount_apivfs && (context->root_image || context->root_directory))
1727 return true;
1728
1729 return false;
1730 }
1731
/* Moves the calling process into a new user namespace and installs a UID/GID mapping for it (see the
 * detailed description at the top of the function body).
 *
 * uid, gid: identity that shall be mapped onto itself in addition to root. The extra mapping line is
 * only written if the respective ID is non-zero and valid.
 *
 * Returns 0 on success and a negative errno-style value on failure. If the helper child sends an
 * error code through the pipe, that code is returned if it is negative; -EIO is returned if the child
 * sent something else, if a short message was read, or if the child did not exit with status 0. */
1732 static int setup_private_users(uid_t uid, gid_t gid) {
1733 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1734 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1735 _cleanup_close_ int unshare_ready_fd = -1;
1736 _cleanup_(sigkill_waitp) pid_t pid = 0;
/* Value exchanged over the eventfd: written by the parent, read by the child */
1737 uint64_t c = 1;
1738 siginfo_t si;
1739 ssize_t n;
1740 int r;
1741
1742 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1743 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1744 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1745 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1746 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1747 * continues execution normally. */
1748
1749 if (uid != 0 && uid_is_valid(uid)) {
1750 r = asprintf(&uid_map,
1751 "0 0 1\n" /* Map root → root */
1752 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1753 uid, uid);
1754 if (r < 0)
1755 return -ENOMEM;
1756 } else {
1757 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1758 if (!uid_map)
1759 return -ENOMEM;
1760 }
1761
1762 if (gid != 0 && gid_is_valid(gid)) {
1763 r = asprintf(&gid_map,
1764 "0 0 1\n" /* Map root → root */
1765 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1766 gid, gid);
1767 if (r < 0)
1768 return -ENOMEM;
1769 } else {
1770 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1771 if (!gid_map)
1772 return -ENOMEM;
1773 }
1774
1775 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1776 * namespace. */
1777 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1778 if (unshare_ready_fd < 0)
1779 return -errno;
1780
1781 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1782 * failed. */
1783 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1784 return -errno;
1785
1786 pid = fork();
1787 if (pid < 0)
1788 return -errno;
1789
1790 if (pid == 0) {
1791 _cleanup_close_ int fd = -1;
1792 const char *a;
1793 pid_t ppid;
1794
1795 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1796 * here, after the parent opened its own user namespace. */
1797
1798 ppid = getppid();
/* The child only ever writes to the error pipe, hence close the reading side */
1799 errno_pipe[0] = safe_close(errno_pipe[0]);
1800
1801 /* Wait until the parent unshared the user namespace */
1802 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1803 r = -errno;
1804 goto child_fail;
1805 }
1806
1807 /* Disable the setgroups() system call in the child user namespace, for good. */
1808 a = procfs_file_alloca(ppid, "setgroups");
1809 fd = open(a, O_WRONLY|O_CLOEXEC);
1810 if (fd < 0) {
1811 if (errno != ENOENT) {
1812 r = -errno;
1813 goto child_fail;
1814 }
1815
1816 /* If the file is missing the kernel is too old, let's continue anyway. */
1817 } else {
1818 if (write(fd, "deny\n", 5) < 0) {
1819 r = -errno;
1820 goto child_fail;
1821 }
1822
1823 fd = safe_close(fd);
1824 }
1825
1826 /* First write the GID map */
1827 a = procfs_file_alloca(ppid, "gid_map");
1828 fd = open(a, O_WRONLY|O_CLOEXEC);
1829 if (fd < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1834 r = -errno;
1835 goto child_fail;
1836 }
1837 fd = safe_close(fd);
1838
1839 /* Then write the UID map */
1840 a = procfs_file_alloca(ppid, "uid_map");
1841 fd = open(a, O_WRONLY|O_CLOEXEC);
1842 if (fd < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1847 r = -errno;
1848 goto child_fail;
1849 }
1850
/* Success: exit without having written anything to the error pipe, so that the parent reads EOF */
1851 _exit(EXIT_SUCCESS);
1852
1853 child_fail:
/* Report the negative errno-style code to the parent; a failure to write it is ignored */
1854 (void) write(errno_pipe[1], &r, sizeof(r));
1855 _exit(EXIT_FAILURE);
1856 }
1857
/* Parent: close our copy of the writing side, so that the read() below sees EOF once the child is done */
1858 errno_pipe[1] = safe_close(errno_pipe[1]);
1859
1860 if (unshare(CLONE_NEWUSER) < 0)
1861 return -errno;
1862
1863 /* Let the child know that the namespace is ready now */
1864 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1865 return -errno;
1866
1867 /* Try to read an error code from the child */
1868 n = read(errno_pipe[0], &r, sizeof(r));
1869 if (n < 0)
1870 return -errno;
1871 if (n == sizeof(r)) { /* an error code was sent to us */
1872 if (r < 0)
1873 return r;
1874 return -EIO;
1875 }
1876 if (n != 0) /* on success we should have read 0 bytes */
1877 return -EIO;
1878
1879 r = wait_for_terminate(pid, &si);
1880 if (r < 0)
1881 return r;
/* NOTE(review): presumably resetting pid to 0 turns the sigkill_waitp cleanup into a NOP now that the
 * child has been waited for — confirm against the definition of sigkill_waitp */
1882 pid = 0;
1883
1884 /* If something strange happened with the child, let's consider this fatal, too */
1885 if (si.si_code != CLD_EXITED || si.si_status != 0)
1886 return -EIO;
1887
1888 return 0;
1889 }
1890
1891 static int setup_exec_directory(
1892 const ExecContext *context,
1893 const ExecParameters *params,
1894 uid_t uid,
1895 gid_t gid,
1896 ExecDirectoryType type,
1897 int *exit_status) {
1898
1899 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1900 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1901 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1902 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1903 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1904 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1905 };
1906 char **rt;
1907 int r;
1908
1909 assert(context);
1910 assert(params);
1911 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1912 assert(exit_status);
1913
1914 if (!params->prefix[type])
1915 return 0;
1916
1917 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1918 if (!uid_is_valid(uid))
1919 uid = 0;
1920 if (!gid_is_valid(gid))
1921 gid = 0;
1922 }
1923
1924 STRV_FOREACH(rt, context->directories[type].paths) {
1925 _cleanup_free_ char *p;
1926
1927 p = strjoin(params->prefix[type], "/", *rt);
1928 if (!p) {
1929 r = -ENOMEM;
1930 goto fail;
1931 }
1932
1933 r = mkdir_parents_label(p, 0755);
1934 if (r < 0)
1935 goto fail;
1936
1937 r = mkdir_p_label(p, context->directories[type].mode);
1938 if (r < 0)
1939 goto fail;
1940
1941 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1942 * a service, and shall not be writable. */
1943 if (type == EXEC_DIRECTORY_CONFIGURATION)
1944 continue;
1945
1946 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
1947 if (r < 0)
1948 goto fail;
1949 }
1950
1951 return 0;
1952
1953 fail:
1954 *exit_status = exit_status_table[type];
1955
1956 return r;
1957 }
1958
1959 static int setup_smack(
1960 const ExecContext *context,
1961 const ExecCommand *command) {
1962
1963 int r;
1964
1965 assert(context);
1966 assert(command);
1967
1968 if (context->smack_process_label) {
1969 r = mac_smack_apply_pid(0, context->smack_process_label);
1970 if (r < 0)
1971 return r;
1972 }
1973 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1974 else {
1975 _cleanup_free_ char *exec_label = NULL;
1976
1977 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1978 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1979 return r;
1980
1981 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1982 if (r < 0)
1983 return r;
1984 }
1985 #endif
1986
1987 return 0;
1988 }
1989
1990 static int compile_read_write_paths(
1991 const ExecContext *context,
1992 const ExecParameters *params,
1993 char ***ret) {
1994
1995 _cleanup_strv_free_ char **l = NULL;
1996 char **rt;
1997 ExecDirectoryType i;
1998
1999 /* Compile the list of writable paths. This is the combination of
2000 * the explicitly configured paths, plus all runtime directories. */
2001
2002 if (strv_isempty(context->read_write_paths)) {
2003 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2004 if (!strv_isempty(context->directories[i].paths))
2005 break;
2006
2007 if (i == _EXEC_DIRECTORY_MAX) {
2008 *ret = NULL; /* NOP if neither is set */
2009 return 0;
2010 }
2011 }
2012
2013 l = strv_copy(context->read_write_paths);
2014 if (!l)
2015 return -ENOMEM;
2016
2017 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2018 if (!params->prefix[i])
2019 continue;
2020
2021 STRV_FOREACH(rt, context->directories[i].paths) {
2022 char *s;
2023
2024 s = strjoin(params->prefix[i], "/", *rt);
2025 if (!s)
2026 return -ENOMEM;
2027
2028 if (strv_consume(&l, s) < 0)
2029 return -ENOMEM;
2030 }
2031 }
2032
2033 *ret = l;
2034 l = NULL;
2035
2036 return 0;
2037 }
2038
2039 static int apply_mount_namespace(
2040 Unit *u,
2041 ExecCommand *command,
2042 const ExecContext *context,
2043 const ExecParameters *params,
2044 ExecRuntime *runtime) {
2045
2046 _cleanup_strv_free_ char **rw = NULL;
2047 char *tmp = NULL, *var = NULL;
2048 const char *root_dir = NULL, *root_image = NULL;
2049 NameSpaceInfo ns_info = {
2050 .ignore_protect_paths = false,
2051 .private_dev = context->private_devices,
2052 .protect_control_groups = context->protect_control_groups,
2053 .protect_kernel_tunables = context->protect_kernel_tunables,
2054 .protect_kernel_modules = context->protect_kernel_modules,
2055 .mount_apivfs = context->mount_apivfs,
2056 };
2057 bool needs_sandboxing;
2058 int r;
2059
2060 assert(context);
2061
2062 /* The runtime struct only contains the parent of the private /tmp,
2063 * which is non-accessible to world users. Inside of it there's a /tmp
2064 * that is sticky, and that's the one we want to use here. */
2065
2066 if (context->private_tmp && runtime) {
2067 if (runtime->tmp_dir)
2068 tmp = strjoina(runtime->tmp_dir, "/tmp");
2069 if (runtime->var_tmp_dir)
2070 var = strjoina(runtime->var_tmp_dir, "/tmp");
2071 }
2072
2073 r = compile_read_write_paths(context, params, &rw);
2074 if (r < 0)
2075 return r;
2076
2077 if (params->flags & EXEC_APPLY_CHROOT) {
2078 root_image = context->root_image;
2079
2080 if (!root_image)
2081 root_dir = context->root_directory;
2082 }
2083
2084 /*
2085 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2086 * sandbox info, otherwise enforce it, don't ignore protected paths and
2087 * fail if we are enable to apply the sandbox inside the mount namespace.
2088 */
2089 if (!context->dynamic_user && root_dir)
2090 ns_info.ignore_protect_paths = true;
2091
2092 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2093
2094 r = setup_namespace(root_dir, root_image,
2095 &ns_info, rw,
2096 needs_sandboxing ? context->read_only_paths : NULL,
2097 needs_sandboxing ? context->inaccessible_paths : NULL,
2098 context->bind_mounts,
2099 context->n_bind_mounts,
2100 tmp,
2101 var,
2102 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2103 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2104 context->mount_flags,
2105 DISSECT_IMAGE_DISCARD_ON_LOOP);
2106
2107 /* If we couldn't set up the namespace this is probably due to a
2108 * missing capability. In this case, silently proceeed. */
2109 if (IN_SET(r, -EPERM, -EACCES)) {
2110 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2111 return 0;
2112 }
2113
2114 return r;
2115 }
2116
2117 static int apply_working_directory(
2118 const ExecContext *context,
2119 const ExecParameters *params,
2120 const char *home,
2121 const bool needs_mount_ns,
2122 int *exit_status) {
2123
2124 const char *d, *wd;
2125
2126 assert(context);
2127 assert(exit_status);
2128
2129 if (context->working_directory_home) {
2130
2131 if (!home) {
2132 *exit_status = EXIT_CHDIR;
2133 return -ENXIO;
2134 }
2135
2136 wd = home;
2137
2138 } else if (context->working_directory)
2139 wd = context->working_directory;
2140 else
2141 wd = "/";
2142
2143 if (params->flags & EXEC_APPLY_CHROOT) {
2144 if (!needs_mount_ns && context->root_directory)
2145 if (chroot(context->root_directory) < 0) {
2146 *exit_status = EXIT_CHROOT;
2147 return -errno;
2148 }
2149
2150 d = wd;
2151 } else
2152 d = prefix_roota(context->root_directory, wd);
2153
2154 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2155 *exit_status = EXIT_CHDIR;
2156 return -errno;
2157 }
2158
2159 return 0;
2160 }
2161
2162 static int setup_keyring(
2163 Unit *u,
2164 const ExecContext *context,
2165 const ExecParameters *p,
2166 uid_t uid, gid_t gid) {
2167
2168 key_serial_t keyring;
2169 int r;
2170
2171 assert(u);
2172 assert(context);
2173 assert(p);
2174
2175 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2176 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2177 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2178 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2179 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2180 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2181
2182 if (!(p->flags & EXEC_NEW_KEYRING))
2183 return 0;
2184
2185 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2186 return 0;
2187
2188 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2189 if (keyring == -1) {
2190 if (errno == ENOSYS)
2191 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2192 else if (IN_SET(errno, EACCES, EPERM))
2193 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2194 else if (errno == EDQUOT)
2195 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2196 else
2197 return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2198
2199 return 0;
2200 }
2201
2202 /* Populate they keyring with the invocation ID by default. */
2203 if (!sd_id128_is_null(u->invocation_id)) {
2204 key_serial_t key;
2205
2206 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2207 if (key == -1)
2208 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2209 else {
2210 if (keyctl(KEYCTL_SETPERM, key,
2211 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2212 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2213 return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2214 }
2215 }
2216
2217 /* And now, make the keyring owned by the service's user */
2218 if (uid_is_valid(uid) || gid_is_valid(gid))
2219 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2220 return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
2221
2222 /* When requested link the user keyring into the session keyring. */
2223 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2224 uid_t saved_uid;
2225 gid_t saved_gid;
2226
2227 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
2228 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
2229 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
2230
2231 saved_uid = getuid();
2232 saved_gid = getgid();
2233
2234 if (gid_is_valid(gid) && gid != saved_gid) {
2235 if (setregid(gid, -1) < 0)
2236 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2237 }
2238
2239 if (uid_is_valid(uid) && uid != saved_uid) {
2240 if (setreuid(uid, -1) < 0) {
2241 (void) setregid(saved_gid, -1);
2242 return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2243 }
2244 }
2245
2246 if (keyctl(KEYCTL_LINK,
2247 KEY_SPEC_USER_KEYRING,
2248 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2249
2250 r = -errno;
2251
2252 (void) setreuid(saved_uid, -1);
2253 (void) setregid(saved_gid, -1);
2254
2255 return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
2256 }
2257
2258 if (uid_is_valid(uid) && uid != saved_uid) {
2259 if (setreuid(saved_uid, -1) < 0) {
2260 (void) setregid(saved_gid, -1);
2261 return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2262 }
2263 }
2264
2265 if (gid_is_valid(gid) && gid != saved_gid) {
2266 if (setregid(saved_gid, -1) < 0)
2267 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2268 }
2269 }
2270
2271 return 0;
2272 }
2273
/* Appends those file descriptors of the specified pair that are valid (i.e. >= 0) to the array, and
 * increases *n for each one appended. Does nothing if pair is NULL. The caller has to make sure that
 * the array has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2286
2287 static int close_remaining_fds(
2288 const ExecParameters *params,
2289 ExecRuntime *runtime,
2290 DynamicCreds *dcreds,
2291 int user_lookup_fd,
2292 int socket_fd,
2293 int *fds, unsigned n_fds) {
2294
2295 unsigned n_dont_close = 0;
2296 int dont_close[n_fds + 12];
2297
2298 assert(params);
2299
2300 if (params->stdin_fd >= 0)
2301 dont_close[n_dont_close++] = params->stdin_fd;
2302 if (params->stdout_fd >= 0)
2303 dont_close[n_dont_close++] = params->stdout_fd;
2304 if (params->stderr_fd >= 0)
2305 dont_close[n_dont_close++] = params->stderr_fd;
2306
2307 if (socket_fd >= 0)
2308 dont_close[n_dont_close++] = socket_fd;
2309 if (n_fds > 0) {
2310 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2311 n_dont_close += n_fds;
2312 }
2313
2314 if (runtime)
2315 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2316
2317 if (dcreds) {
2318 if (dcreds->user)
2319 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2320 if (dcreds->group)
2321 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2322 }
2323
2324 if (user_lookup_fd >= 0)
2325 dont_close[n_dont_close++] = user_lookup_fd;
2326
2327 return close_all_fds(dont_close, n_dont_close);
2328 }
2329
2330 static int send_user_lookup(
2331 Unit *unit,
2332 int user_lookup_fd,
2333 uid_t uid,
2334 gid_t gid) {
2335
2336 assert(unit);
2337
2338 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2339 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2340 * specified. */
2341
2342 if (user_lookup_fd < 0)
2343 return 0;
2344
2345 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2346 return 0;
2347
2348 if (writev(user_lookup_fd,
2349 (struct iovec[]) {
2350 IOVEC_INIT(&uid, sizeof(uid)),
2351 IOVEC_INIT(&gid, sizeof(gid)),
2352 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
2353 return -errno;
2354
2355 return 0;
2356 }
2357
2358 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2359 int r;
2360
2361 assert(c);
2362 assert(home);
2363 assert(buf);
2364
2365 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2366
2367 if (*home)
2368 return 0;
2369
2370 if (!c->working_directory_home)
2371 return 0;
2372
2373 if (uid == 0) {
2374 /* Hardcode /root as home directory for UID 0 */
2375 *home = "/root";
2376 return 1;
2377 }
2378
2379 r = get_home_dir(buf);
2380 if (r < 0)
2381 return r;
2382
2383 *home = *buf;
2384 return 1;
2385 }
2386
2387 static int exec_child(
2388 Unit *unit,
2389 ExecCommand *command,
2390 const ExecContext *context,
2391 const ExecParameters *params,
2392 ExecRuntime *runtime,
2393 DynamicCreds *dcreds,
2394 char **argv,
2395 int socket_fd,
2396 int named_iofds[3],
2397 int *fds,
2398 unsigned n_storage_fds,
2399 unsigned n_socket_fds,
2400 char **files_env,
2401 int user_lookup_fd,
2402 int *exit_status) {
2403
2404 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2405 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2406 _cleanup_free_ gid_t *supplementary_gids = NULL;
2407 const char *username = NULL, *groupname = NULL;
2408 const char *home = NULL, *shell = NULL;
2409 dev_t journal_stream_dev = 0;
2410 ino_t journal_stream_ino = 0;
2411 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2412 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2413 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2414 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2415 #ifdef HAVE_SELINUX
2416 bool use_selinux = false;
2417 #endif
2418 #ifdef HAVE_SMACK
2419 bool use_smack = false;
2420 #endif
2421 #ifdef HAVE_APPARMOR
2422 bool use_apparmor = false;
2423 #endif
2424 uid_t uid = UID_INVALID;
2425 gid_t gid = GID_INVALID;
2426 int i, r, ngids = 0;
2427 unsigned n_fds;
2428 ExecDirectoryType dt;
2429 int secure_bits;
2430
2431 assert(unit);
2432 assert(command);
2433 assert(context);
2434 assert(params);
2435 assert(exit_status);
2436
2437 rename_process_from_path(command->path);
2438
2439 /* We reset exactly these signals, since they are the
2440 * only ones we set to SIG_IGN in the main daemon. All
2441 * others we leave untouched because we set them to
2442 * SIG_DFL or a valid handler initially, both of which
2443 * will be demoted to SIG_DFL. */
2444 (void) default_signals(SIGNALS_CRASH_HANDLER,
2445 SIGNALS_IGNORE, -1);
2446
2447 if (context->ignore_sigpipe)
2448 (void) ignore_signals(SIGPIPE, -1);
2449
2450 r = reset_signal_mask();
2451 if (r < 0) {
2452 *exit_status = EXIT_SIGNAL_MASK;
2453 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
2454 }
2455
2456 if (params->idle_pipe)
2457 do_idle_pipe_dance(params->idle_pipe);
2458
2459 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
2460 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
2461 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
2462 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
2463
2464 log_forget_fds();
2465 log_set_open_when_needed(true);
2466
2467 /* In case anything used libc syslog(), close this here, too */
2468 closelog();
2469
2470 n_fds = n_storage_fds + n_socket_fds;
2471 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2472 if (r < 0) {
2473 *exit_status = EXIT_FDS;
2474 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
2475 }
2476
2477 if (!context->same_pgrp)
2478 if (setsid() < 0) {
2479 *exit_status = EXIT_SETSID;
2480 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
2481 }
2482
2483 exec_context_tty_reset(context, params);
2484
2485 if (unit_shall_confirm_spawn(unit)) {
2486 const char *vc = params->confirm_spawn;
2487 _cleanup_free_ char *cmdline = NULL;
2488
2489 cmdline = exec_command_line(argv);
2490 if (!cmdline) {
2491 *exit_status = EXIT_MEMORY;
2492 return log_oom();
2493 }
2494
2495 r = ask_for_confirmation(vc, unit, cmdline);
2496 if (r != CONFIRM_EXECUTE) {
2497 if (r == CONFIRM_PRETEND_SUCCESS) {
2498 *exit_status = EXIT_SUCCESS;
2499 return 0;
2500 }
2501 *exit_status = EXIT_CONFIRM;
2502 log_unit_error(unit, "Execution cancelled by the user");
2503 return -ECANCELED;
2504 }
2505 }
2506
2507 if (context->dynamic_user && dcreds) {
2508
2509 /* Make sure we bypass our own NSS module for any NSS checks */
2510 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2511 *exit_status = EXIT_USER;
2512 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
2513 }
2514
2515 r = dynamic_creds_realize(dcreds, &uid, &gid);
2516 if (r < 0) {
2517 *exit_status = EXIT_USER;
2518 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
2519 }
2520
2521 if (!uid_is_valid(uid)) {
2522 *exit_status = EXIT_USER;
2523 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
2524 return -ESRCH;
2525 }
2526
2527 if (!gid_is_valid(gid)) {
2528 *exit_status = EXIT_USER;
2529 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
2530 return -ESRCH;
2531 }
2532
2533 if (dcreds->user)
2534 username = dcreds->user->name;
2535
2536 } else {
2537 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2538 if (r < 0) {
2539 *exit_status = EXIT_USER;
2540 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
2541 }
2542
2543 r = get_fixed_group(context, &groupname, &gid);
2544 if (r < 0) {
2545 *exit_status = EXIT_GROUP;
2546 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
2547 }
2548 }
2549
2550 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2551 r = get_supplementary_groups(context, username, groupname, gid,
2552 &supplementary_gids, &ngids);
2553 if (r < 0) {
2554 *exit_status = EXIT_GROUP;
2555 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
2556 }
2557
2558 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2559 if (r < 0) {
2560 *exit_status = EXIT_USER;
2561 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
2562 }
2563
2564 user_lookup_fd = safe_close(user_lookup_fd);
2565
2566 r = acquire_home(context, uid, &home, &home_buffer);
2567 if (r < 0) {
2568 *exit_status = EXIT_CHDIR;
2569 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
2570 }
2571
2572 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2573 * must sure to drop O_NONBLOCK */
2574 if (socket_fd >= 0)
2575 (void) fd_nonblock(socket_fd, false);
2576
2577 r = setup_input(context, params, socket_fd, named_iofds);
2578 if (r < 0) {
2579 *exit_status = EXIT_STDIN;
2580 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
2581 }
2582
2583 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2584 if (r < 0) {
2585 *exit_status = EXIT_STDOUT;
2586 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
2587 }
2588
2589 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2590 if (r < 0) {
2591 *exit_status = EXIT_STDERR;
2592 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
2593 }
2594
2595 if (params->cgroup_path) {
2596 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2597 if (r < 0) {
2598 *exit_status = EXIT_CGROUP;
2599 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
2600 }
2601 }
2602
2603 if (context->oom_score_adjust_set) {
2604 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2605
2606 /* When we can't make this change due to EPERM, then
2607 * let's silently skip over it. User namespaces
2608 * prohibit write access to this file, and we
2609 * shouldn't trip up over that. */
2610
2611 sprintf(t, "%i", context->oom_score_adjust);
2612 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2613 if (IN_SET(r, -EPERM, -EACCES))
2614 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2615 else if (r < 0) {
2616 *exit_status = EXIT_OOM_ADJUST;
2617 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
2618 }
2619 }
2620
2621 if (context->nice_set)
2622 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2623 *exit_status = EXIT_NICE;
2624 return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
2625 }
2626
2627 if (context->cpu_sched_set) {
2628 struct sched_param param = {
2629 .sched_priority = context->cpu_sched_priority,
2630 };
2631
2632 r = sched_setscheduler(0,
2633 context->cpu_sched_policy |
2634 (context->cpu_sched_reset_on_fork ?
2635 SCHED_RESET_ON_FORK : 0),
2636 &param);
2637 if (r < 0) {
2638 *exit_status = EXIT_SETSCHEDULER;
2639 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
2640 }
2641 }
2642
2643 if (context->cpuset)
2644 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2645 *exit_status = EXIT_CPUAFFINITY;
2646 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
2647 }
2648
2649 if (context->ioprio_set)
2650 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2651 *exit_status = EXIT_IOPRIO;
2652 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
2653 }
2654
2655 if (context->timer_slack_nsec != NSEC_INFINITY)
2656 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2657 *exit_status = EXIT_TIMERSLACK;
2658 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
2659 }
2660
2661 if (context->personality != PERSONALITY_INVALID) {
2662 r = safe_personality(context->personality);
2663 if (r < 0) {
2664 *exit_status = EXIT_PERSONALITY;
2665 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
2666 }
2667 }
2668
2669 if (context->utmp_id)
2670 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2671 context->tty_path,
2672 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2673 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2674 USER_PROCESS,
2675 username);
2676
2677 if (context->user) {
2678 r = chown_terminal(STDIN_FILENO, uid);
2679 if (r < 0) {
2680 *exit_status = EXIT_STDIN;
2681 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
2682 }
2683 }
2684
2685 /* If delegation is enabled we'll pass ownership of the cgroup
2686 * (but only in systemd's own controller hierarchy!) to the
2687 * user of the new process. */
2688 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2689 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2690 if (r < 0) {
2691 *exit_status = EXIT_CGROUP;
2692 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2693 }
2694
2695 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2696 if (r < 0) {
2697 *exit_status = EXIT_CGROUP;
2698 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
2699 }
2700 }
2701
2702 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
2703 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2704 if (r < 0)
2705 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
2706 }
2707
2708 r = build_environment(
2709 unit,
2710 context,
2711 params,
2712 n_fds,
2713 home,
2714 username,
2715 shell,
2716 journal_stream_dev,
2717 journal_stream_ino,
2718 &our_env);
2719 if (r < 0) {
2720 *exit_status = EXIT_MEMORY;
2721 return log_oom();
2722 }
2723
2724 r = build_pass_environment(context, &pass_env);
2725 if (r < 0) {
2726 *exit_status = EXIT_MEMORY;
2727 return log_oom();
2728 }
2729
2730 accum_env = strv_env_merge(5,
2731 params->environment,
2732 our_env,
2733 pass_env,
2734 context->environment,
2735 files_env,
2736 NULL);
2737 if (!accum_env) {
2738 *exit_status = EXIT_MEMORY;
2739 return log_oom();
2740 }
2741 accum_env = strv_env_clean(accum_env);
2742
2743 (void) umask(context->umask);
2744
2745 r = setup_keyring(unit, context, params, uid, gid);
2746 if (r < 0) {
2747 *exit_status = EXIT_KEYRING;
2748 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
2749 }
2750
2751 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2752 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2753
2754 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2755 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
2756
2757 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2758 if (needs_ambient_hack)
2759 needs_setuid = false;
2760 else
2761 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2762
2763 if (needs_sandboxing) {
2764 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2765 * present. The actual MAC context application will happen later, as late as possible, to avoid
2766 * impacting our own code paths. */
2767
2768 #ifdef HAVE_SELINUX
2769 use_selinux = mac_selinux_use();
2770 #endif
2771 #ifdef HAVE_SMACK
2772 use_smack = mac_smack_use();
2773 #endif
2774 #ifdef HAVE_APPARMOR
2775 use_apparmor = mac_apparmor_use();
2776 #endif
2777 }
2778
2779 if (needs_setuid) {
2780 if (context->pam_name && username) {
2781 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2782 if (r < 0) {
2783 *exit_status = EXIT_PAM;
2784 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
2785 }
2786 }
2787 }
2788
2789 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2790 r = setup_netns(runtime->netns_storage_socket);
2791 if (r < 0) {
2792 *exit_status = EXIT_NETWORK;
2793 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
2794 }
2795 }
2796
2797 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2798 if (needs_mount_namespace) {
2799 r = apply_mount_namespace(unit, command, context, params, runtime);
2800 if (r < 0) {
2801 *exit_status = EXIT_NAMESPACE;
2802 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
2803 }
2804 }
2805
2806 /* Apply just after mount namespace setup */
2807 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2808 if (r < 0)
2809 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
2810
2811 /* Drop groups as early as possbile */
2812 if (needs_setuid) {
2813 r = enforce_groups(context, gid, supplementary_gids, ngids);
2814 if (r < 0) {
2815 *exit_status = EXIT_GROUP;
2816 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
2817 }
2818 }
2819
2820 if (needs_sandboxing) {
2821 #ifdef HAVE_SELINUX
2822 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
2823 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2824 if (r < 0) {
2825 *exit_status = EXIT_SELINUX_CONTEXT;
2826 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
2827 }
2828 }
2829 #endif
2830
2831 if (context->private_users) {
2832 r = setup_private_users(uid, gid);
2833 if (r < 0) {
2834 *exit_status = EXIT_USER;
2835 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
2836 }
2837 }
2838 }
2839
2840 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2841 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2842 * was needed to upload the policy and can now be closed as well. */
2843 r = close_all_fds(fds, n_fds);
2844 if (r >= 0)
2845 r = shift_fds(fds, n_fds);
2846 if (r >= 0)
2847 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
2848 if (r < 0) {
2849 *exit_status = EXIT_FDS;
2850 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
2851 }
2852
2853 secure_bits = context->secure_bits;
2854
2855 if (needs_sandboxing) {
2856 uint64_t bset;
2857
2858 for (i = 0; i < _RLIMIT_MAX; i++) {
2859
2860 if (!context->rlimit[i])
2861 continue;
2862
2863 r = setrlimit_closest(i, context->rlimit[i]);
2864 if (r < 0) {
2865 *exit_status = EXIT_LIMITS;
2866 return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
2867 }
2868 }
2869
2870 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2871 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2872 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2873 *exit_status = EXIT_LIMITS;
2874 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
2875 }
2876 }
2877
2878 bset = context->capability_bounding_set;
2879 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2880 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2881 * instead of us doing that */
2882 if (needs_ambient_hack)
2883 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2884 (UINT64_C(1) << CAP_SETUID) |
2885 (UINT64_C(1) << CAP_SETGID);
2886
2887 if (!cap_test_all(bset)) {
2888 r = capability_bounding_set_drop(bset, false);
2889 if (r < 0) {
2890 *exit_status = EXIT_CAPABILITIES;
2891 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
2892 }
2893 }
2894
2895 /* This is done before enforce_user, but ambient set
2896 * does not survive over setresuid() if keep_caps is not set. */
2897 if (!needs_ambient_hack &&
2898 context->capability_ambient_set != 0) {
2899 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2900 if (r < 0) {
2901 *exit_status = EXIT_CAPABILITIES;
2902 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
2903 }
2904 }
2905 }
2906
2907 if (needs_setuid) {
2908 if (context->user) {
2909 r = enforce_user(context, uid);
2910 if (r < 0) {
2911 *exit_status = EXIT_USER;
2912 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
2913 }
2914
2915 if (!needs_ambient_hack &&
2916 context->capability_ambient_set != 0) {
2917
2918 /* Fix the ambient capabilities after user change. */
2919 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2920 if (r < 0) {
2921 *exit_status = EXIT_CAPABILITIES;
2922 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
2923 }
2924
2925 /* If we were asked to change user and ambient capabilities
2926 * were requested, we had to add keep-caps to the securebits
2927 * so that we would maintain the inherited capability set
2928 * through the setresuid(). Make sure that the bit is added
2929 * also to the context secure_bits so that we don't try to
2930 * drop the bit away next. */
2931
2932 secure_bits |= 1<<SECURE_KEEP_CAPS;
2933 }
2934 }
2935 }
2936
2937 if (needs_sandboxing) {
2938 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2939 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2940 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2941 * are restricted. */
2942
2943 #ifdef HAVE_SELINUX
2944 if (use_selinux) {
2945 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2946
2947 if (exec_context) {
2948 r = setexeccon(exec_context);
2949 if (r < 0) {
2950 *exit_status = EXIT_SELINUX_CONTEXT;
2951 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
2952 }
2953 }
2954 }
2955 #endif
2956
2957 #ifdef HAVE_SMACK
2958 if (use_smack) {
2959 r = setup_smack(context, command);
2960 if (r < 0) {
2961 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2962 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
2963 }
2964 }
2965 #endif
2966
2967 #ifdef HAVE_APPARMOR
2968 if (use_apparmor && context->apparmor_profile) {
2969 r = aa_change_onexec(context->apparmor_profile);
2970 if (r < 0 && !context->apparmor_profile_ignore) {
2971 *exit_status = EXIT_APPARMOR_PROFILE;
2972 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
2973 }
2974 }
2975 #endif
2976
2977 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2978 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
2979 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2980 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
2981 *exit_status = EXIT_SECUREBITS;
2982 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
2983 }
2984
2985 if (context_has_no_new_privileges(context))
2986 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2987 *exit_status = EXIT_NO_NEW_PRIVILEGES;
2988 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
2989 }
2990
2991 #ifdef HAVE_SECCOMP
2992 r = apply_address_families(unit, context);
2993 if (r < 0) {
2994 *exit_status = EXIT_ADDRESS_FAMILIES;
2995 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
2996 }
2997
2998 r = apply_memory_deny_write_execute(unit, context);
2999 if (r < 0) {
3000 *exit_status = EXIT_SECCOMP;
3001 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
3002 }
3003
3004 r = apply_restrict_realtime(unit, context);
3005 if (r < 0) {
3006 *exit_status = EXIT_SECCOMP;
3007 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
3008 }
3009
3010 r = apply_restrict_namespaces(unit, context);
3011 if (r < 0) {
3012 *exit_status = EXIT_SECCOMP;
3013 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
3014 }
3015
3016 r = apply_protect_sysctl(unit, context);
3017 if (r < 0) {
3018 *exit_status = EXIT_SECCOMP;
3019 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
3020 }
3021
3022 r = apply_protect_kernel_modules(unit, context);
3023 if (r < 0) {
3024 *exit_status = EXIT_SECCOMP;
3025 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
3026 }
3027
3028 r = apply_private_devices(unit, context);
3029 if (r < 0) {
3030 *exit_status = EXIT_SECCOMP;
3031 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
3032 }
3033
3034 r = apply_syscall_archs(unit, context);
3035 if (r < 0) {
3036 *exit_status = EXIT_SECCOMP;
3037 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3038 }
3039
3040 r = apply_lock_personality(unit, context);
3041 if (r < 0) {
3042 *exit_status = EXIT_SECCOMP;
3043 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
3044 }
3045
3046 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3047 * by the filter as little as possible. */
3048 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3049 if (r < 0) {
3050 *exit_status = EXIT_SECCOMP;
3051 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
3052 }
3053 #endif
3054 }
3055
3056 if (!strv_isempty(context->unset_environment)) {
3057 char **ee = NULL;
3058
3059 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3060 if (!ee) {
3061 *exit_status = EXIT_MEMORY;
3062 return log_oom();
3063 }
3064
3065 strv_free(accum_env);
3066 accum_env = ee;
3067 }
3068
3069 final_argv = replace_env_argv(argv, accum_env);
3070 if (!final_argv) {
3071 *exit_status = EXIT_MEMORY;
3072 return log_oom();
3073 }
3074
3075 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3076 _cleanup_free_ char *line;
3077
3078 line = exec_command_line(final_argv);
3079 if (line) {
3080 log_struct(LOG_DEBUG,
3081 "EXECUTABLE=%s", command->path,
3082 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3083 LOG_UNIT_ID(unit),
3084 LOG_UNIT_INVOCATION_ID(unit),
3085 NULL);
3086 }
3087 }
3088
3089 execve(command->path, final_argv, accum_env);
3090
3091 if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
3092
3093 log_struct_errno(LOG_INFO, errno,
3094 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3095 LOG_UNIT_ID(unit),
3096 LOG_UNIT_INVOCATION_ID(unit),
3097 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
3098 command->path),
3099 "EXECUTABLE=%s", command->path,
3100 NULL);
3101
3102 return 0;
3103 }
3104
3105 *exit_status = EXIT_EXEC;
3106 return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
3107 }
3108
3109 int exec_spawn(Unit *unit,
3110 ExecCommand *command,
3111 const ExecContext *context,
3112 const ExecParameters *params,
3113 ExecRuntime *runtime,
3114 DynamicCreds *dcreds,
3115 pid_t *ret) {
3116
3117 _cleanup_strv_free_ char **files_env = NULL;
3118 int *fds = NULL;
3119 unsigned n_storage_fds = 0, n_socket_fds = 0;
3120 _cleanup_free_ char *line = NULL;
3121 int socket_fd, r;
3122 int named_iofds[3] = { -1, -1, -1 };
3123 char **argv;
3124 pid_t pid;
3125
3126 assert(unit);
3127 assert(command);
3128 assert(context);
3129 assert(ret);
3130 assert(params);
3131 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3132
3133 if (context->std_input == EXEC_INPUT_SOCKET ||
3134 context->std_output == EXEC_OUTPUT_SOCKET ||
3135 context->std_error == EXEC_OUTPUT_SOCKET) {
3136
3137 if (params->n_socket_fds > 1) {
3138 log_unit_error(unit, "Got more than one socket.");
3139 return -EINVAL;
3140 }
3141
3142 if (params->n_socket_fds == 0) {
3143 log_unit_error(unit, "Got no socket.");
3144 return -EINVAL;
3145 }
3146
3147 socket_fd = params->fds[0];
3148 } else {
3149 socket_fd = -1;
3150 fds = params->fds;
3151 n_storage_fds = params->n_storage_fds;
3152 n_socket_fds = params->n_socket_fds;
3153 }
3154
3155 r = exec_context_named_iofds(unit, context, params, named_iofds);
3156 if (r < 0)
3157 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3158
3159 r = exec_context_load_environment(unit, context, &files_env);
3160 if (r < 0)
3161 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3162
3163 argv = params->argv ?: command->argv;
3164 line = exec_command_line(argv);
3165 if (!line)
3166 return log_oom();
3167
3168 log_struct(LOG_DEBUG,
3169 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3170 "EXECUTABLE=%s", command->path,
3171 LOG_UNIT_ID(unit),
3172 LOG_UNIT_INVOCATION_ID(unit),
3173 NULL);
3174
3175 pid = fork();
3176 if (pid < 0)
3177 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3178
3179 if (pid == 0) {
3180 int exit_status = EXIT_SUCCESS;
3181
3182 r = exec_child(unit,
3183 command,
3184 context,
3185 params,
3186 runtime,
3187 dcreds,
3188 argv,
3189 socket_fd,
3190 named_iofds,
3191 fds,
3192 n_storage_fds,
3193 n_socket_fds,
3194 files_env,
3195 unit->manager->user_lookup_fds[1],
3196 &exit_status);
3197
3198 if (r < 0) {
3199 log_struct_errno(LOG_ERR, r,
3200 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3201 LOG_UNIT_ID(unit),
3202 LOG_UNIT_INVOCATION_ID(unit),
3203 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3204 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3205 command->path),
3206 "EXECUTABLE=%s", command->path,
3207 NULL);
3208 }
3209
3210 _exit(exit_status);
3211 }
3212
3213 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3214
3215 /* We add the new process to the cgroup both in the child (so
3216 * that we can be sure that no user code is ever executed
3217 * outside of the cgroup) and in the parent (so that we can be
3218 * sure that when we kill the cgroup the process will be
3219 * killed too). */
3220 if (params->cgroup_path)
3221 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3222
3223 exec_status_start(&command->exec_status, pid);
3224
3225 *ret = pid;
3226 return 0;
3227 }
3228
3229 void exec_context_init(ExecContext *c) {
3230 ExecDirectoryType i;
3231
3232 assert(c);
3233
3234 c->umask = 0022;
3235 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3236 c->cpu_sched_policy = SCHED_OTHER;
3237 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3238 c->syslog_level_prefix = true;
3239 c->ignore_sigpipe = true;
3240 c->timer_slack_nsec = NSEC_INFINITY;
3241 c->personality = PERSONALITY_INVALID;
3242 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3243 c->directories[i].mode = 0755;
3244 c->capability_bounding_set = CAP_ALL;
3245 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3246 }
3247
3248 void exec_context_done(ExecContext *c) {
3249 unsigned l;
3250 ExecDirectoryType i;
3251
3252 assert(c);
3253
3254 c->environment = strv_free(c->environment);
3255 c->environment_files = strv_free(c->environment_files);
3256 c->pass_environment = strv_free(c->pass_environment);
3257 c->unset_environment = strv_free(c->unset_environment);
3258
3259 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3260 c->rlimit[l] = mfree(c->rlimit[l]);
3261
3262 for (l = 0; l < 3; l++)
3263 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3264
3265 c->working_directory = mfree(c->working_directory);
3266 c->root_directory = mfree(c->root_directory);
3267 c->root_image = mfree(c->root_image);
3268 c->tty_path = mfree(c->tty_path);
3269 c->syslog_identifier = mfree(c->syslog_identifier);
3270 c->user = mfree(c->user);
3271 c->group = mfree(c->group);
3272
3273 c->supplementary_groups = strv_free(c->supplementary_groups);
3274
3275 c->pam_name = mfree(c->pam_name);
3276
3277 c->read_only_paths = strv_free(c->read_only_paths);
3278 c->read_write_paths = strv_free(c->read_write_paths);
3279 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3280
3281 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3282
3283 if (c->cpuset)
3284 CPU_FREE(c->cpuset);
3285
3286 c->utmp_id = mfree(c->utmp_id);
3287 c->selinux_context = mfree(c->selinux_context);
3288 c->apparmor_profile = mfree(c->apparmor_profile);
3289 c->smack_process_label = mfree(c->smack_process_label);
3290
3291 c->syscall_filter = set_free(c->syscall_filter);
3292 c->syscall_archs = set_free(c->syscall_archs);
3293 c->address_families = set_free(c->address_families);
3294
3295 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3296 c->directories[i].paths = strv_free(c->directories[i].paths);
3297 }
3298
3299 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3300 char **i;
3301
3302 assert(c);
3303
3304 if (!runtime_prefix)
3305 return 0;
3306
3307 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3308 _cleanup_free_ char *p;
3309
3310 p = strjoin(runtime_prefix, "/", *i);
3311 if (!p)
3312 return -ENOMEM;
3313
3314 /* We execute this synchronously, since we need to be
3315 * sure this is gone when we start the service
3316 * next. */
3317 (void) rm_rf(p, REMOVE_ROOT);
3318 }
3319
3320 return 0;
3321 }
3322
3323 void exec_command_done(ExecCommand *c) {
3324 assert(c);
3325
3326 c->path = mfree(c->path);
3327
3328 c->argv = strv_free(c->argv);
3329 }
3330
3331 void exec_command_done_array(ExecCommand *c, unsigned n) {
3332 unsigned i;
3333
3334 for (i = 0; i < n; i++)
3335 exec_command_done(c+i);
3336 }
3337
3338 ExecCommand* exec_command_free_list(ExecCommand *c) {
3339 ExecCommand *i;
3340
3341 while ((i = c)) {
3342 LIST_REMOVE(command, c, i);
3343 exec_command_done(i);
3344 free(i);
3345 }
3346
3347 return NULL;
3348 }
3349
3350 void exec_command_free_array(ExecCommand **c, unsigned n) {
3351 unsigned i;
3352
3353 for (i = 0; i < n; i++)
3354 c[i] = exec_command_free_list(c[i]);
3355 }
3356
3357 typedef struct InvalidEnvInfo {
3358 Unit *unit;
3359 const char *path;
3360 } InvalidEnvInfo;
3361
3362 static void invalid_env(const char *p, void *userdata) {
3363 InvalidEnvInfo *info = userdata;
3364
3365 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3366 }
3367
3368 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3369 assert(c);
3370
3371 switch (fd_index) {
3372 case STDIN_FILENO:
3373 if (c->std_input != EXEC_INPUT_NAMED_FD)
3374 return NULL;
3375 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3376 case STDOUT_FILENO:
3377 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3378 return NULL;
3379 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3380 case STDERR_FILENO:
3381 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3382 return NULL;
3383 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3384 default:
3385 return NULL;
3386 }
3387 }
3388
3389 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3390 unsigned i, targets;
3391 const char* stdio_fdname[3];
3392 unsigned n_fds;
3393
3394 assert(c);
3395 assert(p);
3396
3397 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3398 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3399 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3400
3401 for (i = 0; i < 3; i++)
3402 stdio_fdname[i] = exec_context_fdname(c, i);
3403
3404 n_fds = p->n_storage_fds + p->n_socket_fds;
3405
3406 for (i = 0; i < n_fds && targets > 0; i++)
3407 if (named_iofds[STDIN_FILENO] < 0 &&
3408 c->std_input == EXEC_INPUT_NAMED_FD &&
3409 stdio_fdname[STDIN_FILENO] &&
3410 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3411
3412 named_iofds[STDIN_FILENO] = p->fds[i];
3413 targets--;
3414
3415 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3416 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3417 stdio_fdname[STDOUT_FILENO] &&
3418 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3419
3420 named_iofds[STDOUT_FILENO] = p->fds[i];
3421 targets--;
3422
3423 } else if (named_iofds[STDERR_FILENO] < 0 &&
3424 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3425 stdio_fdname[STDERR_FILENO] &&
3426 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3427
3428 named_iofds[STDERR_FILENO] = p->fds[i];
3429 targets--;
3430 }
3431
3432 return targets == 0 ? 0 : -ENOENT;
3433 }
3434
3435 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3436 char **i, **r = NULL;
3437
3438 assert(c);
3439 assert(l);
3440
3441 STRV_FOREACH(i, c->environment_files) {
3442 char *fn;
3443 int k;
3444 unsigned n;
3445 bool ignore = false;
3446 char **p;
3447 _cleanup_globfree_ glob_t pglob = {};
3448
3449 fn = *i;
3450
3451 if (fn[0] == '-') {
3452 ignore = true;
3453 fn++;
3454 }
3455
3456 if (!path_is_absolute(fn)) {
3457 if (ignore)
3458 continue;
3459
3460 strv_free(r);
3461 return -EINVAL;
3462 }
3463
3464 /* Filename supports globbing, take all matching files */
3465 k = safe_glob(fn, 0, &pglob);
3466 if (k < 0) {
3467 if (ignore)
3468 continue;
3469
3470 strv_free(r);
3471 return k;
3472 }
3473
3474 /* When we don't match anything, -ENOENT should be returned */
3475 assert(pglob.gl_pathc > 0);
3476
3477 for (n = 0; n < pglob.gl_pathc; n++) {
3478 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3479 if (k < 0) {
3480 if (ignore)
3481 continue;
3482
3483 strv_free(r);
3484 return k;
3485 }
3486 /* Log invalid environment variables with filename */
3487 if (p) {
3488 InvalidEnvInfo info = {
3489 .unit = unit,
3490 .path = pglob.gl_pathv[n]
3491 };
3492
3493 p = strv_env_clean_with_callback(p, invalid_env, &info);
3494 }
3495
3496 if (r == NULL)
3497 r = p;
3498 else {
3499 char **m;
3500
3501 m = strv_env_merge(2, r, p);
3502 strv_free(r);
3503 strv_free(p);
3504 if (!m)
3505 return -ENOMEM;
3506
3507 r = m;
3508 }
3509 }
3510 }
3511
3512 *l = r;
3513
3514 return 0;
3515 }
3516
3517 static bool tty_may_match_dev_console(const char *tty) {
3518 _cleanup_free_ char *active = NULL;
3519 char *console;
3520
3521 if (!tty)
3522 return true;
3523
3524 tty = skip_dev_prefix(tty);
3525
3526 /* trivial identity? */
3527 if (streq(tty, "console"))
3528 return true;
3529
3530 console = resolve_dev_console(&active);
3531 /* if we could not resolve, assume it may */
3532 if (!console)
3533 return true;
3534
3535 /* "tty0" means the active VC, so it may be the same sometimes */
3536 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3537 }
3538
3539 bool exec_context_may_touch_console(ExecContext *ec) {
3540
3541 return (ec->tty_reset ||
3542 ec->tty_vhangup ||
3543 ec->tty_vt_disallocate ||
3544 is_terminal_input(ec->std_input) ||
3545 is_terminal_output(ec->std_output) ||
3546 is_terminal_output(ec->std_error)) &&
3547 tty_may_match_dev_console(exec_context_tty_path(ec));
3548 }
3549
3550 static void strv_fprintf(FILE *f, char **l) {
3551 char **g;
3552
3553 assert(f);
3554
3555 STRV_FOREACH(g, l)
3556 fprintf(f, " %s", *g);
3557 }
3558
3559 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3560 char **e, **d;
3561 unsigned i;
3562 ExecDirectoryType dt;
3563 int r;
3564
3565 assert(c);
3566 assert(f);
3567
3568 prefix = strempty(prefix);
3569
3570 fprintf(f,
3571 "%sUMask: %04o\n"
3572 "%sWorkingDirectory: %s\n"
3573 "%sRootDirectory: %s\n"
3574 "%sNonBlocking: %s\n"
3575 "%sPrivateTmp: %s\n"
3576 "%sPrivateDevices: %s\n"
3577 "%sProtectKernelTunables: %s\n"
3578 "%sProtectKernelModules: %s\n"
3579 "%sProtectControlGroups: %s\n"
3580 "%sPrivateNetwork: %s\n"
3581 "%sPrivateUsers: %s\n"
3582 "%sProtectHome: %s\n"
3583 "%sProtectSystem: %s\n"
3584 "%sMountAPIVFS: %s\n"
3585 "%sIgnoreSIGPIPE: %s\n"
3586 "%sMemoryDenyWriteExecute: %s\n"
3587 "%sRestrictRealtime: %s\n"
3588 "%sKeyringMode: %s\n",
3589 prefix, c->umask,
3590 prefix, c->working_directory ? c->working_directory : "/",
3591 prefix, c->root_directory ? c->root_directory : "/",
3592 prefix, yes_no(c->non_blocking),
3593 prefix, yes_no(c->private_tmp),
3594 prefix, yes_no(c->private_devices),
3595 prefix, yes_no(c->protect_kernel_tunables),
3596 prefix, yes_no(c->protect_kernel_modules),
3597 prefix, yes_no(c->protect_control_groups),
3598 prefix, yes_no(c->private_network),
3599 prefix, yes_no(c->private_users),
3600 prefix, protect_home_to_string(c->protect_home),
3601 prefix, protect_system_to_string(c->protect_system),
3602 prefix, yes_no(c->mount_apivfs),
3603 prefix, yes_no(c->ignore_sigpipe),
3604 prefix, yes_no(c->memory_deny_write_execute),
3605 prefix, yes_no(c->restrict_realtime),
3606 prefix, exec_keyring_mode_to_string(c->keyring_mode));
3607
3608 if (c->root_image)
3609 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3610
3611 STRV_FOREACH(e, c->environment)
3612 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3613
3614 STRV_FOREACH(e, c->environment_files)
3615 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3616
3617 STRV_FOREACH(e, c->pass_environment)
3618 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3619
3620 STRV_FOREACH(e, c->unset_environment)
3621 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3622
3623 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3624
3625 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3626 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3627
3628 STRV_FOREACH(d, c->directories[dt].paths)
3629 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3630 }
3631
3632 if (c->nice_set)
3633 fprintf(f,
3634 "%sNice: %i\n",
3635 prefix, c->nice);
3636
3637 if (c->oom_score_adjust_set)
3638 fprintf(f,
3639 "%sOOMScoreAdjust: %i\n",
3640 prefix, c->oom_score_adjust);
3641
3642 for (i = 0; i < RLIM_NLIMITS; i++)
3643 if (c->rlimit[i]) {
3644 fprintf(f, "%s%s: " RLIM_FMT "\n",
3645 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3646 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3647 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3648 }
3649
3650 if (c->ioprio_set) {
3651 _cleanup_free_ char *class_str = NULL;
3652
3653 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3654 if (r >= 0)
3655 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3656
3657 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3658 }
3659
3660 if (c->cpu_sched_set) {
3661 _cleanup_free_ char *policy_str = NULL;
3662
3663 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3664 if (r >= 0)
3665 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3666
3667 fprintf(f,
3668 "%sCPUSchedulingPriority: %i\n"
3669 "%sCPUSchedulingResetOnFork: %s\n",
3670 prefix, c->cpu_sched_priority,
3671 prefix, yes_no(c->cpu_sched_reset_on_fork));
3672 }
3673
3674 if (c->cpuset) {
3675 fprintf(f, "%sCPUAffinity:", prefix);
3676 for (i = 0; i < c->cpuset_ncpus; i++)
3677 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3678 fprintf(f, " %u", i);
3679 fputs("\n", f);
3680 }
3681
3682 if (c->timer_slack_nsec != NSEC_INFINITY)
3683 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3684
3685 fprintf(f,
3686 "%sStandardInput: %s\n"
3687 "%sStandardOutput: %s\n"
3688 "%sStandardError: %s\n",
3689 prefix, exec_input_to_string(c->std_input),
3690 prefix, exec_output_to_string(c->std_output),
3691 prefix, exec_output_to_string(c->std_error));
3692
3693 if (c->tty_path)
3694 fprintf(f,
3695 "%sTTYPath: %s\n"
3696 "%sTTYReset: %s\n"
3697 "%sTTYVHangup: %s\n"
3698 "%sTTYVTDisallocate: %s\n",
3699 prefix, c->tty_path,
3700 prefix, yes_no(c->tty_reset),
3701 prefix, yes_no(c->tty_vhangup),
3702 prefix, yes_no(c->tty_vt_disallocate));
3703
3704 if (IN_SET(c->std_output,
3705 EXEC_OUTPUT_SYSLOG,
3706 EXEC_OUTPUT_KMSG,
3707 EXEC_OUTPUT_JOURNAL,
3708 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3709 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3710 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3711 IN_SET(c->std_error,
3712 EXEC_OUTPUT_SYSLOG,
3713 EXEC_OUTPUT_KMSG,
3714 EXEC_OUTPUT_JOURNAL,
3715 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3716 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3717 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3718
3719 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3720
3721 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3722 if (r >= 0)
3723 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3724
3725 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3726 if (r >= 0)
3727 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3728 }
3729
3730 if (c->secure_bits) {
3731 _cleanup_free_ char *str = NULL;
3732
3733 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3734 if (r >= 0)
3735 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3736 }
3737
3738 if (c->capability_bounding_set != CAP_ALL) {
3739 _cleanup_free_ char *str = NULL;
3740
3741 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3742 if (r >= 0)
3743 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
3744 }
3745
3746 if (c->capability_ambient_set != 0) {
3747 _cleanup_free_ char *str = NULL;
3748
3749 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3750 if (r >= 0)
3751 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
3752 }
3753
3754 if (c->user)
3755 fprintf(f, "%sUser: %s\n", prefix, c->user);
3756 if (c->group)
3757 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3758
3759 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3760
3761 if (strv_length(c->supplementary_groups) > 0) {
3762 fprintf(f, "%sSupplementaryGroups:", prefix);
3763 strv_fprintf(f, c->supplementary_groups);
3764 fputs("\n", f);
3765 }
3766
3767 if (c->pam_name)
3768 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3769
3770 if (strv_length(c->read_write_paths) > 0) {
3771 fprintf(f, "%sReadWritePaths:", prefix);
3772 strv_fprintf(f, c->read_write_paths);
3773 fputs("\n", f);
3774 }
3775
3776 if (strv_length(c->read_only_paths) > 0) {
3777 fprintf(f, "%sReadOnlyPaths:", prefix);
3778 strv_fprintf(f, c->read_only_paths);
3779 fputs("\n", f);
3780 }
3781
3782 if (strv_length(c->inaccessible_paths) > 0) {
3783 fprintf(f, "%sInaccessiblePaths:", prefix);
3784 strv_fprintf(f, c->inaccessible_paths);
3785 fputs("\n", f);
3786 }
3787
3788 if (c->n_bind_mounts > 0)
3789 for (i = 0; i < c->n_bind_mounts; i++) {
3790 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3791 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3792 c->bind_mounts[i].source,
3793 c->bind_mounts[i].destination,
3794 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3795 }
3796
3797 if (c->utmp_id)
3798 fprintf(f,
3799 "%sUtmpIdentifier: %s\n",
3800 prefix, c->utmp_id);
3801
3802 if (c->selinux_context)
3803 fprintf(f,
3804 "%sSELinuxContext: %s%s\n",
3805 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3806
3807 if (c->apparmor_profile)
3808 fprintf(f,
3809 "%sAppArmorProfile: %s%s\n",
3810 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3811
3812 if (c->smack_process_label)
3813 fprintf(f,
3814 "%sSmackProcessLabel: %s%s\n",
3815 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3816
3817 if (c->personality != PERSONALITY_INVALID)
3818 fprintf(f,
3819 "%sPersonality: %s\n",
3820 prefix, strna(personality_to_string(c->personality)));
3821
3822 fprintf(f,
3823 "%sLockPersonality: %s\n",
3824 prefix, yes_no(c->lock_personality));
3825
3826 if (c->syscall_filter) {
3827 #ifdef HAVE_SECCOMP
3828 Iterator j;
3829 void *id;
3830 bool first = true;
3831 #endif
3832
3833 fprintf(f,
3834 "%sSystemCallFilter: ",
3835 prefix);
3836
3837 if (!c->syscall_whitelist)
3838 fputc('~', f);
3839
3840 #ifdef HAVE_SECCOMP
3841 SET_FOREACH(id, c->syscall_filter, j) {
3842 _cleanup_free_ char *name = NULL;
3843
3844 if (first)
3845 first = false;
3846 else
3847 fputc(' ', f);
3848
3849 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3850 fputs(strna(name), f);
3851 }
3852 #endif
3853
3854 fputc('\n', f);
3855 }
3856
3857 if (c->syscall_archs) {
3858 #ifdef HAVE_SECCOMP
3859 Iterator j;
3860 void *id;
3861 #endif
3862
3863 fprintf(f,
3864 "%sSystemCallArchitectures:",
3865 prefix);
3866
3867 #ifdef HAVE_SECCOMP
3868 SET_FOREACH(id, c->syscall_archs, j)
3869 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3870 #endif
3871 fputc('\n', f);
3872 }
3873
3874 if (exec_context_restrict_namespaces_set(c)) {
3875 _cleanup_free_ char *s = NULL;
3876
3877 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3878 if (r >= 0)
3879 fprintf(f, "%sRestrictNamespaces: %s\n",
3880 prefix, s);
3881 }
3882
3883 if (c->syscall_errno > 0)
3884 fprintf(f,
3885 "%sSystemCallErrorNumber: %s\n",
3886 prefix, strna(errno_to_name(c->syscall_errno)));
3887
3888 if (c->apparmor_profile)
3889 fprintf(f,
3890 "%sAppArmorProfile: %s%s\n",
3891 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3892 }
3893
3894 bool exec_context_maintains_privileges(ExecContext *c) {
3895 assert(c);
3896
3897 /* Returns true if the process forked off would run under
3898 * an unchanged UID or as root. */
3899
3900 if (!c->user)
3901 return true;
3902
3903 if (streq(c->user, "root") || streq(c->user, "0"))
3904 return true;
3905
3906 return false;
3907 }
3908
3909 int exec_context_get_effective_ioprio(ExecContext *c) {
3910 int p;
3911
3912 assert(c);
3913
3914 if (c->ioprio_set)
3915 return c->ioprio;
3916
3917 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3918 if (p < 0)
3919 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3920
3921 return p;
3922 }
3923
3924 void exec_status_start(ExecStatus *s, pid_t pid) {
3925 assert(s);
3926
3927 zero(*s);
3928 s->pid = pid;
3929 dual_timestamp_get(&s->start_timestamp);
3930 }
3931
3932 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
3933 assert(s);
3934
3935 if (s->pid && s->pid != pid)
3936 zero(*s);
3937
3938 s->pid = pid;
3939 dual_timestamp_get(&s->exit_timestamp);
3940
3941 s->code = code;
3942 s->status = status;
3943
3944 if (context) {
3945 if (context->utmp_id)
3946 utmp_put_dead_process(context->utmp_id, pid, code, status);
3947
3948 exec_context_tty_reset(context, NULL);
3949 }
3950 }
3951
3952 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3953 char buf[FORMAT_TIMESTAMP_MAX];
3954
3955 assert(s);
3956 assert(f);
3957
3958 if (s->pid <= 0)
3959 return;
3960
3961 prefix = strempty(prefix);
3962
3963 fprintf(f,
3964 "%sPID: "PID_FMT"\n",
3965 prefix, s->pid);
3966
3967 if (dual_timestamp_is_set(&s->start_timestamp))
3968 fprintf(f,
3969 "%sStart Timestamp: %s\n",
3970 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
3971
3972 if (dual_timestamp_is_set(&s->exit_timestamp))
3973 fprintf(f,
3974 "%sExit Timestamp: %s\n"
3975 "%sExit Code: %s\n"
3976 "%sExit Status: %i\n",
3977 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
3978 prefix, sigchld_code_to_string(s->code),
3979 prefix, s->status);
3980 }
3981
3982 char *exec_command_line(char **argv) {
3983 size_t k;
3984 char *n, *p, **a;
3985 bool first = true;
3986
3987 assert(argv);
3988
3989 k = 1;
3990 STRV_FOREACH(a, argv)
3991 k += strlen(*a)+3;
3992
3993 n = new(char, k);
3994 if (!n)
3995 return NULL;
3996
3997 p = n;
3998 STRV_FOREACH(a, argv) {
3999
4000 if (!first)
4001 *(p++) = ' ';
4002 else
4003 first = false;
4004
4005 if (strpbrk(*a, WHITESPACE)) {
4006 *(p++) = '\'';
4007 p = stpcpy(p, *a);
4008 *(p++) = '\'';
4009 } else
4010 p = stpcpy(p, *a);
4011
4012 }
4013
4014 *p = 0;
4015
4016 /* FIXME: this doesn't really handle arguments that have
4017 * spaces and ticks in them */
4018
4019 return n;
4020 }
4021
4022 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4023 _cleanup_free_ char *cmd = NULL;
4024 const char *prefix2;
4025
4026 assert(c);
4027 assert(f);
4028
4029 prefix = strempty(prefix);
4030 prefix2 = strjoina(prefix, "\t");
4031
4032 cmd = exec_command_line(c->argv);
4033 fprintf(f,
4034 "%sCommand Line: %s\n",
4035 prefix, cmd ? cmd : strerror(ENOMEM));
4036
4037 exec_status_dump(&c->exec_status, f, prefix2);
4038 }
4039
4040 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4041 assert(f);
4042
4043 prefix = strempty(prefix);
4044
4045 LIST_FOREACH(command, c, c)
4046 exec_command_dump(c, f, prefix);
4047 }
4048
4049 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4050 ExecCommand *end;
4051
4052 assert(l);
4053 assert(e);
4054
4055 if (*l) {
4056 /* It's kind of important, that we keep the order here */
4057 LIST_FIND_TAIL(command, *l, end);
4058 LIST_INSERT_AFTER(command, *l, end, e);
4059 } else
4060 *l = e;
4061 }
4062
4063 int exec_command_set(ExecCommand *c, const char *path, ...) {
4064 va_list ap;
4065 char **l, *p;
4066
4067 assert(c);
4068 assert(path);
4069
4070 va_start(ap, path);
4071 l = strv_new_ap(path, ap);
4072 va_end(ap);
4073
4074 if (!l)
4075 return -ENOMEM;
4076
4077 p = strdup(path);
4078 if (!p) {
4079 strv_free(l);
4080 return -ENOMEM;
4081 }
4082
4083 free(c->path);
4084 c->path = p;
4085
4086 strv_free(c->argv);
4087 c->argv = l;
4088
4089 return 0;
4090 }
4091
4092 int exec_command_append(ExecCommand *c, const char *path, ...) {
4093 _cleanup_strv_free_ char **l = NULL;
4094 va_list ap;
4095 int r;
4096
4097 assert(c);
4098 assert(path);
4099
4100 va_start(ap, path);
4101 l = strv_new_ap(path, ap);
4102 va_end(ap);
4103
4104 if (!l)
4105 return -ENOMEM;
4106
4107 r = strv_extend_strv(&c->argv, l, false);
4108 if (r < 0)
4109 return r;
4110
4111 return 0;
4112 }
4113
4114
4115 static int exec_runtime_allocate(ExecRuntime **rt) {
4116
4117 if (*rt)
4118 return 0;
4119
4120 *rt = new0(ExecRuntime, 1);
4121 if (!*rt)
4122 return -ENOMEM;
4123
4124 (*rt)->n_ref = 1;
4125 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4126
4127 return 0;
4128 }
4129
4130 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4131 int r;
4132
4133 assert(rt);
4134 assert(c);
4135 assert(id);
4136
4137 if (*rt)
4138 return 1;
4139
4140 if (!c->private_network && !c->private_tmp)
4141 return 0;
4142
4143 r = exec_runtime_allocate(rt);
4144 if (r < 0)
4145 return r;
4146
4147 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4148 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4149 return -errno;
4150 }
4151
4152 if (c->private_tmp && !(*rt)->tmp_dir) {
4153 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4154 if (r < 0)
4155 return r;
4156 }
4157
4158 return 1;
4159 }
4160
4161 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4162 assert(r);
4163 assert(r->n_ref > 0);
4164
4165 r->n_ref++;
4166 return r;
4167 }
4168
4169 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4170
4171 if (!r)
4172 return NULL;
4173
4174 assert(r->n_ref > 0);
4175
4176 r->n_ref--;
4177 if (r->n_ref > 0)
4178 return NULL;
4179
4180 free(r->tmp_dir);
4181 free(r->var_tmp_dir);
4182 safe_close_pair(r->netns_storage_socket);
4183 return mfree(r);
4184 }
4185
4186 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4187 assert(u);
4188 assert(f);
4189 assert(fds);
4190
4191 if (!rt)
4192 return 0;
4193
4194 if (rt->tmp_dir)
4195 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4196
4197 if (rt->var_tmp_dir)
4198 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4199
4200 if (rt->netns_storage_socket[0] >= 0) {
4201 int copy;
4202
4203 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4204 if (copy < 0)
4205 return copy;
4206
4207 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4208 }
4209
4210 if (rt->netns_storage_socket[1] >= 0) {
4211 int copy;
4212
4213 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4214 if (copy < 0)
4215 return copy;
4216
4217 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4218 }
4219
4220 return 0;
4221 }
4222
4223 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4224 int r;
4225
4226 assert(rt);
4227 assert(key);
4228 assert(value);
4229
4230 if (streq(key, "tmp-dir")) {
4231 char *copy;
4232
4233 r = exec_runtime_allocate(rt);
4234 if (r < 0)
4235 return log_oom();
4236
4237 copy = strdup(value);
4238 if (!copy)
4239 return log_oom();
4240
4241 free((*rt)->tmp_dir);
4242 (*rt)->tmp_dir = copy;
4243
4244 } else if (streq(key, "var-tmp-dir")) {
4245 char *copy;
4246
4247 r = exec_runtime_allocate(rt);
4248 if (r < 0)
4249 return log_oom();
4250
4251 copy = strdup(value);
4252 if (!copy)
4253 return log_oom();
4254
4255 free((*rt)->var_tmp_dir);
4256 (*rt)->var_tmp_dir = copy;
4257
4258 } else if (streq(key, "netns-socket-0")) {
4259 int fd;
4260
4261 r = exec_runtime_allocate(rt);
4262 if (r < 0)
4263 return log_oom();
4264
4265 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4266 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4267 else {
4268 safe_close((*rt)->netns_storage_socket[0]);
4269 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4270 }
4271 } else if (streq(key, "netns-socket-1")) {
4272 int fd;
4273
4274 r = exec_runtime_allocate(rt);
4275 if (r < 0)
4276 return log_oom();
4277
4278 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4279 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4280 else {
4281 safe_close((*rt)->netns_storage_socket[1]);
4282 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4283 }
4284 } else
4285 return 0;
4286
4287 return 1;
4288 }
4289
4290 static void *remove_tmpdir_thread(void *p) {
4291 _cleanup_free_ char *path = p;
4292
4293 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4294 return NULL;
4295 }
4296
4297 void exec_runtime_destroy(ExecRuntime *rt) {
4298 int r;
4299
4300 if (!rt)
4301 return;
4302
4303 /* If there are multiple users of this, let's leave the stuff around */
4304 if (rt->n_ref > 1)
4305 return;
4306
4307 if (rt->tmp_dir) {
4308 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4309
4310 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4311 if (r < 0) {
4312 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4313 free(rt->tmp_dir);
4314 }
4315
4316 rt->tmp_dir = NULL;
4317 }
4318
4319 if (rt->var_tmp_dir) {
4320 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4321
4322 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4323 if (r < 0) {
4324 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4325 free(rt->var_tmp_dir);
4326 }
4327
4328 rt->var_tmp_dir = NULL;
4329 }
4330
4331 safe_close_pair(rt->netns_storage_socket);
4332 }
4333
4334 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4335 [EXEC_INPUT_NULL] = "null",
4336 [EXEC_INPUT_TTY] = "tty",
4337 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4338 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4339 [EXEC_INPUT_SOCKET] = "socket",
4340 [EXEC_INPUT_NAMED_FD] = "fd",
4341 };
4342
4343 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4344
4345 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4346 [EXEC_OUTPUT_INHERIT] = "inherit",
4347 [EXEC_OUTPUT_NULL] = "null",
4348 [EXEC_OUTPUT_TTY] = "tty",
4349 [EXEC_OUTPUT_SYSLOG] = "syslog",
4350 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4351 [EXEC_OUTPUT_KMSG] = "kmsg",
4352 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4353 [EXEC_OUTPUT_JOURNAL] = "journal",
4354 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4355 [EXEC_OUTPUT_SOCKET] = "socket",
4356 [EXEC_OUTPUT_NAMED_FD] = "fd",
4357 };
4358
4359 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4360
4361 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4362 [EXEC_UTMP_INIT] = "init",
4363 [EXEC_UTMP_LOGIN] = "login",
4364 [EXEC_UTMP_USER] = "user",
4365 };
4366
4367 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4368
4369 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4370 [EXEC_PRESERVE_NO] = "no",
4371 [EXEC_PRESERVE_YES] = "yes",
4372 [EXEC_PRESERVE_RESTART] = "restart",
4373 };
4374
4375 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4376
4377 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4378 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4379 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4380 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4381 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4382 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4383 };
4384
4385 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
4386
4387 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
4388 [EXEC_KEYRING_INHERIT] = "inherit",
4389 [EXEC_KEYRING_PRIVATE] = "private",
4390 [EXEC_KEYRING_SHARED] = "shared",
4391 };
4392
4393 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);