]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #6790 from poettering/unit-unsetenv
[thirdparty/systemd.git] / src / core / execute.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <glob.h>
23 #include <grp.h>
24 #include <poll.h>
25 #include <signal.h>
26 #include <string.h>
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
29 #include <sys/mman.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
32 #include <sys/shm.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/un.h>
37 #include <unistd.h>
38 #include <utmpx.h>
39
40 #ifdef HAVE_PAM
41 #include <security/pam_appl.h>
42 #endif
43
44 #ifdef HAVE_SELINUX
45 #include <selinux/selinux.h>
46 #endif
47
48 #ifdef HAVE_SECCOMP
49 #include <seccomp.h>
50 #endif
51
52 #ifdef HAVE_APPARMOR
53 #include <sys/apparmor.h>
54 #endif
55
56 #include "sd-messages.h"
57
58 #include "af-list.h"
59 #include "alloc-util.h"
60 #ifdef HAVE_APPARMOR
61 #include "apparmor-util.h"
62 #endif
63 #include "async.h"
64 #include "barrier.h"
65 #include "cap-list.h"
66 #include "capability-util.h"
67 #include "def.h"
68 #include "env-util.h"
69 #include "errno-list.h"
70 #include "execute.h"
71 #include "exit-status.h"
72 #include "fd-util.h"
73 #include "fileio.h"
74 #include "format-util.h"
75 #include "fs-util.h"
76 #include "glob-util.h"
77 #include "io-util.h"
78 #include "ioprio.h"
79 #include "log.h"
80 #include "macro.h"
81 #include "missing.h"
82 #include "mkdir.h"
83 #include "namespace.h"
84 #include "parse-util.h"
85 #include "path-util.h"
86 #include "process-util.h"
87 #include "rlimit-util.h"
88 #include "rm-rf.h"
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92 #include "securebits.h"
93 #include "securebits-util.h"
94 #include "selinux-util.h"
95 #include "signal-util.h"
96 #include "smack-util.h"
97 #include "special.h"
98 #include "string-table.h"
99 #include "string-util.h"
100 #include "strv.h"
101 #include "syslog-util.h"
102 #include "terminal-util.h"
103 #include "unit.h"
104 #include "user-util.h"
105 #include "util.h"
106 #include "utmp-wtmp.h"
107
/* Idle timeouts, expressed in multiples of USEC_PER_SEC */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals we take over. This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the stream socket connected to the journal */
#define SNDBUF_SIZE (8 * 1024 * 1024)
115
/* Moves the passed file descriptors so that fds[k] ends up being fd number k+3, i.e. they form a
 * contiguous range right after stdin/stdout/stderr.
 *
 * Note that this modifies the fds array in place: each entry is replaced by the duplicate and the
 * previous descriptor is closed.
 *
 * Returns 0 on success, or a negative errno-style error if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int begin = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = begin; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied (by an fd later in the array that we
                         * haven't moved yet). Remember the first such index, and make another pass
                         * starting from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                begin = retry_at;
        } while (retry_at >= 0);

        return 0;
}
160
/* Adjusts the file descriptor flags of the fds we are about to pass to a child: O_NONBLOCK is set or
 * cleared (according to 'nonblock') on the first n_socket_fds entries only, and FD_CLOEXEC is dropped
 * from all of them.
 *
 * Returns 0 on success, or the negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total = n_storage_fds + n_socket_fds, k;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                int r;

                /* O_NONBLOCK only applies to socket activation fds, which come first in the array */
                if (k < n_socket_fds) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC is turned off unconditionally, after all these fds are supposed to
                 * survive into our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
194 static const char *exec_context_tty_path(const ExecContext *context) {
195 assert(context);
196
197 if (context->stdio_as_fds)
198 return NULL;
199
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204 }
205
206 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
209 assert(context);
210
211 path = exec_context_tty_path(context);
212
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
219
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
229 }
230
231 static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236 }
237
238 static bool is_terminal_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244 }
245
246 static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250 }
251
252 static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256 }
257
258 static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
273 }
274
/* Opens /dev/null with the specified flags (plus O_NOCTTY) and installs it as file descriptor 'nfd'.
 * Returns nfd on success, a negative errno-style error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right fd number right away */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
292
293 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
294 static const union sockaddr_union sa = {
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
302 if (gid_is_valid(gid)) {
303 oldgid = getgid();
304
305 if (setegid(gid) < 0)
306 return -errno;
307 }
308
309 if (uid_is_valid(uid)) {
310 olduid = getuid();
311
312 if (seteuid(uid) < 0) {
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
323 if (uid_is_valid(uid))
324 (void) seteuid(olduid);
325
326 restore_gid:
327 if (gid_is_valid(gid))
328 (void) setegid(oldgid);
329
330 return r;
331 }
332
333 static int connect_logger_as(
334 Unit *unit,
335 const ExecContext *context,
336 const ExecParameters *params,
337 ExecOutput output,
338 const char *ident,
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
343 int fd, r;
344
345 assert(context);
346 assert(params);
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
350
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
353 return -errno;
354
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
358
359 if (shutdown(fd, SHUT_RD) < 0) {
360 safe_close(fd);
361 return -errno;
362 }
363
364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
365
366 dprintf(fd,
367 "%s\n"
368 "%s\n"
369 "%i\n"
370 "%i\n"
371 "%i\n"
372 "%i\n"
373 "%i\n",
374 context->syslog_identifier ?: ident,
375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
378 is_syslog_output(output),
379 is_kmsg_output(output),
380 is_terminal_output(output));
381
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
387
388 return r;
389 }
/* Opens the terminal at 'path' with the specified open flags (plus O_NOCTTY) and installs it as file
 * descriptor 'nfd'. Returns nfd on success, a negative errno-style error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return result;
}
408
409 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
413
414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
415 return EXEC_INPUT_NULL;
416
417 return std_input;
418 }
419
420 static int fixup_output(ExecOutput std_output, int socket_fd) {
421
422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
423 return EXEC_OUTPUT_INHERIT;
424
425 return std_output;
426 }
427
428 static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
431 int socket_fd,
432 int named_iofds[3]) {
433
434 ExecInput i;
435
436 assert(context);
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
449
450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
451
452 switch (i) {
453
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
461
462 fd = acquire_terminal(exec_context_tty_path(context),
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
466 USEC_INFINITY);
467 if (fd < 0)
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
472 safe_close(fd);
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
486 default:
487 assert_not_reached("Unknown input type");
488 }
489 }
490
491 static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
497 int named_iofds[3],
498 const char *ident,
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
503
504 ExecOutput o;
505 ExecInput i;
506 int r;
507
508 assert(unit);
509 assert(context);
510 assert(params);
511 assert(ident);
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
514
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
531 o = fixup_output(context->std_output, socket_fd);
532
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
536
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
551
552 o = e;
553
554 } else if (o == EXEC_OUTPUT_INHERIT) {
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
558
559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
560 if (i != EXEC_INPUT_NULL)
561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
562
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
565 return fileno;
566
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
569 }
570
571 switch (o) {
572
573 case EXEC_OUTPUT_NULL:
574 return open_null_as(O_WRONLY, fileno);
575
576 case EXEC_OUTPUT_TTY:
577 if (is_terminal_input(i))
578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
579
580 /* We don't reset the terminal if this is just about output */
581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
582
583 case EXEC_OUTPUT_SYSLOG:
584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
585 case EXEC_OUTPUT_KMSG:
586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
590 if (r < 0) {
591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
592 r = open_null_as(O_WRONLY, fileno);
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
598 * services to detect whether they are connected to the journal or not. */
599
600 if (fstat(fileno, &st) >= 0) {
601 *journal_stream_dev = st.st_dev;
602 *journal_stream_ino = st.st_ino;
603 }
604 }
605 return r;
606
607 case EXEC_OUTPUT_SOCKET:
608 assert(socket_fd >= 0);
609 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
610
611 case EXEC_OUTPUT_NAMED_FD:
612 (void) fd_nonblock(named_iofds[fileno], false);
613 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
614
615 default:
616 assert_not_reached("Unknown error type");
617 }
618 }
619
620 static int chown_terminal(int fd, uid_t uid) {
621 struct stat st;
622
623 assert(fd >= 0);
624
625 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
626 if (isatty(fd) < 1)
627 return 0;
628
629 /* This might fail. What matters are the results. */
630 (void) fchown(fd, uid, -1);
631 (void) fchmod(fd, TTY_MODE);
632
633 if (fstat(fd, &st) < 0)
634 return -errno;
635
636 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
637 return -EPERM;
638
639 return 0;
640 }
641
642 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
643 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
644 int r;
645
646 assert(_saved_stdin);
647 assert(_saved_stdout);
648
649 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
650 if (saved_stdin < 0)
651 return -errno;
652
653 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
654 if (saved_stdout < 0)
655 return -errno;
656
657 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
658 if (fd < 0)
659 return fd;
660
661 r = chown_terminal(fd, getuid());
662 if (r < 0)
663 return r;
664
665 r = reset_terminal_fd(fd, true);
666 if (r < 0)
667 return r;
668
669 if (dup2(fd, STDIN_FILENO) < 0)
670 return -errno;
671
672 if (dup2(fd, STDOUT_FILENO) < 0)
673 return -errno;
674
675 if (fd >= 2)
676 safe_close(fd);
677 fd = -1;
678
679 *_saved_stdin = saved_stdin;
680 *_saved_stdout = saved_stdout;
681
682 saved_stdin = saved_stdout = -1;
683
684 return 0;
685 }
686
687 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
688 assert(err < 0);
689
690 if (err == -ETIMEDOUT)
691 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
692 else {
693 errno = -err;
694 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
695 }
696 }
697
698 static void write_confirm_error(int err, const char *vc, const Unit *u) {
699 _cleanup_close_ int fd = -1;
700
701 assert(vc);
702
703 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
704 if (fd < 0)
705 return;
706
707 write_confirm_error_fd(err, fd, u);
708 }
709
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved fds back in place as stdin and
 * stdout, and closes the saved copies.
 *
 * Returns 0 on success, or the negative errno-style error of the last dup2() that failed. The saved
 * fds are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return ret;
}
731
732 enum {
733 CONFIRM_PRETEND_FAILURE = -1,
734 CONFIRM_PRETEND_SUCCESS = 0,
735 CONFIRM_EXECUTE = 1,
736 };
737
738 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
739 int saved_stdout = -1, saved_stdin = -1, r;
740 _cleanup_free_ char *e = NULL;
741 char c;
742
743 /* For any internal errors, assume a positive response. */
744 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
745 if (r < 0) {
746 write_confirm_error(r, vc, u);
747 return CONFIRM_EXECUTE;
748 }
749
750 /* confirm_spawn might have been disabled while we were sleeping. */
751 if (manager_is_confirm_spawn_disabled(u->manager)) {
752 r = 1;
753 goto restore_stdio;
754 }
755
756 e = ellipsize(cmdline, 60, 100);
757 if (!e) {
758 log_oom();
759 r = CONFIRM_EXECUTE;
760 goto restore_stdio;
761 }
762
763 for (;;) {
764 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
765 if (r < 0) {
766 write_confirm_error_fd(r, STDOUT_FILENO, u);
767 r = CONFIRM_EXECUTE;
768 goto restore_stdio;
769 }
770
771 switch (c) {
772 case 'c':
773 printf("Resuming normal execution.\n");
774 manager_disable_confirm_spawn();
775 r = 1;
776 break;
777 case 'D':
778 unit_dump(u, stdout, " ");
779 continue; /* ask again */
780 case 'f':
781 printf("Failing execution.\n");
782 r = CONFIRM_PRETEND_FAILURE;
783 break;
784 case 'h':
785 printf(" c - continue, proceed without asking anymore\n"
786 " D - dump, show the state of the unit\n"
787 " f - fail, don't execute the command and pretend it failed\n"
788 " h - help\n"
789 " i - info, show a short summary of the unit\n"
790 " j - jobs, show jobs that are in progress\n"
791 " s - skip, don't execute the command and pretend it succeeded\n"
792 " y - yes, execute the command\n");
793 continue; /* ask again */
794 case 'i':
795 printf(" Description: %s\n"
796 " Unit: %s\n"
797 " Command: %s\n",
798 u->id, u->description, cmdline);
799 continue; /* ask again */
800 case 'j':
801 manager_dump_jobs(u->manager, stdout, " ");
802 continue; /* ask again */
803 case 'n':
804 /* 'n' was removed in favor of 'f'. */
805 printf("Didn't understand 'n', did you mean 'f'?\n");
806 continue; /* ask again */
807 case 's':
808 printf("Skipping execution.\n");
809 r = CONFIRM_PRETEND_SUCCESS;
810 break;
811 case 'y':
812 r = CONFIRM_EXECUTE;
813 break;
814 default:
815 assert_not_reached("Unhandled choice");
816 }
817 break;
818 }
819
820 restore_stdio:
821 restore_confirm_stdio(&saved_stdin, &saved_stdout);
822 return r;
823 }
824
825 static int get_fixed_user(const ExecContext *c, const char **user,
826 uid_t *uid, gid_t *gid,
827 const char **home, const char **shell) {
828 int r;
829 const char *name;
830
831 assert(c);
832
833 if (!c->user)
834 return 0;
835
836 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
837 * (i.e. are "/" or "/bin/nologin"). */
838
839 name = c->user;
840 r = get_user_creds_clean(&name, uid, gid, home, shell);
841 if (r < 0)
842 return r;
843
844 *user = name;
845 return 0;
846 }
847
848 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
849 int r;
850 const char *name;
851
852 assert(c);
853
854 if (!c->group)
855 return 0;
856
857 name = c->group;
858 r = get_group_creds(&name, gid);
859 if (r < 0)
860 return r;
861
862 *group = name;
863 return 0;
864 }
865
866 static int get_supplementary_groups(const ExecContext *c, const char *user,
867 const char *group, gid_t gid,
868 gid_t **supplementary_gids, int *ngids) {
869 char **i;
870 int r, k = 0;
871 int ngroups_max;
872 bool keep_groups = false;
873 gid_t *groups = NULL;
874 _cleanup_free_ gid_t *l_gids = NULL;
875
876 assert(c);
877
878 /*
879 * If user is given, then lookup GID and supplementary groups list.
880 * We avoid NSS lookups for gid=0. Also we have to initialize groups
881 * here and as early as possible so we keep the list of supplementary
882 * groups of the caller.
883 */
884 if (user && gid_is_valid(gid) && gid != 0) {
885 /* First step, initialize groups from /etc/groups */
886 if (initgroups(user, gid) < 0)
887 return -errno;
888
889 keep_groups = true;
890 }
891
892 if (!c->supplementary_groups)
893 return 0;
894
895 /*
896 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
897 * be positive, otherwise fail.
898 */
899 errno = 0;
900 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
901 if (ngroups_max <= 0) {
902 if (errno > 0)
903 return -errno;
904 else
905 return -EOPNOTSUPP; /* For all other values */
906 }
907
908 l_gids = new(gid_t, ngroups_max);
909 if (!l_gids)
910 return -ENOMEM;
911
912 if (keep_groups) {
913 /*
914 * Lookup the list of groups that the user belongs to, we
915 * avoid NSS lookups here too for gid=0.
916 */
917 k = ngroups_max;
918 if (getgrouplist(user, gid, l_gids, &k) < 0)
919 return -EINVAL;
920 } else
921 k = 0;
922
923 STRV_FOREACH(i, c->supplementary_groups) {
924 const char *g;
925
926 if (k >= ngroups_max)
927 return -E2BIG;
928
929 g = *i;
930 r = get_group_creds(&g, l_gids+k);
931 if (r < 0)
932 return r;
933
934 k++;
935 }
936
937 /*
938 * Sets ngids to zero to drop all supplementary groups, happens
939 * when we are under root and SupplementaryGroups= is empty.
940 */
941 if (k == 0) {
942 *ngids = 0;
943 return 0;
944 }
945
946 /* Otherwise get the final list of supplementary groups */
947 groups = memdup(l_gids, sizeof(gid_t) * k);
948 if (!groups)
949 return -ENOMEM;
950
951 *supplementary_gids = groups;
952 *ngids = k;
953
954 groups = NULL;
955
956 return 0;
957 }
958
959 static int enforce_groups(const ExecContext *context, gid_t gid,
960 gid_t *supplementary_gids, int ngids) {
961 int r;
962
963 assert(context);
964
965 /* Handle SupplementaryGroups= even if it is empty */
966 if (context->supplementary_groups) {
967 r = maybe_setgroups(ngids, supplementary_gids);
968 if (r < 0)
969 return r;
970 }
971
972 if (gid_is_valid(gid)) {
973 /* Then set our gids */
974 if (setresgid(gid, gid, gid) < 0)
975 return -errno;
976 }
977
978 return 0;
979 }
980
981 static int enforce_user(const ExecContext *context, uid_t uid) {
982 assert(context);
983
984 if (!uid_is_valid(uid))
985 return 0;
986
987 /* Sets (but doesn't look up) the uid and make sure we keep the
988 * capabilities while doing so. */
989
990 if (context->capability_ambient_set != 0) {
991
992 /* First step: If we need to keep capabilities but
993 * drop privileges we need to make sure we keep our
994 * caps, while we drop privileges. */
995 if (uid != 0) {
996 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
997
998 if (prctl(PR_GET_SECUREBITS) != sb)
999 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1000 return -errno;
1001 }
1002 }
1003
1004 /* Second step: actually set the uids */
1005 if (setresuid(uid, uid, uid) < 0)
1006 return -errno;
1007
1008 /* At this point we should have all necessary capabilities but
1009 are otherwise a normal user. However, the caps might got
1010 corrupted due to the setresuid() so we need clean them up
1011 later. This is done outside of this call. */
1012
1013 return 0;
1014 }
1015
#ifdef HAVE_PAM

/* PAM conversation callback that refuses every request: we have nobody to talk to, hence all
 * arguments are ignored and PAM_CONV_ERR is returned unconditionally. */
static int null_conv(
                int num_msg,
                const struct pam_message **msg,
                struct pam_response **resp,
                void *appdata_ptr) {

        return PAM_CONV_ERR;
}

#endif
1030
/* Opens a PAM session for 'user' with the PAM service 'name', and forks off a helper process that
 * closes the session again once the calling process is gone.
 *
 * 'tty' is passed to PAM as PAM_TTY if non-NULL. The strings in *env are exported into the PAM
 * environment; on success *env is freed and replaced by the environment list PAM hands back.
 * 'fds' (n_fds entries) are the file descriptors passed to the service, which the helper process
 * closes for itself. 'uid' and 'gid' are the credentials the helper process drops to.
 *
 * Returns 0 on success (and always if compiled without PAM support), a negative errno-style error
 * otherwise; PAM errors are reported as -EPERM. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From now on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;

                /* No child will ever wait for SIGTERM, hence don't leave it blocked in the caller.
                 * The error is saved in r already, and there's nothing sensible we could do if
                 * restoring the mask fails. */
                (void) sigprocmask(SIG_SETMASK, &old_ss, NULL);
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                if (sigwait(&ss, &sig) < 0) {
                                        if (errno == EINTR)
                                                continue;

                                        goto child_finish;
                                }

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Replace the caller's environment by what PAM handed back */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1247
/* Renames the current process after the last component of 'path', put in parentheses. At most the
 * last 8 characters of the name are used, so that the result fits in 10 characters (i.e. the length
 * of "/sbin/init") and looks pretty in /bin/ps. If the path has no usable last component the process
 * is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the process name is usually more interesting, since the
                 * first bit might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        /* '(' + up to 8 characters + ')' + NUL: 11 bytes at most */
        buf[0] = '(';
        memcpy(buf + 1, base, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
1278
1279 static bool context_has_address_families(const ExecContext *c) {
1280 assert(c);
1281
1282 return c->address_families_whitelist ||
1283 !set_isempty(c->address_families);
1284 }
1285
1286 static bool context_has_syscall_filters(const ExecContext *c) {
1287 assert(c);
1288
1289 return c->syscall_whitelist ||
1290 !set_isempty(c->syscall_filter);
1291 }
1292
/* Determines whether "no new privileges" (NNP) applies to this context. Returns true if it is configured
 * explicitly. Otherwise returns false if we hold CAP_SYS_ADMIN in our effective set, and else true if any
 * of the settings checked below is enabled. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* We need NNP if we have any form of seccomp and are unprivileged */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute || c->restrict_realtime)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables || c->protect_kernel_modules || c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        if (!set_isempty(c->syscall_archs))
                return true;

        return c->lock_personality;
}
1314
1315 #ifdef HAVE_SECCOMP
1316
1317 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1318
1319 if (is_seccomp_available())
1320 return false;
1321
1322 log_open();
1323 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1324 log_close();
1325 return true;
1326 }
1327
1328 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1329 uint32_t negative_action, default_action, action;
1330 int r;
1331
1332 assert(u);
1333 assert(c);
1334
1335 if (!context_has_syscall_filters(c))
1336 return 0;
1337
1338 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1339 return 0;
1340
1341 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1342
1343 if (c->syscall_whitelist) {
1344 default_action = negative_action;
1345 action = SCMP_ACT_ALLOW;
1346 } else {
1347 default_action = SCMP_ACT_ALLOW;
1348 action = negative_action;
1349 }
1350
1351 if (needs_ambient_hack) {
1352 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1353 if (r < 0)
1354 return r;
1355 }
1356
1357 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1358 }
1359
1360 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1361 assert(u);
1362 assert(c);
1363
1364 if (set_isempty(c->syscall_archs))
1365 return 0;
1366
1367 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1368 return 0;
1369
1370 return seccomp_restrict_archs(c->syscall_archs);
1371 }
1372
1373 static int apply_address_families(const Unit* u, const ExecContext *c) {
1374 assert(u);
1375 assert(c);
1376
1377 if (!context_has_address_families(c))
1378 return 0;
1379
1380 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1381 return 0;
1382
1383 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1384 }
1385
1386 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1387 assert(u);
1388 assert(c);
1389
1390 if (!c->memory_deny_write_execute)
1391 return 0;
1392
1393 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1394 return 0;
1395
1396 return seccomp_memory_deny_write_execute();
1397 }
1398
1399 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1400 assert(u);
1401 assert(c);
1402
1403 if (!c->restrict_realtime)
1404 return 0;
1405
1406 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1407 return 0;
1408
1409 return seccomp_restrict_realtime();
1410 }
1411
1412 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1413 assert(u);
1414 assert(c);
1415
1416 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1417 * let's protect even those systems where this is left on in the kernel. */
1418
1419 if (!c->protect_kernel_tunables)
1420 return 0;
1421
1422 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1423 return 0;
1424
1425 return seccomp_protect_sysctl();
1426 }
1427
1428 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1429 assert(u);
1430 assert(c);
1431
1432 /* Turn off module syscalls on ProtectKernelModules=yes */
1433
1434 if (!c->protect_kernel_modules)
1435 return 0;
1436
1437 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1438 return 0;
1439
1440 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1441 }
1442
1443 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1444 assert(u);
1445 assert(c);
1446
1447 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1448
1449 if (!c->private_devices)
1450 return 0;
1451
1452 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1453 return 0;
1454
1455 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1456 }
1457
1458 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1459 assert(u);
1460 assert(c);
1461
1462 if (!exec_context_restrict_namespaces_set(c))
1463 return 0;
1464
1465 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1466 return 0;
1467
1468 return seccomp_restrict_namespaces(c->restrict_namespaces);
1469 }
1470
1471 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1472 unsigned long personality;
1473 int r;
1474
1475 assert(u);
1476 assert(c);
1477
1478 if (!c->lock_personality)
1479 return 0;
1480
1481 if (skip_seccomp_unavailable(u, "LockPersonality="))
1482 return 0;
1483
1484 personality = c->personality;
1485
1486 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1487 if (personality == PERSONALITY_INVALID) {
1488
1489 r = opinionated_personality(&personality);
1490 if (r < 0)
1491 return r;
1492 }
1493
1494 return seccomp_lock_personality(personality);
1495 }
1496
1497 #endif
1498
1499 static void do_idle_pipe_dance(int idle_pipe[4]) {
1500 assert(idle_pipe);
1501
1502 idle_pipe[1] = safe_close(idle_pipe[1]);
1503 idle_pipe[2] = safe_close(idle_pipe[2]);
1504
1505 if (idle_pipe[0] >= 0) {
1506 int r;
1507
1508 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1509
1510 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1511 ssize_t n;
1512
1513 /* Signal systemd that we are bored and want to continue. */
1514 n = write(idle_pipe[3], "x", 1);
1515 if (n > 0)
1516 /* Wait for systemd to react to the signal above. */
1517 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1518 }
1519
1520 idle_pipe[0] = safe_close(idle_pipe[0]);
1521
1522 }
1523
1524 idle_pipe[3] = safe_close(idle_pipe[3]);
1525 }
1526
1527 static int build_environment(
1528 Unit *u,
1529 const ExecContext *c,
1530 const ExecParameters *p,
1531 unsigned n_fds,
1532 const char *home,
1533 const char *username,
1534 const char *shell,
1535 dev_t journal_stream_dev,
1536 ino_t journal_stream_ino,
1537 char ***ret) {
1538
1539 _cleanup_strv_free_ char **our_env = NULL;
1540 unsigned n_env = 0;
1541 char *x;
1542
1543 assert(u);
1544 assert(c);
1545 assert(ret);
1546
1547 our_env = new0(char*, 14);
1548 if (!our_env)
1549 return -ENOMEM;
1550
1551 if (n_fds > 0) {
1552 _cleanup_free_ char *joined = NULL;
1553
1554 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1555 return -ENOMEM;
1556 our_env[n_env++] = x;
1557
1558 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
1561
1562 joined = strv_join(p->fd_names, ":");
1563 if (!joined)
1564 return -ENOMEM;
1565
1566 x = strjoin("LISTEN_FDNAMES=", joined);
1567 if (!x)
1568 return -ENOMEM;
1569 our_env[n_env++] = x;
1570 }
1571
1572 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1573 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1574 return -ENOMEM;
1575 our_env[n_env++] = x;
1576
1577 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580 }
1581
1582 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1583 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1584 * check the database directly. */
1585 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1586 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1587 if (!x)
1588 return -ENOMEM;
1589 our_env[n_env++] = x;
1590 }
1591
1592 if (home) {
1593 x = strappend("HOME=", home);
1594 if (!x)
1595 return -ENOMEM;
1596 our_env[n_env++] = x;
1597 }
1598
1599 if (username) {
1600 x = strappend("LOGNAME=", username);
1601 if (!x)
1602 return -ENOMEM;
1603 our_env[n_env++] = x;
1604
1605 x = strappend("USER=", username);
1606 if (!x)
1607 return -ENOMEM;
1608 our_env[n_env++] = x;
1609 }
1610
1611 if (shell) {
1612 x = strappend("SHELL=", shell);
1613 if (!x)
1614 return -ENOMEM;
1615 our_env[n_env++] = x;
1616 }
1617
1618 if (!sd_id128_is_null(u->invocation_id)) {
1619 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1620 return -ENOMEM;
1621
1622 our_env[n_env++] = x;
1623 }
1624
1625 if (exec_context_needs_term(c)) {
1626 const char *tty_path, *term = NULL;
1627
1628 tty_path = exec_context_tty_path(c);
1629
1630 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1631 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1632 * passes to PID 1 ends up all the way in the console login shown. */
1633
1634 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1635 term = getenv("TERM");
1636 if (!term)
1637 term = default_term_for_tty(tty_path);
1638
1639 x = strappend("TERM=", term);
1640 if (!x)
1641 return -ENOMEM;
1642 our_env[n_env++] = x;
1643 }
1644
1645 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1646 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1647 return -ENOMEM;
1648
1649 our_env[n_env++] = x;
1650 }
1651
1652 our_env[n_env++] = NULL;
1653 assert(n_env <= 12);
1654
1655 *ret = our_env;
1656 our_env = NULL;
1657
1658 return 0;
1659 }
1660
1661 static int build_pass_environment(const ExecContext *c, char ***ret) {
1662 _cleanup_strv_free_ char **pass_env = NULL;
1663 size_t n_env = 0, n_bufsize = 0;
1664 char **i;
1665
1666 STRV_FOREACH(i, c->pass_environment) {
1667 _cleanup_free_ char *x = NULL;
1668 char *v;
1669
1670 v = getenv(*i);
1671 if (!v)
1672 continue;
1673 x = strjoin(*i, "=", v);
1674 if (!x)
1675 return -ENOMEM;
1676
1677 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1678 return -ENOMEM;
1679
1680 pass_env[n_env++] = x;
1681 pass_env[n_env] = NULL;
1682 x = NULL;
1683 }
1684
1685 *ret = pass_env;
1686 pass_env = NULL;
1687
1688 return 0;
1689 }
1690
1691 static bool exec_needs_mount_namespace(
1692 const ExecContext *context,
1693 const ExecParameters *params,
1694 ExecRuntime *runtime) {
1695
1696 assert(context);
1697 assert(params);
1698
1699 if (context->root_image)
1700 return true;
1701
1702 if (!strv_isempty(context->read_write_paths) ||
1703 !strv_isempty(context->read_only_paths) ||
1704 !strv_isempty(context->inaccessible_paths))
1705 return true;
1706
1707 if (context->n_bind_mounts > 0)
1708 return true;
1709
1710 if (context->mount_flags != 0)
1711 return true;
1712
1713 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1714 return true;
1715
1716 if (context->private_devices ||
1717 context->protect_system != PROTECT_SYSTEM_NO ||
1718 context->protect_home != PROTECT_HOME_NO ||
1719 context->protect_kernel_tunables ||
1720 context->protect_kernel_modules ||
1721 context->protect_control_groups)
1722 return true;
1723
1724 if (context->mount_apivfs && (context->root_image || context->root_directory))
1725 return true;
1726
1727 return false;
1728 }
1729
1730 static int setup_private_users(uid_t uid, gid_t gid) {
1731 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1732 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1733 _cleanup_close_ int unshare_ready_fd = -1;
1734 _cleanup_(sigkill_waitp) pid_t pid = 0;
1735 uint64_t c = 1;
1736 siginfo_t si;
1737 ssize_t n;
1738 int r;
1739
1740 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1741 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1742 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1743 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1744 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1745 * continues execution normally. */
1746
1747 if (uid != 0 && uid_is_valid(uid)) {
1748 r = asprintf(&uid_map,
1749 "0 0 1\n" /* Map root → root */
1750 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1751 uid, uid);
1752 if (r < 0)
1753 return -ENOMEM;
1754 } else {
1755 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1756 if (!uid_map)
1757 return -ENOMEM;
1758 }
1759
1760 if (gid != 0 && gid_is_valid(gid)) {
1761 r = asprintf(&gid_map,
1762 "0 0 1\n" /* Map root → root */
1763 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1764 gid, gid);
1765 if (r < 0)
1766 return -ENOMEM;
1767 } else {
1768 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1769 if (!gid_map)
1770 return -ENOMEM;
1771 }
1772
1773 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1774 * namespace. */
1775 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1776 if (unshare_ready_fd < 0)
1777 return -errno;
1778
1779 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1780 * failed. */
1781 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1782 return -errno;
1783
1784 pid = fork();
1785 if (pid < 0)
1786 return -errno;
1787
1788 if (pid == 0) {
1789 _cleanup_close_ int fd = -1;
1790 const char *a;
1791 pid_t ppid;
1792
1793 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1794 * here, after the parent opened its own user namespace. */
1795
1796 ppid = getppid();
1797 errno_pipe[0] = safe_close(errno_pipe[0]);
1798
1799 /* Wait until the parent unshared the user namespace */
1800 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1801 r = -errno;
1802 goto child_fail;
1803 }
1804
1805 /* Disable the setgroups() system call in the child user namespace, for good. */
1806 a = procfs_file_alloca(ppid, "setgroups");
1807 fd = open(a, O_WRONLY|O_CLOEXEC);
1808 if (fd < 0) {
1809 if (errno != ENOENT) {
1810 r = -errno;
1811 goto child_fail;
1812 }
1813
1814 /* If the file is missing the kernel is too old, let's continue anyway. */
1815 } else {
1816 if (write(fd, "deny\n", 5) < 0) {
1817 r = -errno;
1818 goto child_fail;
1819 }
1820
1821 fd = safe_close(fd);
1822 }
1823
1824 /* First write the GID map */
1825 a = procfs_file_alloca(ppid, "gid_map");
1826 fd = open(a, O_WRONLY|O_CLOEXEC);
1827 if (fd < 0) {
1828 r = -errno;
1829 goto child_fail;
1830 }
1831 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1832 r = -errno;
1833 goto child_fail;
1834 }
1835 fd = safe_close(fd);
1836
1837 /* The write the UID map */
1838 a = procfs_file_alloca(ppid, "uid_map");
1839 fd = open(a, O_WRONLY|O_CLOEXEC);
1840 if (fd < 0) {
1841 r = -errno;
1842 goto child_fail;
1843 }
1844 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1845 r = -errno;
1846 goto child_fail;
1847 }
1848
1849 _exit(EXIT_SUCCESS);
1850
1851 child_fail:
1852 (void) write(errno_pipe[1], &r, sizeof(r));
1853 _exit(EXIT_FAILURE);
1854 }
1855
1856 errno_pipe[1] = safe_close(errno_pipe[1]);
1857
1858 if (unshare(CLONE_NEWUSER) < 0)
1859 return -errno;
1860
1861 /* Let the child know that the namespace is ready now */
1862 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1863 return -errno;
1864
1865 /* Try to read an error code from the child */
1866 n = read(errno_pipe[0], &r, sizeof(r));
1867 if (n < 0)
1868 return -errno;
1869 if (n == sizeof(r)) { /* an error code was sent to us */
1870 if (r < 0)
1871 return r;
1872 return -EIO;
1873 }
1874 if (n != 0) /* on success we should have read 0 bytes */
1875 return -EIO;
1876
1877 r = wait_for_terminate(pid, &si);
1878 if (r < 0)
1879 return r;
1880 pid = 0;
1881
1882 /* If something strange happened with the child, let's consider this fatal, too */
1883 if (si.si_code != CLD_EXITED || si.si_status != 0)
1884 return -EIO;
1885
1886 return 0;
1887 }
1888
1889 static int setup_exec_directory(
1890 const ExecContext *context,
1891 const ExecParameters *params,
1892 uid_t uid,
1893 gid_t gid,
1894 ExecDirectoryType type,
1895 int *exit_status) {
1896
1897 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1898 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1899 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1900 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1901 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1902 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1903 };
1904 char **rt;
1905 int r;
1906
1907 assert(context);
1908 assert(params);
1909 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1910 assert(exit_status);
1911
1912 if (!params->prefix[type])
1913 return 0;
1914
1915 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1916 if (!uid_is_valid(uid))
1917 uid = 0;
1918 if (!gid_is_valid(gid))
1919 gid = 0;
1920 }
1921
1922 STRV_FOREACH(rt, context->directories[type].paths) {
1923 _cleanup_free_ char *p;
1924
1925 p = strjoin(params->prefix[type], "/", *rt);
1926 if (!p) {
1927 r = -ENOMEM;
1928 goto fail;
1929 }
1930
1931 r = mkdir_parents_label(p, 0755);
1932 if (r < 0)
1933 goto fail;
1934
1935 r = mkdir_p_label(p, context->directories[type].mode);
1936 if (r < 0)
1937 goto fail;
1938
1939 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1940 * a service, and shall not be writable. */
1941 if (type == EXEC_DIRECTORY_CONFIGURATION)
1942 continue;
1943
1944 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
1945 if (r < 0)
1946 goto fail;
1947 }
1948
1949 return 0;
1950
1951 fail:
1952 *exit_status = exit_status_table[type];
1953
1954 return r;
1955 }
1956
1957 static int setup_smack(
1958 const ExecContext *context,
1959 const ExecCommand *command) {
1960
1961 int r;
1962
1963 assert(context);
1964 assert(command);
1965
1966 if (context->smack_process_label) {
1967 r = mac_smack_apply_pid(0, context->smack_process_label);
1968 if (r < 0)
1969 return r;
1970 }
1971 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1972 else {
1973 _cleanup_free_ char *exec_label = NULL;
1974
1975 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1976 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1977 return r;
1978
1979 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1980 if (r < 0)
1981 return r;
1982 }
1983 #endif
1984
1985 return 0;
1986 }
1987
1988 static int compile_read_write_paths(
1989 const ExecContext *context,
1990 const ExecParameters *params,
1991 char ***ret) {
1992
1993 _cleanup_strv_free_ char **l = NULL;
1994 char **rt;
1995 ExecDirectoryType i;
1996
1997 /* Compile the list of writable paths. This is the combination of
1998 * the explicitly configured paths, plus all runtime directories. */
1999
2000 if (strv_isempty(context->read_write_paths)) {
2001 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2002 if (!strv_isempty(context->directories[i].paths))
2003 break;
2004
2005 if (i == _EXEC_DIRECTORY_MAX) {
2006 *ret = NULL; /* NOP if neither is set */
2007 return 0;
2008 }
2009 }
2010
2011 l = strv_copy(context->read_write_paths);
2012 if (!l)
2013 return -ENOMEM;
2014
2015 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2016 if (!params->prefix[i])
2017 continue;
2018
2019 STRV_FOREACH(rt, context->directories[i].paths) {
2020 char *s;
2021
2022 s = strjoin(params->prefix[i], "/", *rt);
2023 if (!s)
2024 return -ENOMEM;
2025
2026 if (strv_consume(&l, s) < 0)
2027 return -ENOMEM;
2028 }
2029 }
2030
2031 *ret = l;
2032 l = NULL;
2033
2034 return 0;
2035 }
2036
2037 static int apply_mount_namespace(
2038 Unit *u,
2039 ExecCommand *command,
2040 const ExecContext *context,
2041 const ExecParameters *params,
2042 ExecRuntime *runtime) {
2043
2044 _cleanup_strv_free_ char **rw = NULL;
2045 char *tmp = NULL, *var = NULL;
2046 const char *root_dir = NULL, *root_image = NULL;
2047 NameSpaceInfo ns_info = {
2048 .ignore_protect_paths = false,
2049 .private_dev = context->private_devices,
2050 .protect_control_groups = context->protect_control_groups,
2051 .protect_kernel_tunables = context->protect_kernel_tunables,
2052 .protect_kernel_modules = context->protect_kernel_modules,
2053 .mount_apivfs = context->mount_apivfs,
2054 };
2055 bool needs_sandboxing;
2056 int r;
2057
2058 assert(context);
2059
2060 /* The runtime struct only contains the parent of the private /tmp,
2061 * which is non-accessible to world users. Inside of it there's a /tmp
2062 * that is sticky, and that's the one we want to use here. */
2063
2064 if (context->private_tmp && runtime) {
2065 if (runtime->tmp_dir)
2066 tmp = strjoina(runtime->tmp_dir, "/tmp");
2067 if (runtime->var_tmp_dir)
2068 var = strjoina(runtime->var_tmp_dir, "/tmp");
2069 }
2070
2071 r = compile_read_write_paths(context, params, &rw);
2072 if (r < 0)
2073 return r;
2074
2075 if (params->flags & EXEC_APPLY_CHROOT) {
2076 root_image = context->root_image;
2077
2078 if (!root_image)
2079 root_dir = context->root_directory;
2080 }
2081
2082 /*
2083 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2084 * sandbox info, otherwise enforce it, don't ignore protected paths and
2085 * fail if we are enable to apply the sandbox inside the mount namespace.
2086 */
2087 if (!context->dynamic_user && root_dir)
2088 ns_info.ignore_protect_paths = true;
2089
2090 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2091
2092 r = setup_namespace(root_dir, root_image,
2093 &ns_info, rw,
2094 needs_sandboxing ? context->read_only_paths : NULL,
2095 needs_sandboxing ? context->inaccessible_paths : NULL,
2096 context->bind_mounts,
2097 context->n_bind_mounts,
2098 tmp,
2099 var,
2100 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2101 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2102 context->mount_flags,
2103 DISSECT_IMAGE_DISCARD_ON_LOOP);
2104
2105 /* If we couldn't set up the namespace this is probably due to a
2106 * missing capability. In this case, silently proceeed. */
2107 if (IN_SET(r, -EPERM, -EACCES)) {
2108 log_open();
2109 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2110 log_close();
2111 r = 0;
2112 }
2113
2114 return r;
2115 }
2116
2117 static int apply_working_directory(
2118 const ExecContext *context,
2119 const ExecParameters *params,
2120 const char *home,
2121 const bool needs_mount_ns,
2122 int *exit_status) {
2123
2124 const char *d, *wd;
2125
2126 assert(context);
2127 assert(exit_status);
2128
2129 if (context->working_directory_home) {
2130
2131 if (!home) {
2132 *exit_status = EXIT_CHDIR;
2133 return -ENXIO;
2134 }
2135
2136 wd = home;
2137
2138 } else if (context->working_directory)
2139 wd = context->working_directory;
2140 else
2141 wd = "/";
2142
2143 if (params->flags & EXEC_APPLY_CHROOT) {
2144 if (!needs_mount_ns && context->root_directory)
2145 if (chroot(context->root_directory) < 0) {
2146 *exit_status = EXIT_CHROOT;
2147 return -errno;
2148 }
2149
2150 d = wd;
2151 } else
2152 d = prefix_roota(context->root_directory, wd);
2153
2154 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2155 *exit_status = EXIT_CHDIR;
2156 return -errno;
2157 }
2158
2159 return 0;
2160 }
2161
2162 static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2163 key_serial_t keyring;
2164
2165 assert(u);
2166 assert(p);
2167
2168 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2169 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2170 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2171 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2172 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2173 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2174
2175 if (!(p->flags & EXEC_NEW_KEYRING))
2176 return 0;
2177
2178 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2179 if (keyring == -1) {
2180 if (errno == ENOSYS)
2181 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2182 else if (IN_SET(errno, EACCES, EPERM))
2183 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2184 else if (errno == EDQUOT)
2185 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2186 else
2187 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2188
2189 return 0;
2190 }
2191
2192 /* Populate they keyring with the invocation ID by default. */
2193 if (!sd_id128_is_null(u->invocation_id)) {
2194 key_serial_t key;
2195
2196 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2197 if (key == -1)
2198 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2199 else {
2200 if (keyctl(KEYCTL_SETPERM, key,
2201 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2202 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2203 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2204 }
2205 }
2206
2207 /* And now, make the keyring owned by the service's user */
2208 if (uid_is_valid(uid) || gid_is_valid(gid))
2209 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2210 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2211
2212 return 0;
2213 }
2214
/* Appends those of the two fds of the pair that are >= 0 to the array, first entry first, and increases *n
 * by one for each fd appended. Does nothing if pair is NULL. The caller must make sure the array has room
 * for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++)
                if (pair[k] >= 0)
                        array[(*n)++] = pair[k];
}
2227
2228 static int close_remaining_fds(
2229 const ExecParameters *params,
2230 ExecRuntime *runtime,
2231 DynamicCreds *dcreds,
2232 int user_lookup_fd,
2233 int socket_fd,
2234 int *fds, unsigned n_fds) {
2235
2236 unsigned n_dont_close = 0;
2237 int dont_close[n_fds + 12];
2238
2239 assert(params);
2240
2241 if (params->stdin_fd >= 0)
2242 dont_close[n_dont_close++] = params->stdin_fd;
2243 if (params->stdout_fd >= 0)
2244 dont_close[n_dont_close++] = params->stdout_fd;
2245 if (params->stderr_fd >= 0)
2246 dont_close[n_dont_close++] = params->stderr_fd;
2247
2248 if (socket_fd >= 0)
2249 dont_close[n_dont_close++] = socket_fd;
2250 if (n_fds > 0) {
2251 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2252 n_dont_close += n_fds;
2253 }
2254
2255 if (runtime)
2256 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2257
2258 if (dcreds) {
2259 if (dcreds->user)
2260 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2261 if (dcreds->group)
2262 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2263 }
2264
2265 if (user_lookup_fd >= 0)
2266 dont_close[n_dont_close++] = user_lookup_fd;
2267
2268 return close_all_fds(dont_close, n_dont_close);
2269 }
2270
2271 static int send_user_lookup(
2272 Unit *unit,
2273 int user_lookup_fd,
2274 uid_t uid,
2275 gid_t gid) {
2276
2277 assert(unit);
2278
2279 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2280 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2281 * specified. */
2282
2283 if (user_lookup_fd < 0)
2284 return 0;
2285
2286 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2287 return 0;
2288
2289 if (writev(user_lookup_fd,
2290 (struct iovec[]) {
2291 { .iov_base = &uid, .iov_len = sizeof(uid) },
2292 { .iov_base = &gid, .iov_len = sizeof(gid) },
2293 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2294 return -errno;
2295
2296 return 0;
2297 }
2298
2299 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2300 int r;
2301
2302 assert(c);
2303 assert(home);
2304 assert(buf);
2305
2306 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2307
2308 if (*home)
2309 return 0;
2310
2311 if (!c->working_directory_home)
2312 return 0;
2313
2314 if (uid == 0) {
2315 /* Hardcode /root as home directory for UID 0 */
2316 *home = "/root";
2317 return 1;
2318 }
2319
2320 r = get_home_dir(buf);
2321 if (r < 0)
2322 return r;
2323
2324 *home = *buf;
2325 return 1;
2326 }
2327
2328 static int exec_child(
2329 Unit *unit,
2330 ExecCommand *command,
2331 const ExecContext *context,
2332 const ExecParameters *params,
2333 ExecRuntime *runtime,
2334 DynamicCreds *dcreds,
2335 char **argv,
2336 int socket_fd,
2337 int named_iofds[3],
2338 int *fds,
2339 unsigned n_storage_fds,
2340 unsigned n_socket_fds,
2341 char **files_env,
2342 int user_lookup_fd,
2343 int *exit_status,
2344 char **error_message) {
2345
2346 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2347 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2348 _cleanup_free_ gid_t *supplementary_gids = NULL;
2349 const char *username = NULL, *groupname = NULL;
2350 const char *home = NULL, *shell = NULL;
2351 dev_t journal_stream_dev = 0;
2352 ino_t journal_stream_ino = 0;
2353 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2354 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2355 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2356 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2357 #ifdef HAVE_SELINUX
2358 bool use_selinux = false;
2359 #endif
2360 #ifdef HAVE_SMACK
2361 bool use_smack = false;
2362 #endif
2363 #ifdef HAVE_APPARMOR
2364 bool use_apparmor = false;
2365 #endif
2366 uid_t uid = UID_INVALID;
2367 gid_t gid = GID_INVALID;
2368 int i, r, ngids = 0;
2369 unsigned n_fds;
2370 ExecDirectoryType dt;
2371 int secure_bits;
2372
2373 assert(unit);
2374 assert(command);
2375 assert(context);
2376 assert(params);
2377 assert(exit_status);
2378 assert(error_message);
2379 /* We don't always set error_message, hence it must be initialized */
2380 assert(*error_message == NULL);
2381
2382 rename_process_from_path(command->path);
2383
2384 /* We reset exactly these signals, since they are the
2385 * only ones we set to SIG_IGN in the main daemon. All
2386 * others we leave untouched because we set them to
2387 * SIG_DFL or a valid handler initially, both of which
2388 * will be demoted to SIG_DFL. */
2389 (void) default_signals(SIGNALS_CRASH_HANDLER,
2390 SIGNALS_IGNORE, -1);
2391
2392 if (context->ignore_sigpipe)
2393 (void) ignore_signals(SIGPIPE, -1);
2394
2395 r = reset_signal_mask();
2396 if (r < 0) {
2397 *exit_status = EXIT_SIGNAL_MASK;
2398 *error_message = strdup("Failed to reset signal mask");
2399 /* If strdup fails, here and below, we will just print the generic error message. */
2400 return r;
2401 }
2402
2403 if (params->idle_pipe)
2404 do_idle_pipe_dance(params->idle_pipe);
2405
2406 /* Close sockets very early to make sure we don't
2407 * block init reexecution because it cannot bind its
2408 * sockets */
2409
2410 log_forget_fds();
2411
2412 n_fds = n_storage_fds + n_socket_fds;
2413 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2414 if (r < 0) {
2415 *exit_status = EXIT_FDS;
2416 *error_message = strdup("Failed to close remaining fds");
2417 return r;
2418 }
2419
2420 if (!context->same_pgrp)
2421 if (setsid() < 0) {
2422 *exit_status = EXIT_SETSID;
2423 return -errno;
2424 }
2425
2426 exec_context_tty_reset(context, params);
2427
2428 if (unit_shall_confirm_spawn(unit)) {
2429 const char *vc = params->confirm_spawn;
2430 _cleanup_free_ char *cmdline = NULL;
2431
2432 cmdline = exec_command_line(argv);
2433 if (!cmdline) {
2434 *exit_status = EXIT_CONFIRM;
2435 return -ENOMEM;
2436 }
2437
2438 r = ask_for_confirmation(vc, unit, cmdline);
2439 if (r != CONFIRM_EXECUTE) {
2440 if (r == CONFIRM_PRETEND_SUCCESS) {
2441 *exit_status = EXIT_SUCCESS;
2442 return 0;
2443 }
2444 *exit_status = EXIT_CONFIRM;
2445 *error_message = strdup("Execution cancelled");
2446 return -ECANCELED;
2447 }
2448 }
2449
2450 if (context->dynamic_user && dcreds) {
2451
2452 /* Make sure we bypass our own NSS module for any NSS checks */
2453 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2454 *exit_status = EXIT_USER;
2455 *error_message = strdup("Failed to update environment");
2456 return -errno;
2457 }
2458
2459 r = dynamic_creds_realize(dcreds, &uid, &gid);
2460 if (r < 0) {
2461 *exit_status = EXIT_USER;
2462 *error_message = strdup("Failed to update dynamic user credentials");
2463 return r;
2464 }
2465
2466 if (!uid_is_valid(uid)) {
2467 *exit_status = EXIT_USER;
2468 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2469 /* If asprintf fails, here and below, we will just print the generic error message. */
2470 return -ESRCH;
2471 }
2472
2473 if (!gid_is_valid(gid)) {
2474 *exit_status = EXIT_USER;
2475 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
2476 return -ESRCH;
2477 }
2478
2479 if (dcreds->user)
2480 username = dcreds->user->name;
2481
2482 } else {
2483 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2484 if (r < 0) {
2485 *exit_status = EXIT_USER;
2486 *error_message = strdup("Failed to determine user credentials");
2487 return r;
2488 }
2489
2490 r = get_fixed_group(context, &groupname, &gid);
2491 if (r < 0) {
2492 *exit_status = EXIT_GROUP;
2493 *error_message = strdup("Failed to determine group credentials");
2494 return r;
2495 }
2496 }
2497
2498 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2499 r = get_supplementary_groups(context, username, groupname, gid,
2500 &supplementary_gids, &ngids);
2501 if (r < 0) {
2502 *exit_status = EXIT_GROUP;
2503 *error_message = strdup("Failed to determine supplementary groups");
2504 return r;
2505 }
2506
2507 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2508 if (r < 0) {
2509 *exit_status = EXIT_USER;
2510 *error_message = strdup("Failed to send user credentials to PID1");
2511 return r;
2512 }
2513
2514 user_lookup_fd = safe_close(user_lookup_fd);
2515
2516 r = acquire_home(context, uid, &home, &home_buffer);
2517 if (r < 0) {
2518 *exit_status = EXIT_CHDIR;
2519 *error_message = strdup("Failed to determine $HOME for user");
2520 return r;
2521 }
2522
2523 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2524 * must make sure to drop O_NONBLOCK */
2525 if (socket_fd >= 0)
2526 (void) fd_nonblock(socket_fd, false);
2527
2528 r = setup_input(context, params, socket_fd, named_iofds);
2529 if (r < 0) {
2530 *exit_status = EXIT_STDIN;
2531 *error_message = strdup("Failed to set up stdin");
2532 return r;
2533 }
2534
2535 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2536 if (r < 0) {
2537 *exit_status = EXIT_STDOUT;
2538 *error_message = strdup("Failed to set up stdout");
2539 return r;
2540 }
2541
2542 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2543 if (r < 0) {
2544 *exit_status = EXIT_STDERR;
2545 *error_message = strdup("Failed to set up stderr");
2546 return r;
2547 }
2548
2549 if (params->cgroup_path) {
2550 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2551 if (r < 0) {
2552 *exit_status = EXIT_CGROUP;
2553 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
2554 return r;
2555 }
2556 }
2557
2558 if (context->oom_score_adjust_set) {
2559 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2560
2561 /* When we can't make this change due to EPERM, then
2562 * let's silently skip over it. User namespaces
2563 * prohibit write access to this file, and we
2564 * shouldn't trip up over that. */
2565
2566 sprintf(t, "%i", context->oom_score_adjust);
2567 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2568 if (r == -EPERM || r == -EACCES) {
2569 log_open();
2570 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2571 log_close();
2572 } else if (r < 0) {
2573 *exit_status = EXIT_OOM_ADJUST;
2574 *error_message = strdup("Failed to write /proc/self/oom_score_adj");
2575 return -errno;
2576 }
2577 }
2578
2579 if (context->nice_set)
2580 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2581 *exit_status = EXIT_NICE;
2582 return -errno;
2583 }
2584
2585 if (context->cpu_sched_set) {
2586 struct sched_param param = {
2587 .sched_priority = context->cpu_sched_priority,
2588 };
2589
2590 r = sched_setscheduler(0,
2591 context->cpu_sched_policy |
2592 (context->cpu_sched_reset_on_fork ?
2593 SCHED_RESET_ON_FORK : 0),
2594 &param);
2595 if (r < 0) {
2596 *exit_status = EXIT_SETSCHEDULER;
2597 return -errno;
2598 }
2599 }
2600
2601 if (context->cpuset)
2602 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2603 *exit_status = EXIT_CPUAFFINITY;
2604 return -errno;
2605 }
2606
2607 if (context->ioprio_set)
2608 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2609 *exit_status = EXIT_IOPRIO;
2610 return -errno;
2611 }
2612
2613 if (context->timer_slack_nsec != NSEC_INFINITY)
2614 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2615 *exit_status = EXIT_TIMERSLACK;
2616 return -errno;
2617 }
2618
2619 if (context->personality != PERSONALITY_INVALID) {
2620 r = safe_personality(context->personality);
2621 if (r < 0) {
2622 *exit_status = EXIT_PERSONALITY;
2623 return r;
2624 }
2625 }
2626
2627 if (context->utmp_id)
2628 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2629 context->tty_path,
2630 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2631 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2632 USER_PROCESS,
2633 username);
2634
2635 if (context->user) {
2636 r = chown_terminal(STDIN_FILENO, uid);
2637 if (r < 0) {
2638 *exit_status = EXIT_STDIN;
2639 return r;
2640 }
2641 }
2642
2643 /* If delegation is enabled we'll pass ownership of the cgroup
2644 * (but only in systemd's own controller hierarchy!) to the
2645 * user of the new process. */
2646 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2647 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2648 if (r < 0) {
2649 *exit_status = EXIT_CGROUP;
2650 return r;
2651 }
2652
2653
2654 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2655 if (r < 0) {
2656 *exit_status = EXIT_CGROUP;
2657 return r;
2658 }
2659 }
2660
2661 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
2662 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2663 if (r < 0)
2664 return r;
2665 }
2666
2667 r = build_environment(
2668 unit,
2669 context,
2670 params,
2671 n_fds,
2672 home,
2673 username,
2674 shell,
2675 journal_stream_dev,
2676 journal_stream_ino,
2677 &our_env);
2678 if (r < 0) {
2679 *exit_status = EXIT_MEMORY;
2680 return r;
2681 }
2682
2683 r = build_pass_environment(context, &pass_env);
2684 if (r < 0) {
2685 *exit_status = EXIT_MEMORY;
2686 return r;
2687 }
2688
2689 accum_env = strv_env_merge(5,
2690 params->environment,
2691 our_env,
2692 pass_env,
2693 context->environment,
2694 files_env,
2695 NULL);
2696 if (!accum_env) {
2697 *exit_status = EXIT_MEMORY;
2698 return -ENOMEM;
2699 }
2700 accum_env = strv_env_clean(accum_env);
2701
2702 (void) umask(context->umask);
2703
2704 r = setup_keyring(unit, params, uid, gid);
2705 if (r < 0) {
2706 *exit_status = EXIT_KEYRING;
2707 return r;
2708 }
2709
2710 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2711 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2712
2713 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2714 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
2715
2716 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2717 if (needs_ambient_hack)
2718 needs_setuid = false;
2719 else
2720 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2721
2722 if (needs_sandboxing) {
2723 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2724 * present. The actual MAC context application will happen later, as late as possible, to avoid
2725 * impacting our own code paths. */
2726
2727 #ifdef HAVE_SELINUX
2728 use_selinux = mac_selinux_use();
2729 #endif
2730 #ifdef HAVE_SMACK
2731 use_smack = mac_smack_use();
2732 #endif
2733 #ifdef HAVE_APPARMOR
2734 use_apparmor = mac_apparmor_use();
2735 #endif
2736 }
2737
2738 if (needs_setuid) {
2739 if (context->pam_name && username) {
2740 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2741 if (r < 0) {
2742 *exit_status = EXIT_PAM;
2743 return r;
2744 }
2745 }
2746 }
2747
2748 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2749 r = setup_netns(runtime->netns_storage_socket);
2750 if (r < 0) {
2751 *exit_status = EXIT_NETWORK;
2752 return r;
2753 }
2754 }
2755
2756 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2757 if (needs_mount_namespace) {
2758 r = apply_mount_namespace(unit, command, context, params, runtime);
2759 if (r < 0) {
2760 *exit_status = EXIT_NAMESPACE;
2761 return r;
2762 }
2763 }
2764
2765 /* Apply just after mount namespace setup */
2766 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2767 if (r < 0)
2768 return r;
2769
2770 /* Drop groups as early as possible */
2771 if (needs_setuid) {
2772 r = enforce_groups(context, gid, supplementary_gids, ngids);
2773 if (r < 0) {
2774 *exit_status = EXIT_GROUP;
2775 return r;
2776 }
2777 }
2778
2779 if (needs_sandboxing) {
2780 #ifdef HAVE_SELINUX
2781 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
2782 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2783 if (r < 0) {
2784 *exit_status = EXIT_SELINUX_CONTEXT;
2785 return r;
2786 }
2787 }
2788 #endif
2789
2790 if (context->private_users) {
2791 r = setup_private_users(uid, gid);
2792 if (r < 0) {
2793 *exit_status = EXIT_USER;
2794 return r;
2795 }
2796 }
2797 }
2798
2799 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2800 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2801 * was needed to upload the policy and can now be closed as well. */
2802 r = close_all_fds(fds, n_fds);
2803 if (r >= 0)
2804 r = shift_fds(fds, n_fds);
2805 if (r >= 0)
2806 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
2807 if (r < 0) {
2808 *exit_status = EXIT_FDS;
2809 return r;
2810 }
2811
2812 secure_bits = context->secure_bits;
2813
2814 if (needs_sandboxing) {
2815 uint64_t bset;
2816
2817 for (i = 0; i < _RLIMIT_MAX; i++) {
2818
2819 if (!context->rlimit[i])
2820 continue;
2821
2822 r = setrlimit_closest(i, context->rlimit[i]);
2823 if (r < 0) {
2824 *exit_status = EXIT_LIMITS;
2825 return r;
2826 }
2827 }
2828
2829 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2830 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2831 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2832 *exit_status = EXIT_LIMITS;
2833 return -errno;
2834 }
2835 }
2836
2837 bset = context->capability_bounding_set;
2838 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2839 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2840 * instead of us doing that */
2841 if (needs_ambient_hack)
2842 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2843 (UINT64_C(1) << CAP_SETUID) |
2844 (UINT64_C(1) << CAP_SETGID);
2845
2846 if (!cap_test_all(bset)) {
2847 r = capability_bounding_set_drop(bset, false);
2848 if (r < 0) {
2849 *exit_status = EXIT_CAPABILITIES;
2850 *error_message = strdup("Failed to drop capabilities");
2851 return r;
2852 }
2853 }
2854
2855 /* This is done before enforce_user, but ambient set
2856 * does not survive over setresuid() if keep_caps is not set. */
2857 if (!needs_ambient_hack &&
2858 context->capability_ambient_set != 0) {
2859 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2860 if (r < 0) {
2861 *exit_status = EXIT_CAPABILITIES;
2862 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
2863 return r;
2864 }
2865 }
2866 }
2867
2868 if (needs_setuid) {
2869 if (context->user) {
2870 r = enforce_user(context, uid);
2871 if (r < 0) {
2872 *exit_status = EXIT_USER;
2873 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
2874 return r;
2875 }
2876
2877 if (!needs_ambient_hack &&
2878 context->capability_ambient_set != 0) {
2879
2880 /* Fix the ambient capabilities after user change. */
2881 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2882 if (r < 0) {
2883 *exit_status = EXIT_CAPABILITIES;
2884 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
2885 return r;
2886 }
2887
2888 /* If we were asked to change user and ambient capabilities
2889 * were requested, we had to add keep-caps to the securebits
2890 * so that we would maintain the inherited capability set
2891 * through the setresuid(). Make sure that the bit is added
2892 * also to the context secure_bits so that we don't try to
2893 * drop the bit away next. */
2894
2895 secure_bits |= 1<<SECURE_KEEP_CAPS;
2896 }
2897 }
2898 }
2899
2900 if (needs_sandboxing) {
2901 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2902 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2903 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2904 * are restricted. */
2905
2906 #ifdef HAVE_SELINUX
2907 if (use_selinux) {
2908 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2909
2910 if (exec_context) {
2911 r = setexeccon(exec_context);
2912 if (r < 0) {
2913 *exit_status = EXIT_SELINUX_CONTEXT;
2914 (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
2915 return r;
2916 }
2917 }
2918 }
2919 #endif
2920
2921 #ifdef HAVE_SMACK
2922 if (use_smack) {
2923 r = setup_smack(context, command);
2924 if (r < 0) {
2925 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2926 *error_message = strdup("Failed to set SMACK process label");
2927 return r;
2928 }
2929 }
2930 #endif
2931
2932 #ifdef HAVE_APPARMOR
2933 if (use_apparmor && context->apparmor_profile) {
2934 r = aa_change_onexec(context->apparmor_profile);
2935 if (r < 0 && !context->apparmor_profile_ignore) {
2936 *exit_status = EXIT_APPARMOR_PROFILE;
2937 (void) asprintf(error_message,
2938 "Failed to prepare AppArmor profile change to %s",
2939 context->apparmor_profile);
2940 return -errno;
2941 }
2942 }
2943 #endif
2944
2945 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2946 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
2947 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2948 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
2949 *exit_status = EXIT_SECUREBITS;
2950 *error_message = strdup("Failed to set secure bits");
2951 return -errno;
2952 }
2953
2954 if (context_has_no_new_privileges(context))
2955 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2956 *exit_status = EXIT_NO_NEW_PRIVILEGES;
2957 *error_message = strdup("Failed to disable new privileges");
2958 return -errno;
2959 }
2960
2961 #ifdef HAVE_SECCOMP
2962 r = apply_address_families(unit, context);
2963 if (r < 0) {
2964 *exit_status = EXIT_ADDRESS_FAMILIES;
2965 *error_message = strdup("Failed to restrict address families");
2966 return r;
2967 }
2968
2969 r = apply_memory_deny_write_execute(unit, context);
2970 if (r < 0) {
2971 *exit_status = EXIT_SECCOMP;
2972 *error_message = strdup("Failed to disable writing to executable memory");
2973 return r;
2974 }
2975
2976 r = apply_restrict_realtime(unit, context);
2977 if (r < 0) {
2978 *exit_status = EXIT_SECCOMP;
2979 *error_message = strdup("Failed to apply realtime restrictions");
2980 return r;
2981 }
2982
2983 r = apply_restrict_namespaces(unit, context);
2984 if (r < 0) {
2985 *exit_status = EXIT_SECCOMP;
2986 *error_message = strdup("Failed to apply namespace restrictions");
2987 return r;
2988 }
2989
2990 r = apply_protect_sysctl(unit, context);
2991 if (r < 0) {
2992 *exit_status = EXIT_SECCOMP;
2993 *error_message = strdup("Failed to apply sysctl restrictions");
2994 return r;
2995 }
2996
2997 r = apply_protect_kernel_modules(unit, context);
2998 if (r < 0) {
2999 *exit_status = EXIT_SECCOMP;
3000 *error_message = strdup("Failed to apply module loading restrictions");
3001 return r;
3002 }
3003
3004 r = apply_private_devices(unit, context);
3005 if (r < 0) {
3006 *exit_status = EXIT_SECCOMP;
3007 *error_message = strdup("Failed to set up private devices");
3008 return r;
3009 }
3010
3011 r = apply_syscall_archs(unit, context);
3012 if (r < 0) {
3013 *exit_status = EXIT_SECCOMP;
3014 *error_message = strdup("Failed to apply syscall architecture restrictions");
3015 return r;
3016 }
3017
3018 r = apply_lock_personality(unit, context);
3019 if (r < 0) {
3020 *exit_status = EXIT_SECCOMP;
3021 *error_message = strdup("Failed to lock personalities");
3022 return r;
3023 }
3024
3025 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3026 * by the filter as little as possible. */
3027 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3028 if (r < 0) {
3029 *exit_status = EXIT_SECCOMP;
3030 *error_message = strdup("Failed to apply syscall filters");
3031 return r;
3032 }
3033 #endif
3034 }
3035
3036 if (!strv_isempty(context->unset_environment)) {
3037 char **ee = NULL;
3038
3039 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3040 if (!ee) {
3041 *exit_status = EXIT_MEMORY;
3042 return -ENOMEM;
3043 }
3044
3045 strv_free(accum_env);
3046 accum_env = ee;
3047 }
3048
3049 final_argv = replace_env_argv(argv, accum_env);
3050 if (!final_argv) {
3051 *exit_status = EXIT_MEMORY;
3052 *error_message = strdup("Failed to prepare process arguments");
3053 return -ENOMEM;
3054 }
3055
3056 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3057 _cleanup_free_ char *line;
3058
3059 line = exec_command_line(final_argv);
3060 if (line) {
3061 log_open();
3062 log_struct(LOG_DEBUG,
3063 "EXECUTABLE=%s", command->path,
3064 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3065 LOG_UNIT_ID(unit),
3066 NULL);
3067 log_close();
3068 }
3069 }
3070
3071 execve(command->path, final_argv, accum_env);
3072 *exit_status = EXIT_EXEC;
3073 return -errno;
3074 }
3075
3076 int exec_spawn(Unit *unit,
3077 ExecCommand *command,
3078 const ExecContext *context,
3079 const ExecParameters *params,
3080 ExecRuntime *runtime,
3081 DynamicCreds *dcreds,
3082 pid_t *ret) {
3083
3084 _cleanup_strv_free_ char **files_env = NULL;
3085 int *fds = NULL;
3086 unsigned n_storage_fds = 0, n_socket_fds = 0;
3087 _cleanup_free_ char *line = NULL;
3088 int socket_fd, r;
3089 int named_iofds[3] = { -1, -1, -1 };
3090 char **argv;
3091 pid_t pid;
3092
3093 assert(unit);
3094 assert(command);
3095 assert(context);
3096 assert(ret);
3097 assert(params);
3098 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3099
3100 if (context->std_input == EXEC_INPUT_SOCKET ||
3101 context->std_output == EXEC_OUTPUT_SOCKET ||
3102 context->std_error == EXEC_OUTPUT_SOCKET) {
3103
3104 if (params->n_socket_fds > 1) {
3105 log_unit_error(unit, "Got more than one socket.");
3106 return -EINVAL;
3107 }
3108
3109 if (params->n_socket_fds == 0) {
3110 log_unit_error(unit, "Got no socket.");
3111 return -EINVAL;
3112 }
3113
3114 socket_fd = params->fds[0];
3115 } else {
3116 socket_fd = -1;
3117 fds = params->fds;
3118 n_storage_fds = params->n_storage_fds;
3119 n_socket_fds = params->n_socket_fds;
3120 }
3121
3122 r = exec_context_named_iofds(unit, context, params, named_iofds);
3123 if (r < 0)
3124 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3125
3126 r = exec_context_load_environment(unit, context, &files_env);
3127 if (r < 0)
3128 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3129
3130 argv = params->argv ?: command->argv;
3131 line = exec_command_line(argv);
3132 if (!line)
3133 return log_oom();
3134
3135 log_struct(LOG_DEBUG,
3136 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3137 "EXECUTABLE=%s", command->path,
3138 LOG_UNIT_ID(unit),
3139 NULL);
3140 pid = fork();
3141 if (pid < 0)
3142 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3143
3144 if (pid == 0) {
3145 int exit_status;
3146 _cleanup_free_ char *error_message = NULL;
3147
3148 r = exec_child(unit,
3149 command,
3150 context,
3151 params,
3152 runtime,
3153 dcreds,
3154 argv,
3155 socket_fd,
3156 named_iofds,
3157 fds,
3158 n_storage_fds,
3159 n_socket_fds,
3160 files_env,
3161 unit->manager->user_lookup_fds[1],
3162 &exit_status,
3163 &error_message);
3164 if (r < 0) {
3165 log_open();
3166 if (error_message)
3167 log_struct_errno(LOG_ERR, r,
3168 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3169 LOG_UNIT_ID(unit),
3170 LOG_UNIT_MESSAGE(unit, "%s: %m",
3171 error_message),
3172 "EXECUTABLE=%s", command->path,
3173 NULL);
3174 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
3175 log_struct_errno(LOG_INFO, r,
3176 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3177 LOG_UNIT_ID(unit),
3178 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3179 command->path),
3180 "EXECUTABLE=%s", command->path,
3181 NULL);
3182 else
3183 log_struct_errno(LOG_ERR, r,
3184 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3185 LOG_UNIT_ID(unit),
3186 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3187 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3188 command->path),
3189 "EXECUTABLE=%s", command->path,
3190 NULL);
3191 }
3192
3193 _exit(exit_status);
3194 }
3195
3196 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3197
3198 /* We add the new process to the cgroup both in the child (so
3199 * that we can be sure that no user code is ever executed
3200 * outside of the cgroup) and in the parent (so that we can be
3201 * sure that when we kill the cgroup the process will be
3202 * killed too). */
3203 if (params->cgroup_path)
3204 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3205
3206 exec_status_start(&command->exec_status, pid);
3207
3208 *ret = pid;
3209 return 0;
3210 }
3211
3212 void exec_context_init(ExecContext *c) {
3213 ExecDirectoryType i;
3214
3215 assert(c);
3216
3217 c->umask = 0022;
3218 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3219 c->cpu_sched_policy = SCHED_OTHER;
3220 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3221 c->syslog_level_prefix = true;
3222 c->ignore_sigpipe = true;
3223 c->timer_slack_nsec = NSEC_INFINITY;
3224 c->personality = PERSONALITY_INVALID;
3225 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3226 c->directories[i].mode = 0755;
3227 c->capability_bounding_set = CAP_ALL;
3228 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3229 }
3230
3231 void exec_context_done(ExecContext *c) {
3232 unsigned l;
3233 ExecDirectoryType i;
3234
3235 assert(c);
3236
3237 c->environment = strv_free(c->environment);
3238 c->environment_files = strv_free(c->environment_files);
3239 c->pass_environment = strv_free(c->pass_environment);
3240 c->unset_environment = strv_free(c->unset_environment);
3241
3242 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3243 c->rlimit[l] = mfree(c->rlimit[l]);
3244
3245 for (l = 0; l < 3; l++)
3246 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3247
3248 c->working_directory = mfree(c->working_directory);
3249 c->root_directory = mfree(c->root_directory);
3250 c->root_image = mfree(c->root_image);
3251 c->tty_path = mfree(c->tty_path);
3252 c->syslog_identifier = mfree(c->syslog_identifier);
3253 c->user = mfree(c->user);
3254 c->group = mfree(c->group);
3255
3256 c->supplementary_groups = strv_free(c->supplementary_groups);
3257
3258 c->pam_name = mfree(c->pam_name);
3259
3260 c->read_only_paths = strv_free(c->read_only_paths);
3261 c->read_write_paths = strv_free(c->read_write_paths);
3262 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3263
3264 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3265
3266 if (c->cpuset)
3267 CPU_FREE(c->cpuset);
3268
3269 c->utmp_id = mfree(c->utmp_id);
3270 c->selinux_context = mfree(c->selinux_context);
3271 c->apparmor_profile = mfree(c->apparmor_profile);
3272 c->smack_process_label = mfree(c->smack_process_label);
3273
3274 c->syscall_filter = set_free(c->syscall_filter);
3275 c->syscall_archs = set_free(c->syscall_archs);
3276 c->address_families = set_free(c->address_families);
3277
3278 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3279 c->directories[i].paths = strv_free(c->directories[i].paths);
3280 }
3281
3282 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3283 char **i;
3284
3285 assert(c);
3286
3287 if (!runtime_prefix)
3288 return 0;
3289
3290 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3291 _cleanup_free_ char *p;
3292
3293 p = strjoin(runtime_prefix, "/", *i);
3294 if (!p)
3295 return -ENOMEM;
3296
3297 /* We execute this synchronously, since we need to be
3298 * sure this is gone when we start the service
3299 * next. */
3300 (void) rm_rf(p, REMOVE_ROOT);
3301 }
3302
3303 return 0;
3304 }
3305
3306 void exec_command_done(ExecCommand *c) {
3307 assert(c);
3308
3309 c->path = mfree(c->path);
3310
3311 c->argv = strv_free(c->argv);
3312 }
3313
3314 void exec_command_done_array(ExecCommand *c, unsigned n) {
3315 unsigned i;
3316
3317 for (i = 0; i < n; i++)
3318 exec_command_done(c+i);
3319 }
3320
3321 ExecCommand* exec_command_free_list(ExecCommand *c) {
3322 ExecCommand *i;
3323
3324 while ((i = c)) {
3325 LIST_REMOVE(command, c, i);
3326 exec_command_done(i);
3327 free(i);
3328 }
3329
3330 return NULL;
3331 }
3332
3333 void exec_command_free_array(ExecCommand **c, unsigned n) {
3334 unsigned i;
3335
3336 for (i = 0; i < n; i++)
3337 c[i] = exec_command_free_list(c[i]);
3338 }
3339
/* Context passed as userdata to invalid_env(), so that the log message about an invalid environment
 * assignment can be attributed to a unit and to the file it was read from. */
typedef struct InvalidEnvInfo {
        Unit *unit;             /* unit the message is logged for */
        const char *path;       /* printed after the offending assignment; the loader sets it to the matched file name */
} InvalidEnvInfo;
3344
3345 static void invalid_env(const char *p, void *userdata) {
3346 InvalidEnvInfo *info = userdata;
3347
3348 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3349 }
3350
3351 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3352 assert(c);
3353
3354 switch (fd_index) {
3355 case STDIN_FILENO:
3356 if (c->std_input != EXEC_INPUT_NAMED_FD)
3357 return NULL;
3358 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3359 case STDOUT_FILENO:
3360 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3361 return NULL;
3362 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3363 case STDERR_FILENO:
3364 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3365 return NULL;
3366 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3367 default:
3368 return NULL;
3369 }
3370 }
3371
3372 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3373 unsigned i, targets;
3374 const char* stdio_fdname[3];
3375 unsigned n_fds;
3376
3377 assert(c);
3378 assert(p);
3379
3380 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3381 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3382 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3383
3384 for (i = 0; i < 3; i++)
3385 stdio_fdname[i] = exec_context_fdname(c, i);
3386
3387 n_fds = p->n_storage_fds + p->n_socket_fds;
3388
3389 for (i = 0; i < n_fds && targets > 0; i++)
3390 if (named_iofds[STDIN_FILENO] < 0 &&
3391 c->std_input == EXEC_INPUT_NAMED_FD &&
3392 stdio_fdname[STDIN_FILENO] &&
3393 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3394
3395 named_iofds[STDIN_FILENO] = p->fds[i];
3396 targets--;
3397
3398 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3399 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3400 stdio_fdname[STDOUT_FILENO] &&
3401 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3402
3403 named_iofds[STDOUT_FILENO] = p->fds[i];
3404 targets--;
3405
3406 } else if (named_iofds[STDERR_FILENO] < 0 &&
3407 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3408 stdio_fdname[STDERR_FILENO] &&
3409 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3410
3411 named_iofds[STDERR_FILENO] = p->fds[i];
3412 targets--;
3413 }
3414
3415 return targets == 0 ? 0 : -ENOENT;
3416 }
3417
3418 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3419 char **i, **r = NULL;
3420
3421 assert(c);
3422 assert(l);
3423
3424 STRV_FOREACH(i, c->environment_files) {
3425 char *fn;
3426 int k;
3427 unsigned n;
3428 bool ignore = false;
3429 char **p;
3430 _cleanup_globfree_ glob_t pglob = {};
3431
3432 fn = *i;
3433
3434 if (fn[0] == '-') {
3435 ignore = true;
3436 fn++;
3437 }
3438
3439 if (!path_is_absolute(fn)) {
3440 if (ignore)
3441 continue;
3442
3443 strv_free(r);
3444 return -EINVAL;
3445 }
3446
3447 /* Filename supports globbing, take all matching files */
3448 k = safe_glob(fn, 0, &pglob);
3449 if (k < 0) {
3450 if (ignore)
3451 continue;
3452
3453 strv_free(r);
3454 return k;
3455 }
3456
3457 /* When we don't match anything, -ENOENT should be returned */
3458 assert(pglob.gl_pathc > 0);
3459
3460 for (n = 0; n < pglob.gl_pathc; n++) {
3461 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3462 if (k < 0) {
3463 if (ignore)
3464 continue;
3465
3466 strv_free(r);
3467 return k;
3468 }
3469 /* Log invalid environment variables with filename */
3470 if (p) {
3471 InvalidEnvInfo info = {
3472 .unit = unit,
3473 .path = pglob.gl_pathv[n]
3474 };
3475
3476 p = strv_env_clean_with_callback(p, invalid_env, &info);
3477 }
3478
3479 if (r == NULL)
3480 r = p;
3481 else {
3482 char **m;
3483
3484 m = strv_env_merge(2, r, p);
3485 strv_free(r);
3486 strv_free(p);
3487 if (!m)
3488 return -ENOMEM;
3489
3490 r = m;
3491 }
3492 }
3493 }
3494
3495 *l = r;
3496
3497 return 0;
3498 }
3499
3500 static bool tty_may_match_dev_console(const char *tty) {
3501 _cleanup_free_ char *active = NULL;
3502 char *console;
3503
3504 if (!tty)
3505 return true;
3506
3507 tty = skip_dev_prefix(tty);
3508
3509 /* trivial identity? */
3510 if (streq(tty, "console"))
3511 return true;
3512
3513 console = resolve_dev_console(&active);
3514 /* if we could not resolve, assume it may */
3515 if (!console)
3516 return true;
3517
3518 /* "tty0" means the active VC, so it may be the same sometimes */
3519 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3520 }
3521
3522 bool exec_context_may_touch_console(ExecContext *ec) {
3523
3524 return (ec->tty_reset ||
3525 ec->tty_vhangup ||
3526 ec->tty_vt_disallocate ||
3527 is_terminal_input(ec->std_input) ||
3528 is_terminal_output(ec->std_output) ||
3529 is_terminal_output(ec->std_error)) &&
3530 tty_may_match_dev_console(exec_context_tty_path(ec));
3531 }
3532
3533 static void strv_fprintf(FILE *f, char **l) {
3534 char **g;
3535
3536 assert(f);
3537
3538 STRV_FOREACH(g, l)
3539 fprintf(f, " %s", *g);
3540 }
3541
3542 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3543 char **e, **d;
3544 unsigned i;
3545 ExecDirectoryType dt;
3546 int r;
3547
3548 assert(c);
3549 assert(f);
3550
3551 prefix = strempty(prefix);
3552
3553 fprintf(f,
3554 "%sUMask: %04o\n"
3555 "%sWorkingDirectory: %s\n"
3556 "%sRootDirectory: %s\n"
3557 "%sNonBlocking: %s\n"
3558 "%sPrivateTmp: %s\n"
3559 "%sPrivateDevices: %s\n"
3560 "%sProtectKernelTunables: %s\n"
3561 "%sProtectKernelModules: %s\n"
3562 "%sProtectControlGroups: %s\n"
3563 "%sPrivateNetwork: %s\n"
3564 "%sPrivateUsers: %s\n"
3565 "%sProtectHome: %s\n"
3566 "%sProtectSystem: %s\n"
3567 "%sMountAPIVFS: %s\n"
3568 "%sIgnoreSIGPIPE: %s\n"
3569 "%sMemoryDenyWriteExecute: %s\n"
3570 "%sRestrictRealtime: %s\n",
3571 prefix, c->umask,
3572 prefix, c->working_directory ? c->working_directory : "/",
3573 prefix, c->root_directory ? c->root_directory : "/",
3574 prefix, yes_no(c->non_blocking),
3575 prefix, yes_no(c->private_tmp),
3576 prefix, yes_no(c->private_devices),
3577 prefix, yes_no(c->protect_kernel_tunables),
3578 prefix, yes_no(c->protect_kernel_modules),
3579 prefix, yes_no(c->protect_control_groups),
3580 prefix, yes_no(c->private_network),
3581 prefix, yes_no(c->private_users),
3582 prefix, protect_home_to_string(c->protect_home),
3583 prefix, protect_system_to_string(c->protect_system),
3584 prefix, yes_no(c->mount_apivfs),
3585 prefix, yes_no(c->ignore_sigpipe),
3586 prefix, yes_no(c->memory_deny_write_execute),
3587 prefix, yes_no(c->restrict_realtime));
3588
3589 if (c->root_image)
3590 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3591
3592 STRV_FOREACH(e, c->environment)
3593 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3594
3595 STRV_FOREACH(e, c->environment_files)
3596 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3597
3598 STRV_FOREACH(e, c->pass_environment)
3599 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3600
3601 STRV_FOREACH(e, c->unset_environment)
3602 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
3603
3604 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3605
3606 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3607 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3608
3609 STRV_FOREACH(d, c->directories[dt].paths)
3610 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3611 }
3612
3613 if (c->nice_set)
3614 fprintf(f,
3615 "%sNice: %i\n",
3616 prefix, c->nice);
3617
3618 if (c->oom_score_adjust_set)
3619 fprintf(f,
3620 "%sOOMScoreAdjust: %i\n",
3621 prefix, c->oom_score_adjust);
3622
3623 for (i = 0; i < RLIM_NLIMITS; i++)
3624 if (c->rlimit[i]) {
3625 fprintf(f, "%s%s: " RLIM_FMT "\n",
3626 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3627 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3628 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3629 }
3630
3631 if (c->ioprio_set) {
3632 _cleanup_free_ char *class_str = NULL;
3633
3634 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3635 if (r >= 0)
3636 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3637
3638 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3639 }
3640
3641 if (c->cpu_sched_set) {
3642 _cleanup_free_ char *policy_str = NULL;
3643
3644 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3645 if (r >= 0)
3646 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3647
3648 fprintf(f,
3649 "%sCPUSchedulingPriority: %i\n"
3650 "%sCPUSchedulingResetOnFork: %s\n",
3651 prefix, c->cpu_sched_priority,
3652 prefix, yes_no(c->cpu_sched_reset_on_fork));
3653 }
3654
3655 if (c->cpuset) {
3656 fprintf(f, "%sCPUAffinity:", prefix);
3657 for (i = 0; i < c->cpuset_ncpus; i++)
3658 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3659 fprintf(f, " %u", i);
3660 fputs("\n", f);
3661 }
3662
3663 if (c->timer_slack_nsec != NSEC_INFINITY)
3664 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3665
3666 fprintf(f,
3667 "%sStandardInput: %s\n"
3668 "%sStandardOutput: %s\n"
3669 "%sStandardError: %s\n",
3670 prefix, exec_input_to_string(c->std_input),
3671 prefix, exec_output_to_string(c->std_output),
3672 prefix, exec_output_to_string(c->std_error));
3673
3674 if (c->tty_path)
3675 fprintf(f,
3676 "%sTTYPath: %s\n"
3677 "%sTTYReset: %s\n"
3678 "%sTTYVHangup: %s\n"
3679 "%sTTYVTDisallocate: %s\n",
3680 prefix, c->tty_path,
3681 prefix, yes_no(c->tty_reset),
3682 prefix, yes_no(c->tty_vhangup),
3683 prefix, yes_no(c->tty_vt_disallocate));
3684
3685 if (IN_SET(c->std_output,
3686 EXEC_OUTPUT_SYSLOG,
3687 EXEC_OUTPUT_KMSG,
3688 EXEC_OUTPUT_JOURNAL,
3689 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3690 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3691 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3692 IN_SET(c->std_error,
3693 EXEC_OUTPUT_SYSLOG,
3694 EXEC_OUTPUT_KMSG,
3695 EXEC_OUTPUT_JOURNAL,
3696 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3697 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3698 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3699
3700 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3701
3702 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3703 if (r >= 0)
3704 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3705
3706 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3707 if (r >= 0)
3708 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3709 }
3710
3711 if (c->secure_bits) {
3712 _cleanup_free_ char *str = NULL;
3713
3714 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3715 if (r >= 0)
3716 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3717 }
3718
3719 if (c->capability_bounding_set != CAP_ALL) {
3720 _cleanup_free_ char *str = NULL;
3721
3722 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3723 if (r >= 0)
3724 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
3725 }
3726
3727 if (c->capability_ambient_set != 0) {
3728 _cleanup_free_ char *str = NULL;
3729
3730 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3731 if (r >= 0)
3732 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
3733 }
3734
3735 if (c->user)
3736 fprintf(f, "%sUser: %s\n", prefix, c->user);
3737 if (c->group)
3738 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3739
3740 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3741
3742 if (strv_length(c->supplementary_groups) > 0) {
3743 fprintf(f, "%sSupplementaryGroups:", prefix);
3744 strv_fprintf(f, c->supplementary_groups);
3745 fputs("\n", f);
3746 }
3747
3748 if (c->pam_name)
3749 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3750
3751 if (strv_length(c->read_write_paths) > 0) {
3752 fprintf(f, "%sReadWritePaths:", prefix);
3753 strv_fprintf(f, c->read_write_paths);
3754 fputs("\n", f);
3755 }
3756
3757 if (strv_length(c->read_only_paths) > 0) {
3758 fprintf(f, "%sReadOnlyPaths:", prefix);
3759 strv_fprintf(f, c->read_only_paths);
3760 fputs("\n", f);
3761 }
3762
3763 if (strv_length(c->inaccessible_paths) > 0) {
3764 fprintf(f, "%sInaccessiblePaths:", prefix);
3765 strv_fprintf(f, c->inaccessible_paths);
3766 fputs("\n", f);
3767 }
3768
3769 if (c->n_bind_mounts > 0)
3770 for (i = 0; i < c->n_bind_mounts; i++) {
3771 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3772 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3773 c->bind_mounts[i].source,
3774 c->bind_mounts[i].destination,
3775 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3776 }
3777
3778 if (c->utmp_id)
3779 fprintf(f,
3780 "%sUtmpIdentifier: %s\n",
3781 prefix, c->utmp_id);
3782
3783 if (c->selinux_context)
3784 fprintf(f,
3785 "%sSELinuxContext: %s%s\n",
3786 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3787
3788 if (c->apparmor_profile)
3789 fprintf(f,
3790 "%sAppArmorProfile: %s%s\n",
3791 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3792
3793 if (c->smack_process_label)
3794 fprintf(f,
3795 "%sSmackProcessLabel: %s%s\n",
3796 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3797
3798 if (c->personality != PERSONALITY_INVALID)
3799 fprintf(f,
3800 "%sPersonality: %s\n",
3801 prefix, strna(personality_to_string(c->personality)));
3802
3803 fprintf(f,
3804 "%sLockPersonality: %s\n",
3805 prefix, yes_no(c->lock_personality));
3806
3807 if (c->syscall_filter) {
3808 #ifdef HAVE_SECCOMP
3809 Iterator j;
3810 void *id;
3811 bool first = true;
3812 #endif
3813
3814 fprintf(f,
3815 "%sSystemCallFilter: ",
3816 prefix);
3817
3818 if (!c->syscall_whitelist)
3819 fputc('~', f);
3820
3821 #ifdef HAVE_SECCOMP
3822 SET_FOREACH(id, c->syscall_filter, j) {
3823 _cleanup_free_ char *name = NULL;
3824
3825 if (first)
3826 first = false;
3827 else
3828 fputc(' ', f);
3829
3830 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3831 fputs(strna(name), f);
3832 }
3833 #endif
3834
3835 fputc('\n', f);
3836 }
3837
3838 if (c->syscall_archs) {
3839 #ifdef HAVE_SECCOMP
3840 Iterator j;
3841 void *id;
3842 #endif
3843
3844 fprintf(f,
3845 "%sSystemCallArchitectures:",
3846 prefix);
3847
3848 #ifdef HAVE_SECCOMP
3849 SET_FOREACH(id, c->syscall_archs, j)
3850 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3851 #endif
3852 fputc('\n', f);
3853 }
3854
3855 if (exec_context_restrict_namespaces_set(c)) {
3856 _cleanup_free_ char *s = NULL;
3857
3858 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3859 if (r >= 0)
3860 fprintf(f, "%sRestrictNamespaces: %s\n",
3861 prefix, s);
3862 }
3863
3864 if (c->syscall_errno > 0)
3865 fprintf(f,
3866 "%sSystemCallErrorNumber: %s\n",
3867 prefix, strna(errno_to_name(c->syscall_errno)));
3868
3869 if (c->apparmor_profile)
3870 fprintf(f,
3871 "%sAppArmorProfile: %s%s\n",
3872 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3873 }
3874
3875 bool exec_context_maintains_privileges(ExecContext *c) {
3876 assert(c);
3877
3878 /* Returns true if the process forked off would run under
3879 * an unchanged UID or as root. */
3880
3881 if (!c->user)
3882 return true;
3883
3884 if (streq(c->user, "root") || streq(c->user, "0"))
3885 return true;
3886
3887 return false;
3888 }
3889
3890 int exec_context_get_effective_ioprio(ExecContext *c) {
3891 int p;
3892
3893 assert(c);
3894
3895 if (c->ioprio_set)
3896 return c->ioprio;
3897
3898 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3899 if (p < 0)
3900 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3901
3902 return p;
3903 }
3904
3905 void exec_status_start(ExecStatus *s, pid_t pid) {
3906 assert(s);
3907
3908 zero(*s);
3909 s->pid = pid;
3910 dual_timestamp_get(&s->start_timestamp);
3911 }
3912
3913 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
3914 assert(s);
3915
3916 if (s->pid && s->pid != pid)
3917 zero(*s);
3918
3919 s->pid = pid;
3920 dual_timestamp_get(&s->exit_timestamp);
3921
3922 s->code = code;
3923 s->status = status;
3924
3925 if (context) {
3926 if (context->utmp_id)
3927 utmp_put_dead_process(context->utmp_id, pid, code, status);
3928
3929 exec_context_tty_reset(context, NULL);
3930 }
3931 }
3932
3933 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3934 char buf[FORMAT_TIMESTAMP_MAX];
3935
3936 assert(s);
3937 assert(f);
3938
3939 if (s->pid <= 0)
3940 return;
3941
3942 prefix = strempty(prefix);
3943
3944 fprintf(f,
3945 "%sPID: "PID_FMT"\n",
3946 prefix, s->pid);
3947
3948 if (dual_timestamp_is_set(&s->start_timestamp))
3949 fprintf(f,
3950 "%sStart Timestamp: %s\n",
3951 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
3952
3953 if (dual_timestamp_is_set(&s->exit_timestamp))
3954 fprintf(f,
3955 "%sExit Timestamp: %s\n"
3956 "%sExit Code: %s\n"
3957 "%sExit Status: %i\n",
3958 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
3959 prefix, sigchld_code_to_string(s->code),
3960 prefix, s->status);
3961 }
3962
3963 char *exec_command_line(char **argv) {
3964 size_t k;
3965 char *n, *p, **a;
3966 bool first = true;
3967
3968 assert(argv);
3969
3970 k = 1;
3971 STRV_FOREACH(a, argv)
3972 k += strlen(*a)+3;
3973
3974 n = new(char, k);
3975 if (!n)
3976 return NULL;
3977
3978 p = n;
3979 STRV_FOREACH(a, argv) {
3980
3981 if (!first)
3982 *(p++) = ' ';
3983 else
3984 first = false;
3985
3986 if (strpbrk(*a, WHITESPACE)) {
3987 *(p++) = '\'';
3988 p = stpcpy(p, *a);
3989 *(p++) = '\'';
3990 } else
3991 p = stpcpy(p, *a);
3992
3993 }
3994
3995 *p = 0;
3996
3997 /* FIXME: this doesn't really handle arguments that have
3998 * spaces and ticks in them */
3999
4000 return n;
4001 }
4002
4003 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
4004 _cleanup_free_ char *cmd = NULL;
4005 const char *prefix2;
4006
4007 assert(c);
4008 assert(f);
4009
4010 prefix = strempty(prefix);
4011 prefix2 = strjoina(prefix, "\t");
4012
4013 cmd = exec_command_line(c->argv);
4014 fprintf(f,
4015 "%sCommand Line: %s\n",
4016 prefix, cmd ? cmd : strerror(ENOMEM));
4017
4018 exec_status_dump(&c->exec_status, f, prefix2);
4019 }
4020
4021 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4022 assert(f);
4023
4024 prefix = strempty(prefix);
4025
4026 LIST_FOREACH(command, c, c)
4027 exec_command_dump(c, f, prefix);
4028 }
4029
4030 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4031 ExecCommand *end;
4032
4033 assert(l);
4034 assert(e);
4035
4036 if (*l) {
4037 /* It's kind of important, that we keep the order here */
4038 LIST_FIND_TAIL(command, *l, end);
4039 LIST_INSERT_AFTER(command, *l, end, e);
4040 } else
4041 *l = e;
4042 }
4043
4044 int exec_command_set(ExecCommand *c, const char *path, ...) {
4045 va_list ap;
4046 char **l, *p;
4047
4048 assert(c);
4049 assert(path);
4050
4051 va_start(ap, path);
4052 l = strv_new_ap(path, ap);
4053 va_end(ap);
4054
4055 if (!l)
4056 return -ENOMEM;
4057
4058 p = strdup(path);
4059 if (!p) {
4060 strv_free(l);
4061 return -ENOMEM;
4062 }
4063
4064 free(c->path);
4065 c->path = p;
4066
4067 strv_free(c->argv);
4068 c->argv = l;
4069
4070 return 0;
4071 }
4072
4073 int exec_command_append(ExecCommand *c, const char *path, ...) {
4074 _cleanup_strv_free_ char **l = NULL;
4075 va_list ap;
4076 int r;
4077
4078 assert(c);
4079 assert(path);
4080
4081 va_start(ap, path);
4082 l = strv_new_ap(path, ap);
4083 va_end(ap);
4084
4085 if (!l)
4086 return -ENOMEM;
4087
4088 r = strv_extend_strv(&c->argv, l, false);
4089 if (r < 0)
4090 return r;
4091
4092 return 0;
4093 }
4094
4095
4096 static int exec_runtime_allocate(ExecRuntime **rt) {
4097
4098 if (*rt)
4099 return 0;
4100
4101 *rt = new0(ExecRuntime, 1);
4102 if (!*rt)
4103 return -ENOMEM;
4104
4105 (*rt)->n_ref = 1;
4106 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4107
4108 return 0;
4109 }
4110
4111 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4112 int r;
4113
4114 assert(rt);
4115 assert(c);
4116 assert(id);
4117
4118 if (*rt)
4119 return 1;
4120
4121 if (!c->private_network && !c->private_tmp)
4122 return 0;
4123
4124 r = exec_runtime_allocate(rt);
4125 if (r < 0)
4126 return r;
4127
4128 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4129 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4130 return -errno;
4131 }
4132
4133 if (c->private_tmp && !(*rt)->tmp_dir) {
4134 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4135 if (r < 0)
4136 return r;
4137 }
4138
4139 return 1;
4140 }
4141
4142 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4143 assert(r);
4144 assert(r->n_ref > 0);
4145
4146 r->n_ref++;
4147 return r;
4148 }
4149
4150 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4151
4152 if (!r)
4153 return NULL;
4154
4155 assert(r->n_ref > 0);
4156
4157 r->n_ref--;
4158 if (r->n_ref > 0)
4159 return NULL;
4160
4161 free(r->tmp_dir);
4162 free(r->var_tmp_dir);
4163 safe_close_pair(r->netns_storage_socket);
4164 return mfree(r);
4165 }
4166
4167 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4168 assert(u);
4169 assert(f);
4170 assert(fds);
4171
4172 if (!rt)
4173 return 0;
4174
4175 if (rt->tmp_dir)
4176 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4177
4178 if (rt->var_tmp_dir)
4179 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4180
4181 if (rt->netns_storage_socket[0] >= 0) {
4182 int copy;
4183
4184 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4185 if (copy < 0)
4186 return copy;
4187
4188 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4189 }
4190
4191 if (rt->netns_storage_socket[1] >= 0) {
4192 int copy;
4193
4194 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4195 if (copy < 0)
4196 return copy;
4197
4198 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4199 }
4200
4201 return 0;
4202 }
4203
4204 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4205 int r;
4206
4207 assert(rt);
4208 assert(key);
4209 assert(value);
4210
4211 if (streq(key, "tmp-dir")) {
4212 char *copy;
4213
4214 r = exec_runtime_allocate(rt);
4215 if (r < 0)
4216 return log_oom();
4217
4218 copy = strdup(value);
4219 if (!copy)
4220 return log_oom();
4221
4222 free((*rt)->tmp_dir);
4223 (*rt)->tmp_dir = copy;
4224
4225 } else if (streq(key, "var-tmp-dir")) {
4226 char *copy;
4227
4228 r = exec_runtime_allocate(rt);
4229 if (r < 0)
4230 return log_oom();
4231
4232 copy = strdup(value);
4233 if (!copy)
4234 return log_oom();
4235
4236 free((*rt)->var_tmp_dir);
4237 (*rt)->var_tmp_dir = copy;
4238
4239 } else if (streq(key, "netns-socket-0")) {
4240 int fd;
4241
4242 r = exec_runtime_allocate(rt);
4243 if (r < 0)
4244 return log_oom();
4245
4246 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4247 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4248 else {
4249 safe_close((*rt)->netns_storage_socket[0]);
4250 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4251 }
4252 } else if (streq(key, "netns-socket-1")) {
4253 int fd;
4254
4255 r = exec_runtime_allocate(rt);
4256 if (r < 0)
4257 return log_oom();
4258
4259 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4260 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4261 else {
4262 safe_close((*rt)->netns_storage_socket[1]);
4263 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4264 }
4265 } else
4266 return 0;
4267
4268 return 1;
4269 }
4270
4271 static void *remove_tmpdir_thread(void *p) {
4272 _cleanup_free_ char *path = p;
4273
4274 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4275 return NULL;
4276 }
4277
4278 void exec_runtime_destroy(ExecRuntime *rt) {
4279 int r;
4280
4281 if (!rt)
4282 return;
4283
4284 /* If there are multiple users of this, let's leave the stuff around */
4285 if (rt->n_ref > 1)
4286 return;
4287
4288 if (rt->tmp_dir) {
4289 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4290
4291 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4292 if (r < 0) {
4293 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4294 free(rt->tmp_dir);
4295 }
4296
4297 rt->tmp_dir = NULL;
4298 }
4299
4300 if (rt->var_tmp_dir) {
4301 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4302
4303 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4304 if (r < 0) {
4305 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4306 free(rt->var_tmp_dir);
4307 }
4308
4309 rt->var_tmp_dir = NULL;
4310 }
4311
4312 safe_close_pair(rt->netns_storage_socket);
4313 }
4314
4315 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4316 [EXEC_INPUT_NULL] = "null",
4317 [EXEC_INPUT_TTY] = "tty",
4318 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4319 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4320 [EXEC_INPUT_SOCKET] = "socket",
4321 [EXEC_INPUT_NAMED_FD] = "fd",
4322 };
4323
4324 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4325
4326 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4327 [EXEC_OUTPUT_INHERIT] = "inherit",
4328 [EXEC_OUTPUT_NULL] = "null",
4329 [EXEC_OUTPUT_TTY] = "tty",
4330 [EXEC_OUTPUT_SYSLOG] = "syslog",
4331 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4332 [EXEC_OUTPUT_KMSG] = "kmsg",
4333 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4334 [EXEC_OUTPUT_JOURNAL] = "journal",
4335 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4336 [EXEC_OUTPUT_SOCKET] = "socket",
4337 [EXEC_OUTPUT_NAMED_FD] = "fd",
4338 };
4339
4340 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4341
4342 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4343 [EXEC_UTMP_INIT] = "init",
4344 [EXEC_UTMP_LOGIN] = "login",
4345 [EXEC_UTMP_USER] = "user",
4346 };
4347
4348 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4349
4350 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4351 [EXEC_PRESERVE_NO] = "no",
4352 [EXEC_PRESERVE_YES] = "yes",
4353 [EXEC_PRESERVE_RESTART] = "restart",
4354 };
4355
4356 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4357
4358 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4359 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4360 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4361 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4362 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4363 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4364 };
4365
4366 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);