]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
util-lib: wrap personality() to fix up broken glibc error handling (#6766)
[thirdparty/systemd.git] / src / core / execute.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <glob.h>
23 #include <grp.h>
24 #include <poll.h>
25 #include <signal.h>
26 #include <string.h>
27 #include <sys/capability.h>
28 #include <sys/eventfd.h>
29 #include <sys/mman.h>
30 #include <sys/personality.h>
31 #include <sys/prctl.h>
32 #include <sys/shm.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/un.h>
37 #include <unistd.h>
38 #include <utmpx.h>
39
40 #ifdef HAVE_PAM
41 #include <security/pam_appl.h>
42 #endif
43
44 #ifdef HAVE_SELINUX
45 #include <selinux/selinux.h>
46 #endif
47
48 #ifdef HAVE_SECCOMP
49 #include <seccomp.h>
50 #endif
51
52 #ifdef HAVE_APPARMOR
53 #include <sys/apparmor.h>
54 #endif
55
56 #include "sd-messages.h"
57
58 #include "af-list.h"
59 #include "alloc-util.h"
60 #ifdef HAVE_APPARMOR
61 #include "apparmor-util.h"
62 #endif
63 #include "async.h"
64 #include "barrier.h"
65 #include "cap-list.h"
66 #include "capability-util.h"
67 #include "def.h"
68 #include "env-util.h"
69 #include "errno-list.h"
70 #include "execute.h"
71 #include "exit-status.h"
72 #include "fd-util.h"
73 #include "fileio.h"
74 #include "format-util.h"
75 #include "fs-util.h"
76 #include "glob-util.h"
77 #include "io-util.h"
78 #include "ioprio.h"
79 #include "log.h"
80 #include "macro.h"
81 #include "missing.h"
82 #include "mkdir.h"
83 #include "namespace.h"
84 #include "parse-util.h"
85 #include "path-util.h"
86 #include "process-util.h"
87 #include "rlimit-util.h"
88 #include "rm-rf.h"
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92 #include "securebits.h"
93 #include "securebits-util.h"
94 #include "selinux-util.h"
95 #include "signal-util.h"
96 #include "smack-util.h"
97 #include "special.h"
98 #include "string-table.h"
99 #include "string-util.h"
100 #include "strv.h"
101 #include "syslog-util.h"
102 #include "terminal-util.h"
103 #include "unit.h"
104 #include "user-util.h"
105 #include "util.h"
106 #include "utmp-wtmp.h"
107
/* Two timeouts, expressed in microseconds via USEC_PER_SEC.
 * NOTE(review): presumably used by the idle-pipe wait logic further down the file — confirm. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on TTYs by chown_terminal(). This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the journal stream socket in connect_logger_as() */
#define SNDBUF_SIZE (8 * 1024 * 1024)
115
/* Moves the passed file descriptors so that fds[i] ends up being fd number i+3, i.e. the array occupies
 * the contiguous range starting right after stdin/stdout/stderr. The array is modified in place: every
 * entry is replaced by the number of its duplicate and the old descriptor is closed.
 *
 * Returns 0 on success, or a negative errno-style error if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD hands out the lowest free fd >= want. If the slot was still taken
                         * we got something higher, so remember the first such index and do another
                         * pass from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
160
/* Adjusts the file descriptor flags of all fds that are passed on to the child: FD_CLOEXEC is dropped
 * from every entry, and O_NONBLOCK is set or cleared (according to 'nonblock') on the first
 * n_socket_fds entries only.
 *
 * Returns 0 on success, or the negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
        unsigned total = n_storage_fds + n_socket_fds, idx;

        if (total == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < total; idx++) {
                int r;

                /* O_NONBLOCK is only touched for the socket activation part of the array */
                if (idx < n_socket_fds) {
                        r = fd_nonblock(fds[idx], nonblock);
                        if (r < 0)
                                return r;
                }

                /* The fds are meant to survive into our children, hence FD_CLOEXEC always goes away */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
193
194 static const char *exec_context_tty_path(const ExecContext *context) {
195 assert(context);
196
197 if (context->stdio_as_fds)
198 return NULL;
199
200 if (context->tty_path)
201 return context->tty_path;
202
203 return "/dev/console";
204 }
205
206 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
207 const char *path;
208
209 assert(context);
210
211 path = exec_context_tty_path(context);
212
213 if (context->tty_vhangup) {
214 if (p && p->stdin_fd >= 0)
215 (void) terminal_vhangup_fd(p->stdin_fd);
216 else if (path)
217 (void) terminal_vhangup(path);
218 }
219
220 if (context->tty_reset) {
221 if (p && p->stdin_fd >= 0)
222 (void) reset_terminal_fd(p->stdin_fd, true);
223 else if (path)
224 (void) reset_terminal(path);
225 }
226
227 if (context->tty_vt_disallocate && path)
228 (void) vt_disallocate(path);
229 }
230
231 static bool is_terminal_input(ExecInput i) {
232 return IN_SET(i,
233 EXEC_INPUT_TTY,
234 EXEC_INPUT_TTY_FORCE,
235 EXEC_INPUT_TTY_FAIL);
236 }
237
238 static bool is_terminal_output(ExecOutput o) {
239 return IN_SET(o,
240 EXEC_OUTPUT_TTY,
241 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
242 EXEC_OUTPUT_KMSG_AND_CONSOLE,
243 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
244 }
245
246 static bool is_syslog_output(ExecOutput o) {
247 return IN_SET(o,
248 EXEC_OUTPUT_SYSLOG,
249 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
250 }
251
252 static bool is_kmsg_output(ExecOutput o) {
253 return IN_SET(o,
254 EXEC_OUTPUT_KMSG,
255 EXEC_OUTPUT_KMSG_AND_CONSOLE);
256 }
257
258 static bool exec_context_needs_term(const ExecContext *c) {
259 assert(c);
260
261 /* Return true if the execution context suggests we should set $TERM to something useful. */
262
263 if (is_terminal_input(c->std_input))
264 return true;
265
266 if (is_terminal_output(c->std_output))
267 return true;
268
269 if (is_terminal_output(c->std_error))
270 return true;
271
272 return !!c->tty_path;
273 }
274
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and installs it as fd number 'nfd'.
 * Returns nfd on success, a negative errno-style error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(null_fd);
        return r;
}
292
293 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
294 static const union sockaddr_union sa = {
295 .un.sun_family = AF_UNIX,
296 .un.sun_path = "/run/systemd/journal/stdout",
297 };
298 uid_t olduid = UID_INVALID;
299 gid_t oldgid = GID_INVALID;
300 int r;
301
302 if (gid_is_valid(gid)) {
303 oldgid = getgid();
304
305 if (setegid(gid) < 0)
306 return -errno;
307 }
308
309 if (uid_is_valid(uid)) {
310 olduid = getuid();
311
312 if (seteuid(uid) < 0) {
313 r = -errno;
314 goto restore_gid;
315 }
316 }
317
318 r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
319
320 /* If we fail to restore the uid or gid, things will likely
321 fail later on. This should only happen if an LSM interferes. */
322
323 if (uid_is_valid(uid))
324 (void) seteuid(olduid);
325
326 restore_gid:
327 if (gid_is_valid(gid))
328 (void) setegid(oldgid);
329
330 return r;
331 }
332
333 static int connect_logger_as(
334 Unit *unit,
335 const ExecContext *context,
336 const ExecParameters *params,
337 ExecOutput output,
338 const char *ident,
339 int nfd,
340 uid_t uid,
341 gid_t gid) {
342
343 int fd, r;
344
345 assert(context);
346 assert(params);
347 assert(output < _EXEC_OUTPUT_MAX);
348 assert(ident);
349 assert(nfd >= 0);
350
351 fd = socket(AF_UNIX, SOCK_STREAM, 0);
352 if (fd < 0)
353 return -errno;
354
355 r = connect_journal_socket(fd, uid, gid);
356 if (r < 0)
357 return r;
358
359 if (shutdown(fd, SHUT_RD) < 0) {
360 safe_close(fd);
361 return -errno;
362 }
363
364 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
365
366 dprintf(fd,
367 "%s\n"
368 "%s\n"
369 "%i\n"
370 "%i\n"
371 "%i\n"
372 "%i\n"
373 "%i\n",
374 context->syslog_identifier ?: ident,
375 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
376 context->syslog_priority,
377 !!context->syslog_level_prefix,
378 is_syslog_output(output),
379 is_kmsg_output(output),
380 is_terminal_output(output));
381
382 if (fd == nfd)
383 return nfd;
384
385 r = dup2(fd, nfd) < 0 ? -errno : nfd;
386 safe_close(fd);
387
388 return r;
389 }
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) via open_terminal() and installs it
 * as fd number 'nfd'. Returns nfd on success, a negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the right number? Then there's nothing to move. */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(tty_fd);
        return r;
}
408
409 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
410
411 if (is_terminal_input(std_input) && !apply_tty_stdin)
412 return EXEC_INPUT_NULL;
413
414 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
415 return EXEC_INPUT_NULL;
416
417 return std_input;
418 }
419
420 static int fixup_output(ExecOutput std_output, int socket_fd) {
421
422 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
423 return EXEC_OUTPUT_INHERIT;
424
425 return std_output;
426 }
427
428 static int setup_input(
429 const ExecContext *context,
430 const ExecParameters *params,
431 int socket_fd,
432 int named_iofds[3]) {
433
434 ExecInput i;
435
436 assert(context);
437 assert(params);
438
439 if (params->stdin_fd >= 0) {
440 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
441 return -errno;
442
443 /* Try to make this the controlling tty, if it is a tty, and reset it */
444 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
445 (void) reset_terminal_fd(STDIN_FILENO, true);
446
447 return STDIN_FILENO;
448 }
449
450 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
451
452 switch (i) {
453
454 case EXEC_INPUT_NULL:
455 return open_null_as(O_RDONLY, STDIN_FILENO);
456
457 case EXEC_INPUT_TTY:
458 case EXEC_INPUT_TTY_FORCE:
459 case EXEC_INPUT_TTY_FAIL: {
460 int fd, r;
461
462 fd = acquire_terminal(exec_context_tty_path(context),
463 i == EXEC_INPUT_TTY_FAIL,
464 i == EXEC_INPUT_TTY_FORCE,
465 false,
466 USEC_INFINITY);
467 if (fd < 0)
468 return fd;
469
470 if (fd != STDIN_FILENO) {
471 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
472 safe_close(fd);
473 } else
474 r = STDIN_FILENO;
475
476 return r;
477 }
478
479 case EXEC_INPUT_SOCKET:
480 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
481
482 case EXEC_INPUT_NAMED_FD:
483 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
484 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
485
486 default:
487 assert_not_reached("Unknown input type");
488 }
489 }
490
491 static int setup_output(
492 Unit *unit,
493 const ExecContext *context,
494 const ExecParameters *params,
495 int fileno,
496 int socket_fd,
497 int named_iofds[3],
498 const char *ident,
499 uid_t uid,
500 gid_t gid,
501 dev_t *journal_stream_dev,
502 ino_t *journal_stream_ino) {
503
504 ExecOutput o;
505 ExecInput i;
506 int r;
507
508 assert(unit);
509 assert(context);
510 assert(params);
511 assert(ident);
512 assert(journal_stream_dev);
513 assert(journal_stream_ino);
514
515 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
516
517 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
518 return -errno;
519
520 return STDOUT_FILENO;
521 }
522
523 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
524 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
525 return -errno;
526
527 return STDERR_FILENO;
528 }
529
530 i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
531 o = fixup_output(context->std_output, socket_fd);
532
533 if (fileno == STDERR_FILENO) {
534 ExecOutput e;
535 e = fixup_output(context->std_error, socket_fd);
536
537 /* This expects the input and output are already set up */
538
539 /* Don't change the stderr file descriptor if we inherit all
540 * the way and are not on a tty */
541 if (e == EXEC_OUTPUT_INHERIT &&
542 o == EXEC_OUTPUT_INHERIT &&
543 i == EXEC_INPUT_NULL &&
544 !is_terminal_input(context->std_input) &&
545 getppid () != 1)
546 return fileno;
547
548 /* Duplicate from stdout if possible */
549 if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
550 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
551
552 o = e;
553
554 } else if (o == EXEC_OUTPUT_INHERIT) {
555 /* If input got downgraded, inherit the original value */
556 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
557 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
558
559 /* If the input is connected to anything that's not a /dev/null, inherit that... */
560 if (i != EXEC_INPUT_NULL)
561 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
562
563 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
564 if (getppid() != 1)
565 return fileno;
566
567 /* We need to open /dev/null here anew, to get the right access mode. */
568 return open_null_as(O_WRONLY, fileno);
569 }
570
571 switch (o) {
572
573 case EXEC_OUTPUT_NULL:
574 return open_null_as(O_WRONLY, fileno);
575
576 case EXEC_OUTPUT_TTY:
577 if (is_terminal_input(i))
578 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
579
580 /* We don't reset the terminal if this is just about output */
581 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
582
583 case EXEC_OUTPUT_SYSLOG:
584 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
585 case EXEC_OUTPUT_KMSG:
586 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
587 case EXEC_OUTPUT_JOURNAL:
588 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
589 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
590 if (r < 0) {
591 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
592 r = open_null_as(O_WRONLY, fileno);
593 } else {
594 struct stat st;
595
596 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
597 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
598 * services to detect whether they are connected to the journal or not. */
599
600 if (fstat(fileno, &st) >= 0) {
601 *journal_stream_dev = st.st_dev;
602 *journal_stream_ino = st.st_ino;
603 }
604 }
605 return r;
606
607 case EXEC_OUTPUT_SOCKET:
608 assert(socket_fd >= 0);
609 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
610
611 case EXEC_OUTPUT_NAMED_FD:
612 (void) fd_nonblock(named_iofds[fileno], false);
613 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
614
615 default:
616 assert_not_reached("Unknown error type");
617 }
618 }
619
620 static int chown_terminal(int fd, uid_t uid) {
621 struct stat st;
622
623 assert(fd >= 0);
624
625 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
626 if (isatty(fd) < 1)
627 return 0;
628
629 /* This might fail. What matters are the results. */
630 (void) fchown(fd, uid, -1);
631 (void) fchmod(fd, TTY_MODE);
632
633 if (fstat(fd, &st) < 0)
634 return -errno;
635
636 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
637 return -EPERM;
638
639 return 0;
640 }
641
642 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
643 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
644 int r;
645
646 assert(_saved_stdin);
647 assert(_saved_stdout);
648
649 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
650 if (saved_stdin < 0)
651 return -errno;
652
653 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
654 if (saved_stdout < 0)
655 return -errno;
656
657 fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
658 if (fd < 0)
659 return fd;
660
661 r = chown_terminal(fd, getuid());
662 if (r < 0)
663 return r;
664
665 r = reset_terminal_fd(fd, true);
666 if (r < 0)
667 return r;
668
669 if (dup2(fd, STDIN_FILENO) < 0)
670 return -errno;
671
672 if (dup2(fd, STDOUT_FILENO) < 0)
673 return -errno;
674
675 if (fd >= 2)
676 safe_close(fd);
677 fd = -1;
678
679 *_saved_stdin = saved_stdin;
680 *_saved_stdout = saved_stdout;
681
682 saved_stdin = saved_stdout = -1;
683
684 return 0;
685 }
686
687 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
688 assert(err < 0);
689
690 if (err == -ETIMEDOUT)
691 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
692 else {
693 errno = -err;
694 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
695 }
696 }
697
698 static void write_confirm_error(int err, const char *vc, const Unit *u) {
699 _cleanup_close_ int fd = -1;
700
701 assert(vc);
702
703 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
704 if (fd < 0)
705 return;
706
707 write_confirm_error_fd(err, fd, u);
708 }
709
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved stdin/stdout back in place (for
 * each one that is valid) and closes the saved copies, resetting both variables to the value
 * safe_close() returns.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
731
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* 'f': don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* 's': don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* 'y': execute the command */
};
737
738 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
739 int saved_stdout = -1, saved_stdin = -1, r;
740 _cleanup_free_ char *e = NULL;
741 char c;
742
743 /* For any internal errors, assume a positive response. */
744 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
745 if (r < 0) {
746 write_confirm_error(r, vc, u);
747 return CONFIRM_EXECUTE;
748 }
749
750 /* confirm_spawn might have been disabled while we were sleeping. */
751 if (manager_is_confirm_spawn_disabled(u->manager)) {
752 r = 1;
753 goto restore_stdio;
754 }
755
756 e = ellipsize(cmdline, 60, 100);
757 if (!e) {
758 log_oom();
759 r = CONFIRM_EXECUTE;
760 goto restore_stdio;
761 }
762
763 for (;;) {
764 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
765 if (r < 0) {
766 write_confirm_error_fd(r, STDOUT_FILENO, u);
767 r = CONFIRM_EXECUTE;
768 goto restore_stdio;
769 }
770
771 switch (c) {
772 case 'c':
773 printf("Resuming normal execution.\n");
774 manager_disable_confirm_spawn();
775 r = 1;
776 break;
777 case 'D':
778 unit_dump(u, stdout, " ");
779 continue; /* ask again */
780 case 'f':
781 printf("Failing execution.\n");
782 r = CONFIRM_PRETEND_FAILURE;
783 break;
784 case 'h':
785 printf(" c - continue, proceed without asking anymore\n"
786 " D - dump, show the state of the unit\n"
787 " f - fail, don't execute the command and pretend it failed\n"
788 " h - help\n"
789 " i - info, show a short summary of the unit\n"
790 " j - jobs, show jobs that are in progress\n"
791 " s - skip, don't execute the command and pretend it succeeded\n"
792 " y - yes, execute the command\n");
793 continue; /* ask again */
794 case 'i':
795 printf(" Description: %s\n"
796 " Unit: %s\n"
797 " Command: %s\n",
798 u->id, u->description, cmdline);
799 continue; /* ask again */
800 case 'j':
801 manager_dump_jobs(u->manager, stdout, " ");
802 continue; /* ask again */
803 case 'n':
804 /* 'n' was removed in favor of 'f'. */
805 printf("Didn't understand 'n', did you mean 'f'?\n");
806 continue; /* ask again */
807 case 's':
808 printf("Skipping execution.\n");
809 r = CONFIRM_PRETEND_SUCCESS;
810 break;
811 case 'y':
812 r = CONFIRM_EXECUTE;
813 break;
814 default:
815 assert_not_reached("Unhandled choice");
816 }
817 break;
818 }
819
820 restore_stdio:
821 restore_confirm_stdio(&saved_stdin, &saved_stdout);
822 return r;
823 }
824
825 static int get_fixed_user(const ExecContext *c, const char **user,
826 uid_t *uid, gid_t *gid,
827 const char **home, const char **shell) {
828 int r;
829 const char *name;
830
831 assert(c);
832
833 if (!c->user)
834 return 0;
835
836 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
837 * (i.e. are "/" or "/bin/nologin"). */
838
839 name = c->user;
840 r = get_user_creds_clean(&name, uid, gid, home, shell);
841 if (r < 0)
842 return r;
843
844 *user = name;
845 return 0;
846 }
847
848 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
849 int r;
850 const char *name;
851
852 assert(c);
853
854 if (!c->group)
855 return 0;
856
857 name = c->group;
858 r = get_group_creds(&name, gid);
859 if (r < 0)
860 return r;
861
862 *group = name;
863 return 0;
864 }
865
866 static int get_supplementary_groups(const ExecContext *c, const char *user,
867 const char *group, gid_t gid,
868 gid_t **supplementary_gids, int *ngids) {
869 char **i;
870 int r, k = 0;
871 int ngroups_max;
872 bool keep_groups = false;
873 gid_t *groups = NULL;
874 _cleanup_free_ gid_t *l_gids = NULL;
875
876 assert(c);
877
878 /*
879 * If user is given, then lookup GID and supplementary groups list.
880 * We avoid NSS lookups for gid=0. Also we have to initialize groups
881 * here and as early as possible so we keep the list of supplementary
882 * groups of the caller.
883 */
884 if (user && gid_is_valid(gid) && gid != 0) {
885 /* First step, initialize groups from /etc/groups */
886 if (initgroups(user, gid) < 0)
887 return -errno;
888
889 keep_groups = true;
890 }
891
892 if (!c->supplementary_groups)
893 return 0;
894
895 /*
896 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
897 * be positive, otherwise fail.
898 */
899 errno = 0;
900 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
901 if (ngroups_max <= 0) {
902 if (errno > 0)
903 return -errno;
904 else
905 return -EOPNOTSUPP; /* For all other values */
906 }
907
908 l_gids = new(gid_t, ngroups_max);
909 if (!l_gids)
910 return -ENOMEM;
911
912 if (keep_groups) {
913 /*
914 * Lookup the list of groups that the user belongs to, we
915 * avoid NSS lookups here too for gid=0.
916 */
917 k = ngroups_max;
918 if (getgrouplist(user, gid, l_gids, &k) < 0)
919 return -EINVAL;
920 } else
921 k = 0;
922
923 STRV_FOREACH(i, c->supplementary_groups) {
924 const char *g;
925
926 if (k >= ngroups_max)
927 return -E2BIG;
928
929 g = *i;
930 r = get_group_creds(&g, l_gids+k);
931 if (r < 0)
932 return r;
933
934 k++;
935 }
936
937 /*
938 * Sets ngids to zero to drop all supplementary groups, happens
939 * when we are under root and SupplementaryGroups= is empty.
940 */
941 if (k == 0) {
942 *ngids = 0;
943 return 0;
944 }
945
946 /* Otherwise get the final list of supplementary groups */
947 groups = memdup(l_gids, sizeof(gid_t) * k);
948 if (!groups)
949 return -ENOMEM;
950
951 *supplementary_gids = groups;
952 *ngids = k;
953
954 groups = NULL;
955
956 return 0;
957 }
958
959 static int enforce_groups(const ExecContext *context, gid_t gid,
960 gid_t *supplementary_gids, int ngids) {
961 int r;
962
963 assert(context);
964
965 /* Handle SupplementaryGroups= even if it is empty */
966 if (context->supplementary_groups) {
967 r = maybe_setgroups(ngids, supplementary_gids);
968 if (r < 0)
969 return r;
970 }
971
972 if (gid_is_valid(gid)) {
973 /* Then set our gids */
974 if (setresgid(gid, gid, gid) < 0)
975 return -errno;
976 }
977
978 return 0;
979 }
980
981 static int enforce_user(const ExecContext *context, uid_t uid) {
982 assert(context);
983
984 if (!uid_is_valid(uid))
985 return 0;
986
987 /* Sets (but doesn't look up) the uid and make sure we keep the
988 * capabilities while doing so. */
989
990 if (context->capability_ambient_set != 0) {
991
992 /* First step: If we need to keep capabilities but
993 * drop privileges we need to make sure we keep our
994 * caps, while we drop privileges. */
995 if (uid != 0) {
996 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
997
998 if (prctl(PR_GET_SECUREBITS) != sb)
999 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1000 return -errno;
1001 }
1002 }
1003
1004 /* Second step: actually set the uids */
1005 if (setresuid(uid, uid, uid) < 0)
1006 return -errno;
1007
1008 /* At this point we should have all necessary capabilities but
1009 are otherwise a normal user. However, the caps might got
1010 corrupted due to the setresuid() so we need clean them up
1011 later. This is done outside of this call. */
1012
1013 return 0;
1014 }
1015
1016 #ifdef HAVE_PAM
1017
1018 static int null_conv(
1019 int num_msg,
1020 const struct pam_message **msg,
1021 struct pam_response **resp,
1022 void *appdata_ptr) {
1023
1024 /* We don't support conversations */
1025
1026 return PAM_CONV_ERR;
1027 }
1028
1029 #endif
1030
/* Opens a PAM session for service 'name' and user 'user', and forks off a helper process that closes
 * the session again later on.
 *
 * On success *env is freed and replaced by the environment list returned by PAM. 'fds'/'n_fds' are
 * the fds passed to the service; they are closed in the helper. 'uid'/'gid' are the credentials the
 * helper drops to. If 'tty' is non-NULL it is set as PAM_TTY.
 *
 * Returns 0 on success, -EPERM if a PAM call failed, or another negative errno-style error. Without
 * HAVE_PAM this is a no-op returning 0. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *pamh = NULL;
        sigset_t saved_mask;
        int pam_code = PAM_SUCCESS, r;
        char **var, **pam_env = NULL;
        bool session_open = false;
        pid_t pam_pid = 0, parent_pid;
        int pam_flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* PAM is set up in the parent process, which then forks. The child stays around until it is
         * killed via PR_GET_PDEATHSIG or by systemd via the cgroup logic, and then removes the PAM
         * session again. The parent process goes on to exec() the actual daemon. Doing it this way
         * round ensures that the main PID of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        if (log_get_max_level() < LOG_DEBUG)
                pam_flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &pamh);
        if (pam_code != PAM_SUCCESS) {
                pamh = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(pamh, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Pass our environment on to PAM */
        STRV_FOREACH(var, *env) {
                pam_code = pam_putenv(pamh, *var);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(pamh, pam_flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(pamh, pam_flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        session_open = true;

        pam_env = pam_getenvlist(pamh);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM before forking, so that it cannot get lost in the child */
        assert_se(sigprocmask_many(SIG_BLOCK, &saved_mask, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* This is the child; its job is to reset the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length of "/sbin/init"), to look pretty
                 * in /bin/ps */
                rename_process("(sd-pam)");

                /* Don't keep the passed fds open in this child. The assumption is that otherwise
                 * only those fds are open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and this makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only warned about, but expect
                 * sd-pam threads to fail to exit normally then. */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Ask to be signalled when the parent dies. This only works if the setresuid()
                 * above succeeded, since otherwise the kernel does not allow unprivileged parents
                 * to kill their privileged children this way. The control group kill logic is
                 * relied on to do the rest. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This matters especially with regards to
                 * dropping privileges, as unit setup might otherwise race against our setresuid(2)
                 * call.
                 *
                 * If the parent aborted this is detected below, hence failure is ignored here. */
                (void) barrier_place(&barrier);

                /* Only wait for SIGTERM if the parent is still there */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        while (sigwait(&ss, &sig) < 0)
                                if (errno != EINTR)
                                        goto child_finish;

                        assert(sig == SIGTERM);
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(pamh, pam_flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(pamh, pam_code | pam_flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The child was forked off successfully and does all the cleanups, hence forget about the
         * handle here. */
        pamh = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &saved_mask, NULL) >= 0);

        /* Close the log explicitly: the PAM modules might have opened it, but we don't want this fd
         * around. */
        closelog();

        /* Synchronously wait for the child to initialize. Errors cannot be recovered from, hence
         * they are not acted upon, but warned about loudly. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Swap in the environment PAM gave us */
        strv_free(*env);
        *env = pam_env;

        return 0;

fail:
        if (pam_code == PAM_SUCCESS)
                log_error_errno(r, "PAM failed: %m");
        else {
                log_error("PAM failed: %s", pam_strerror(pamh, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        }

        if (pamh) {
                if (session_open)
                        pam_code = pam_close_session(pamh, pam_flags);

                pam_end(pamh, pam_code | pam_flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
1247
/* Renames the current process to "(NAME)", where NAME is the last (up to) 8 characters of the
 * basename of 'path'. If the basename is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char label[11], *w = label;
        const char *base;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty
         * in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the end of the process name is usually more interesting, since the
                 * first bit might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        *w++ = '(';
        memcpy(w, base, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(label);
}
1278
1279 static bool context_has_address_families(const ExecContext *c) {
1280 assert(c);
1281
1282 return c->address_families_whitelist ||
1283 !set_isempty(c->address_families);
1284 }
1285
1286 static bool context_has_syscall_filters(const ExecContext *c) {
1287 assert(c);
1288
1289 return c->syscall_whitelist ||
1290 !set_isempty(c->syscall_filter);
1291 }
1292
1293 static bool context_has_no_new_privileges(const ExecContext *c) {
1294 assert(c);
1295
1296 if (c->no_new_privileges)
1297 return true;
1298
1299 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1300 return false;
1301
1302 /* We need NNP if we have any form of seccomp and are unprivileged */
1303 return context_has_address_families(c) ||
1304 c->memory_deny_write_execute ||
1305 c->restrict_realtime ||
1306 exec_context_restrict_namespaces_set(c) ||
1307 c->protect_kernel_tunables ||
1308 c->protect_kernel_modules ||
1309 c->private_devices ||
1310 context_has_syscall_filters(c) ||
1311 !set_isempty(c->syscall_archs) ||
1312 c->lock_personality;
1313 }
1314
1315 #ifdef HAVE_SECCOMP
1316
1317 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1318
1319 if (is_seccomp_available())
1320 return false;
1321
1322 log_open();
1323 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1324 log_close();
1325 return true;
1326 }
1327
1328 static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
1329 uint32_t negative_action, default_action, action;
1330 int r;
1331
1332 assert(u);
1333 assert(c);
1334
1335 if (!context_has_syscall_filters(c))
1336 return 0;
1337
1338 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1339 return 0;
1340
1341 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1342
1343 if (c->syscall_whitelist) {
1344 default_action = negative_action;
1345 action = SCMP_ACT_ALLOW;
1346 } else {
1347 default_action = SCMP_ACT_ALLOW;
1348 action = negative_action;
1349 }
1350
1351 if (needs_ambient_hack) {
1352 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1353 if (r < 0)
1354 return r;
1355 }
1356
1357 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1358 }
1359
1360 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1361 assert(u);
1362 assert(c);
1363
1364 if (set_isempty(c->syscall_archs))
1365 return 0;
1366
1367 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1368 return 0;
1369
1370 return seccomp_restrict_archs(c->syscall_archs);
1371 }
1372
1373 static int apply_address_families(const Unit* u, const ExecContext *c) {
1374 assert(u);
1375 assert(c);
1376
1377 if (!context_has_address_families(c))
1378 return 0;
1379
1380 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1381 return 0;
1382
1383 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1384 }
1385
1386 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1387 assert(u);
1388 assert(c);
1389
1390 if (!c->memory_deny_write_execute)
1391 return 0;
1392
1393 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1394 return 0;
1395
1396 return seccomp_memory_deny_write_execute();
1397 }
1398
1399 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1400 assert(u);
1401 assert(c);
1402
1403 if (!c->restrict_realtime)
1404 return 0;
1405
1406 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1407 return 0;
1408
1409 return seccomp_restrict_realtime();
1410 }
1411
1412 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1413 assert(u);
1414 assert(c);
1415
1416 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1417 * let's protect even those systems where this is left on in the kernel. */
1418
1419 if (!c->protect_kernel_tunables)
1420 return 0;
1421
1422 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1423 return 0;
1424
1425 return seccomp_protect_sysctl();
1426 }
1427
1428 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1429 assert(u);
1430 assert(c);
1431
1432 /* Turn off module syscalls on ProtectKernelModules=yes */
1433
1434 if (!c->protect_kernel_modules)
1435 return 0;
1436
1437 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1438 return 0;
1439
1440 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1441 }
1442
1443 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1444 assert(u);
1445 assert(c);
1446
1447 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1448
1449 if (!c->private_devices)
1450 return 0;
1451
1452 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1453 return 0;
1454
1455 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1456 }
1457
1458 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1459 assert(u);
1460 assert(c);
1461
1462 if (!exec_context_restrict_namespaces_set(c))
1463 return 0;
1464
1465 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1466 return 0;
1467
1468 return seccomp_restrict_namespaces(c->restrict_namespaces);
1469 }
1470
1471 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1472 unsigned long personality;
1473 int r;
1474
1475 assert(u);
1476 assert(c);
1477
1478 if (!c->lock_personality)
1479 return 0;
1480
1481 if (skip_seccomp_unavailable(u, "LockPersonality="))
1482 return 0;
1483
1484 personality = c->personality;
1485
1486 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1487 if (personality == PERSONALITY_INVALID) {
1488
1489 r = opinionated_personality(&personality);
1490 if (r < 0)
1491 return r;
1492 }
1493
1494 return seccomp_lock_personality(personality);
1495 }
1496
1497 #endif
1498
1499 static void do_idle_pipe_dance(int idle_pipe[4]) {
1500 assert(idle_pipe);
1501
1502 idle_pipe[1] = safe_close(idle_pipe[1]);
1503 idle_pipe[2] = safe_close(idle_pipe[2]);
1504
1505 if (idle_pipe[0] >= 0) {
1506 int r;
1507
1508 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1509
1510 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1511 ssize_t n;
1512
1513 /* Signal systemd that we are bored and want to continue. */
1514 n = write(idle_pipe[3], "x", 1);
1515 if (n > 0)
1516 /* Wait for systemd to react to the signal above. */
1517 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1518 }
1519
1520 idle_pipe[0] = safe_close(idle_pipe[0]);
1521
1522 }
1523
1524 idle_pipe[3] = safe_close(idle_pipe[3]);
1525 }
1526
1527 static int build_environment(
1528 Unit *u,
1529 const ExecContext *c,
1530 const ExecParameters *p,
1531 unsigned n_fds,
1532 const char *home,
1533 const char *username,
1534 const char *shell,
1535 dev_t journal_stream_dev,
1536 ino_t journal_stream_ino,
1537 char ***ret) {
1538
1539 _cleanup_strv_free_ char **our_env = NULL;
1540 unsigned n_env = 0;
1541 char *x;
1542
1543 assert(u);
1544 assert(c);
1545 assert(ret);
1546
1547 our_env = new0(char*, 14);
1548 if (!our_env)
1549 return -ENOMEM;
1550
1551 if (n_fds > 0) {
1552 _cleanup_free_ char *joined = NULL;
1553
1554 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
1555 return -ENOMEM;
1556 our_env[n_env++] = x;
1557
1558 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1559 return -ENOMEM;
1560 our_env[n_env++] = x;
1561
1562 joined = strv_join(p->fd_names, ":");
1563 if (!joined)
1564 return -ENOMEM;
1565
1566 x = strjoin("LISTEN_FDNAMES=", joined);
1567 if (!x)
1568 return -ENOMEM;
1569 our_env[n_env++] = x;
1570 }
1571
1572 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1573 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
1574 return -ENOMEM;
1575 our_env[n_env++] = x;
1576
1577 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1578 return -ENOMEM;
1579 our_env[n_env++] = x;
1580 }
1581
1582 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1583 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1584 * check the database directly. */
1585 if (p->flags & EXEC_NSS_BYPASS_BUS) {
1586 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1587 if (!x)
1588 return -ENOMEM;
1589 our_env[n_env++] = x;
1590 }
1591
1592 if (home) {
1593 x = strappend("HOME=", home);
1594 if (!x)
1595 return -ENOMEM;
1596 our_env[n_env++] = x;
1597 }
1598
1599 if (username) {
1600 x = strappend("LOGNAME=", username);
1601 if (!x)
1602 return -ENOMEM;
1603 our_env[n_env++] = x;
1604
1605 x = strappend("USER=", username);
1606 if (!x)
1607 return -ENOMEM;
1608 our_env[n_env++] = x;
1609 }
1610
1611 if (shell) {
1612 x = strappend("SHELL=", shell);
1613 if (!x)
1614 return -ENOMEM;
1615 our_env[n_env++] = x;
1616 }
1617
1618 if (!sd_id128_is_null(u->invocation_id)) {
1619 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1620 return -ENOMEM;
1621
1622 our_env[n_env++] = x;
1623 }
1624
1625 if (exec_context_needs_term(c)) {
1626 const char *tty_path, *term = NULL;
1627
1628 tty_path = exec_context_tty_path(c);
1629
1630 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1631 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1632 * passes to PID 1 ends up all the way in the console login shown. */
1633
1634 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1635 term = getenv("TERM");
1636 if (!term)
1637 term = default_term_for_tty(tty_path);
1638
1639 x = strappend("TERM=", term);
1640 if (!x)
1641 return -ENOMEM;
1642 our_env[n_env++] = x;
1643 }
1644
1645 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1646 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1647 return -ENOMEM;
1648
1649 our_env[n_env++] = x;
1650 }
1651
1652 our_env[n_env++] = NULL;
1653 assert(n_env <= 12);
1654
1655 *ret = our_env;
1656 our_env = NULL;
1657
1658 return 0;
1659 }
1660
1661 static int build_pass_environment(const ExecContext *c, char ***ret) {
1662 _cleanup_strv_free_ char **pass_env = NULL;
1663 size_t n_env = 0, n_bufsize = 0;
1664 char **i;
1665
1666 STRV_FOREACH(i, c->pass_environment) {
1667 _cleanup_free_ char *x = NULL;
1668 char *v;
1669
1670 v = getenv(*i);
1671 if (!v)
1672 continue;
1673 x = strjoin(*i, "=", v);
1674 if (!x)
1675 return -ENOMEM;
1676 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1677 return -ENOMEM;
1678 pass_env[n_env++] = x;
1679 pass_env[n_env] = NULL;
1680 x = NULL;
1681 }
1682
1683 *ret = pass_env;
1684 pass_env = NULL;
1685
1686 return 0;
1687 }
1688
1689 static bool exec_needs_mount_namespace(
1690 const ExecContext *context,
1691 const ExecParameters *params,
1692 ExecRuntime *runtime) {
1693
1694 assert(context);
1695 assert(params);
1696
1697 if (context->root_image)
1698 return true;
1699
1700 if (!strv_isempty(context->read_write_paths) ||
1701 !strv_isempty(context->read_only_paths) ||
1702 !strv_isempty(context->inaccessible_paths))
1703 return true;
1704
1705 if (context->n_bind_mounts > 0)
1706 return true;
1707
1708 if (context->mount_flags != 0)
1709 return true;
1710
1711 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1712 return true;
1713
1714 if (context->private_devices ||
1715 context->protect_system != PROTECT_SYSTEM_NO ||
1716 context->protect_home != PROTECT_HOME_NO ||
1717 context->protect_kernel_tunables ||
1718 context->protect_kernel_modules ||
1719 context->protect_control_groups)
1720 return true;
1721
1722 if (context->mount_apivfs && (context->root_image || context->root_directory))
1723 return true;
1724
1725 return false;
1726 }
1727
1728 static int setup_private_users(uid_t uid, gid_t gid) {
1729 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1730 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1731 _cleanup_close_ int unshare_ready_fd = -1;
1732 _cleanup_(sigkill_waitp) pid_t pid = 0;
1733 uint64_t c = 1;
1734 siginfo_t si;
1735 ssize_t n;
1736 int r;
1737
1738 /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1739 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1740 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1741 * which waits for the parent to create the new user namespace while staying in the original namespace. The
1742 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1743 * continues execution normally. */
1744
1745 if (uid != 0 && uid_is_valid(uid)) {
1746 r = asprintf(&uid_map,
1747 "0 0 1\n" /* Map root → root */
1748 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
1749 uid, uid);
1750 if (r < 0)
1751 return -ENOMEM;
1752 } else {
1753 uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1754 if (!uid_map)
1755 return -ENOMEM;
1756 }
1757
1758 if (gid != 0 && gid_is_valid(gid)) {
1759 r = asprintf(&gid_map,
1760 "0 0 1\n" /* Map root → root */
1761 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
1762 gid, gid);
1763 if (r < 0)
1764 return -ENOMEM;
1765 } else {
1766 gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
1767 if (!gid_map)
1768 return -ENOMEM;
1769 }
1770
1771 /* Create a communication channel so that the parent can tell the child when it finished creating the user
1772 * namespace. */
1773 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1774 if (unshare_ready_fd < 0)
1775 return -errno;
1776
1777 /* Create a communication channel so that the child can tell the parent a proper error code in case it
1778 * failed. */
1779 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1780 return -errno;
1781
1782 pid = fork();
1783 if (pid < 0)
1784 return -errno;
1785
1786 if (pid == 0) {
1787 _cleanup_close_ int fd = -1;
1788 const char *a;
1789 pid_t ppid;
1790
1791 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1792 * here, after the parent opened its own user namespace. */
1793
1794 ppid = getppid();
1795 errno_pipe[0] = safe_close(errno_pipe[0]);
1796
1797 /* Wait until the parent unshared the user namespace */
1798 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1799 r = -errno;
1800 goto child_fail;
1801 }
1802
1803 /* Disable the setgroups() system call in the child user namespace, for good. */
1804 a = procfs_file_alloca(ppid, "setgroups");
1805 fd = open(a, O_WRONLY|O_CLOEXEC);
1806 if (fd < 0) {
1807 if (errno != ENOENT) {
1808 r = -errno;
1809 goto child_fail;
1810 }
1811
1812 /* If the file is missing the kernel is too old, let's continue anyway. */
1813 } else {
1814 if (write(fd, "deny\n", 5) < 0) {
1815 r = -errno;
1816 goto child_fail;
1817 }
1818
1819 fd = safe_close(fd);
1820 }
1821
1822 /* First write the GID map */
1823 a = procfs_file_alloca(ppid, "gid_map");
1824 fd = open(a, O_WRONLY|O_CLOEXEC);
1825 if (fd < 0) {
1826 r = -errno;
1827 goto child_fail;
1828 }
1829 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1830 r = -errno;
1831 goto child_fail;
1832 }
1833 fd = safe_close(fd);
1834
1835 /* The write the UID map */
1836 a = procfs_file_alloca(ppid, "uid_map");
1837 fd = open(a, O_WRONLY|O_CLOEXEC);
1838 if (fd < 0) {
1839 r = -errno;
1840 goto child_fail;
1841 }
1842 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1843 r = -errno;
1844 goto child_fail;
1845 }
1846
1847 _exit(EXIT_SUCCESS);
1848
1849 child_fail:
1850 (void) write(errno_pipe[1], &r, sizeof(r));
1851 _exit(EXIT_FAILURE);
1852 }
1853
1854 errno_pipe[1] = safe_close(errno_pipe[1]);
1855
1856 if (unshare(CLONE_NEWUSER) < 0)
1857 return -errno;
1858
1859 /* Let the child know that the namespace is ready now */
1860 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1861 return -errno;
1862
1863 /* Try to read an error code from the child */
1864 n = read(errno_pipe[0], &r, sizeof(r));
1865 if (n < 0)
1866 return -errno;
1867 if (n == sizeof(r)) { /* an error code was sent to us */
1868 if (r < 0)
1869 return r;
1870 return -EIO;
1871 }
1872 if (n != 0) /* on success we should have read 0 bytes */
1873 return -EIO;
1874
1875 r = wait_for_terminate(pid, &si);
1876 if (r < 0)
1877 return r;
1878 pid = 0;
1879
1880 /* If something strange happened with the child, let's consider this fatal, too */
1881 if (si.si_code != CLD_EXITED || si.si_status != 0)
1882 return -EIO;
1883
1884 return 0;
1885 }
1886
1887 static int setup_exec_directory(
1888 const ExecContext *context,
1889 const ExecParameters *params,
1890 uid_t uid,
1891 gid_t gid,
1892 ExecDirectoryType type,
1893 int *exit_status) {
1894
1895 static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
1896 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
1897 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
1898 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
1899 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
1900 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
1901 };
1902 char **rt;
1903 int r;
1904
1905 assert(context);
1906 assert(params);
1907 assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
1908 assert(exit_status);
1909
1910 if (!params->prefix[type])
1911 return 0;
1912
1913 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
1914 if (!uid_is_valid(uid))
1915 uid = 0;
1916 if (!gid_is_valid(gid))
1917 gid = 0;
1918 }
1919
1920 STRV_FOREACH(rt, context->directories[type].paths) {
1921 _cleanup_free_ char *p;
1922
1923 p = strjoin(params->prefix[type], "/", *rt);
1924 if (!p) {
1925 r = -ENOMEM;
1926 goto fail;
1927 }
1928
1929 r = mkdir_parents_label(p, 0755);
1930 if (r < 0)
1931 goto fail;
1932
1933 r = mkdir_p_label(p, context->directories[type].mode);
1934 if (r < 0)
1935 goto fail;
1936
1937 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
1938 * a service, and shall not be writable. */
1939 if (type == EXEC_DIRECTORY_CONFIGURATION)
1940 continue;
1941
1942 r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
1943 if (r < 0)
1944 goto fail;
1945 }
1946
1947 return 0;
1948
1949 fail:
1950 *exit_status = exit_status_table[type];
1951
1952 return r;
1953 }
1954
1955 static int setup_smack(
1956 const ExecContext *context,
1957 const ExecCommand *command) {
1958
1959 int r;
1960
1961 assert(context);
1962 assert(command);
1963
1964 if (context->smack_process_label) {
1965 r = mac_smack_apply_pid(0, context->smack_process_label);
1966 if (r < 0)
1967 return r;
1968 }
1969 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1970 else {
1971 _cleanup_free_ char *exec_label = NULL;
1972
1973 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1974 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1975 return r;
1976
1977 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1978 if (r < 0)
1979 return r;
1980 }
1981 #endif
1982
1983 return 0;
1984 }
1985
1986 static int compile_read_write_paths(
1987 const ExecContext *context,
1988 const ExecParameters *params,
1989 char ***ret) {
1990
1991 _cleanup_strv_free_ char **l = NULL;
1992 char **rt;
1993 ExecDirectoryType i;
1994
1995 /* Compile the list of writable paths. This is the combination of
1996 * the explicitly configured paths, plus all runtime directories. */
1997
1998 if (strv_isempty(context->read_write_paths)) {
1999 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
2000 if (!strv_isempty(context->directories[i].paths))
2001 break;
2002
2003 if (i == _EXEC_DIRECTORY_MAX) {
2004 *ret = NULL; /* NOP if neither is set */
2005 return 0;
2006 }
2007 }
2008
2009 l = strv_copy(context->read_write_paths);
2010 if (!l)
2011 return -ENOMEM;
2012
2013 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
2014 if (!params->prefix[i])
2015 continue;
2016
2017 STRV_FOREACH(rt, context->directories[i].paths) {
2018 char *s;
2019
2020 s = strjoin(params->prefix[i], "/", *rt);
2021 if (!s)
2022 return -ENOMEM;
2023
2024 if (strv_consume(&l, s) < 0)
2025 return -ENOMEM;
2026 }
2027 }
2028
2029 *ret = l;
2030 l = NULL;
2031
2032 return 0;
2033 }
2034
2035 static int apply_mount_namespace(
2036 Unit *u,
2037 ExecCommand *command,
2038 const ExecContext *context,
2039 const ExecParameters *params,
2040 ExecRuntime *runtime) {
2041
2042 _cleanup_strv_free_ char **rw = NULL;
2043 char *tmp = NULL, *var = NULL;
2044 const char *root_dir = NULL, *root_image = NULL;
2045 NameSpaceInfo ns_info = {
2046 .ignore_protect_paths = false,
2047 .private_dev = context->private_devices,
2048 .protect_control_groups = context->protect_control_groups,
2049 .protect_kernel_tunables = context->protect_kernel_tunables,
2050 .protect_kernel_modules = context->protect_kernel_modules,
2051 .mount_apivfs = context->mount_apivfs,
2052 };
2053 bool needs_sandboxing;
2054 int r;
2055
2056 assert(context);
2057
2058 /* The runtime struct only contains the parent of the private /tmp,
2059 * which is non-accessible to world users. Inside of it there's a /tmp
2060 * that is sticky, and that's the one we want to use here. */
2061
2062 if (context->private_tmp && runtime) {
2063 if (runtime->tmp_dir)
2064 tmp = strjoina(runtime->tmp_dir, "/tmp");
2065 if (runtime->var_tmp_dir)
2066 var = strjoina(runtime->var_tmp_dir, "/tmp");
2067 }
2068
2069 r = compile_read_write_paths(context, params, &rw);
2070 if (r < 0)
2071 return r;
2072
2073 if (params->flags & EXEC_APPLY_CHROOT) {
2074 root_image = context->root_image;
2075
2076 if (!root_image)
2077 root_dir = context->root_directory;
2078 }
2079
2080 /*
2081 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2082 * sandbox info, otherwise enforce it, don't ignore protected paths and
2083 * fail if we are enable to apply the sandbox inside the mount namespace.
2084 */
2085 if (!context->dynamic_user && root_dir)
2086 ns_info.ignore_protect_paths = true;
2087
2088 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2089
2090 r = setup_namespace(root_dir, root_image,
2091 &ns_info, rw,
2092 needs_sandboxing ? context->read_only_paths : NULL,
2093 needs_sandboxing ? context->inaccessible_paths : NULL,
2094 context->bind_mounts,
2095 context->n_bind_mounts,
2096 tmp,
2097 var,
2098 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2099 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2100 context->mount_flags,
2101 DISSECT_IMAGE_DISCARD_ON_LOOP);
2102
2103 /* If we couldn't set up the namespace this is probably due to a
2104 * missing capability. In this case, silently proceeed. */
2105 if (IN_SET(r, -EPERM, -EACCES)) {
2106 log_open();
2107 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2108 log_close();
2109 r = 0;
2110 }
2111
2112 return r;
2113 }
2114
2115 static int apply_working_directory(
2116 const ExecContext *context,
2117 const ExecParameters *params,
2118 const char *home,
2119 const bool needs_mount_ns,
2120 int *exit_status) {
2121
2122 const char *d, *wd;
2123
2124 assert(context);
2125 assert(exit_status);
2126
2127 if (context->working_directory_home) {
2128
2129 if (!home) {
2130 *exit_status = EXIT_CHDIR;
2131 return -ENXIO;
2132 }
2133
2134 wd = home;
2135
2136 } else if (context->working_directory)
2137 wd = context->working_directory;
2138 else
2139 wd = "/";
2140
2141 if (params->flags & EXEC_APPLY_CHROOT) {
2142 if (!needs_mount_ns && context->root_directory)
2143 if (chroot(context->root_directory) < 0) {
2144 *exit_status = EXIT_CHROOT;
2145 return -errno;
2146 }
2147
2148 d = wd;
2149 } else
2150 d = prefix_roota(context->root_directory, wd);
2151
2152 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2153 *exit_status = EXIT_CHDIR;
2154 return -errno;
2155 }
2156
2157 return 0;
2158 }
2159
2160 static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2161 key_serial_t keyring;
2162
2163 assert(u);
2164 assert(p);
2165
2166 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2167 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2168 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2169 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2170 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2171 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2172
2173 if (!(p->flags & EXEC_NEW_KEYRING))
2174 return 0;
2175
2176 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2177 if (keyring == -1) {
2178 if (errno == ENOSYS)
2179 log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2180 else if (IN_SET(errno, EACCES, EPERM))
2181 log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2182 else if (errno == EDQUOT)
2183 log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2184 else
2185 return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2186
2187 return 0;
2188 }
2189
2190 /* Populate they keyring with the invocation ID by default. */
2191 if (!sd_id128_is_null(u->invocation_id)) {
2192 key_serial_t key;
2193
2194 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2195 if (key == -1)
2196 log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2197 else {
2198 if (keyctl(KEYCTL_SETPERM, key,
2199 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2200 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2201 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2202 }
2203 }
2204
2205 /* And now, make the keyring owned by the service's user */
2206 if (uid_is_valid(uid) || gid_is_valid(gid))
2207 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2208 return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2209
2210 return 0;
2211 }
2212
/* Appends the valid (i.e. non-negative) file descriptors of the given pair to the array, advancing *n by
 * one for each descriptor appended. A NULL pair is a no-op. The caller has to make sure the array has
 * room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        int k;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2225
2226 static int close_remaining_fds(
2227 const ExecParameters *params,
2228 ExecRuntime *runtime,
2229 DynamicCreds *dcreds,
2230 int user_lookup_fd,
2231 int socket_fd,
2232 int *fds, unsigned n_fds) {
2233
2234 unsigned n_dont_close = 0;
2235 int dont_close[n_fds + 12];
2236
2237 assert(params);
2238
2239 if (params->stdin_fd >= 0)
2240 dont_close[n_dont_close++] = params->stdin_fd;
2241 if (params->stdout_fd >= 0)
2242 dont_close[n_dont_close++] = params->stdout_fd;
2243 if (params->stderr_fd >= 0)
2244 dont_close[n_dont_close++] = params->stderr_fd;
2245
2246 if (socket_fd >= 0)
2247 dont_close[n_dont_close++] = socket_fd;
2248 if (n_fds > 0) {
2249 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2250 n_dont_close += n_fds;
2251 }
2252
2253 if (runtime)
2254 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2255
2256 if (dcreds) {
2257 if (dcreds->user)
2258 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2259 if (dcreds->group)
2260 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2261 }
2262
2263 if (user_lookup_fd >= 0)
2264 dont_close[n_dont_close++] = user_lookup_fd;
2265
2266 return close_all_fds(dont_close, n_dont_close);
2267 }
2268
2269 static int send_user_lookup(
2270 Unit *unit,
2271 int user_lookup_fd,
2272 uid_t uid,
2273 gid_t gid) {
2274
2275 assert(unit);
2276
2277 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2278 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2279 * specified. */
2280
2281 if (user_lookup_fd < 0)
2282 return 0;
2283
2284 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2285 return 0;
2286
2287 if (writev(user_lookup_fd,
2288 (struct iovec[]) {
2289 { .iov_base = &uid, .iov_len = sizeof(uid) },
2290 { .iov_base = &gid, .iov_len = sizeof(gid) },
2291 { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2292 return -errno;
2293
2294 return 0;
2295 }
2296
2297 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2298 int r;
2299
2300 assert(c);
2301 assert(home);
2302 assert(buf);
2303
2304 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2305
2306 if (*home)
2307 return 0;
2308
2309 if (!c->working_directory_home)
2310 return 0;
2311
2312 if (uid == 0) {
2313 /* Hardcode /root as home directory for UID 0 */
2314 *home = "/root";
2315 return 1;
2316 }
2317
2318 r = get_home_dir(buf);
2319 if (r < 0)
2320 return r;
2321
2322 *home = *buf;
2323 return 1;
2324 }
2325
2326 static int exec_child(
2327 Unit *unit,
2328 ExecCommand *command,
2329 const ExecContext *context,
2330 const ExecParameters *params,
2331 ExecRuntime *runtime,
2332 DynamicCreds *dcreds,
2333 char **argv,
2334 int socket_fd,
2335 int named_iofds[3],
2336 int *fds,
2337 unsigned n_storage_fds,
2338 unsigned n_socket_fds,
2339 char **files_env,
2340 int user_lookup_fd,
2341 int *exit_status,
2342 char **error_message) {
2343
2344 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2345 _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2346 _cleanup_free_ gid_t *supplementary_gids = NULL;
2347 const char *username = NULL, *groupname = NULL;
2348 const char *home = NULL, *shell = NULL;
2349 dev_t journal_stream_dev = 0;
2350 ino_t journal_stream_ino = 0;
2351 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
2352 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
2353 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
2354 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
2355 #ifdef HAVE_SELINUX
2356 bool use_selinux = false;
2357 #endif
2358 #ifdef HAVE_SMACK
2359 bool use_smack = false;
2360 #endif
2361 #ifdef HAVE_APPARMOR
2362 bool use_apparmor = false;
2363 #endif
2364 uid_t uid = UID_INVALID;
2365 gid_t gid = GID_INVALID;
2366 int i, r, ngids = 0;
2367 unsigned n_fds;
2368 ExecDirectoryType dt;
2369 int secure_bits;
2370
2371 assert(unit);
2372 assert(command);
2373 assert(context);
2374 assert(params);
2375 assert(exit_status);
2376 assert(error_message);
2377 /* We don't always set error_message, hence it must be initialized */
2378 assert(*error_message == NULL);
2379
2380 rename_process_from_path(command->path);
2381
2382 /* We reset exactly these signals, since they are the
2383 * only ones we set to SIG_IGN in the main daemon. All
2384 * others we leave untouched because we set them to
2385 * SIG_DFL or a valid handler initially, both of which
2386 * will be demoted to SIG_DFL. */
2387 (void) default_signals(SIGNALS_CRASH_HANDLER,
2388 SIGNALS_IGNORE, -1);
2389
2390 if (context->ignore_sigpipe)
2391 (void) ignore_signals(SIGPIPE, -1);
2392
2393 r = reset_signal_mask();
2394 if (r < 0) {
2395 *exit_status = EXIT_SIGNAL_MASK;
2396 *error_message = strdup("Failed to reset signal mask");
2397 /* If strdup fails, here and below, we will just print the generic error message. */
2398 return r;
2399 }
2400
2401 if (params->idle_pipe)
2402 do_idle_pipe_dance(params->idle_pipe);
2403
2404 /* Close sockets very early to make sure we don't
2405 * block init reexecution because it cannot bind its
2406 * sockets */
2407
2408 log_forget_fds();
2409
2410 n_fds = n_storage_fds + n_socket_fds;
2411 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2412 if (r < 0) {
2413 *exit_status = EXIT_FDS;
2414 *error_message = strdup("Failed to close remaining fds");
2415 return r;
2416 }
2417
2418 if (!context->same_pgrp)
2419 if (setsid() < 0) {
2420 *exit_status = EXIT_SETSID;
2421 return -errno;
2422 }
2423
2424 exec_context_tty_reset(context, params);
2425
2426 if (unit_shall_confirm_spawn(unit)) {
2427 const char *vc = params->confirm_spawn;
2428 _cleanup_free_ char *cmdline = NULL;
2429
2430 cmdline = exec_command_line(argv);
2431 if (!cmdline) {
2432 *exit_status = EXIT_CONFIRM;
2433 return -ENOMEM;
2434 }
2435
2436 r = ask_for_confirmation(vc, unit, cmdline);
2437 if (r != CONFIRM_EXECUTE) {
2438 if (r == CONFIRM_PRETEND_SUCCESS) {
2439 *exit_status = EXIT_SUCCESS;
2440 return 0;
2441 }
2442 *exit_status = EXIT_CONFIRM;
2443 *error_message = strdup("Execution cancelled");
2444 return -ECANCELED;
2445 }
2446 }
2447
2448 if (context->dynamic_user && dcreds) {
2449
2450 /* Make sure we bypass our own NSS module for any NSS checks */
2451 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2452 *exit_status = EXIT_USER;
2453 *error_message = strdup("Failed to update environment");
2454 return -errno;
2455 }
2456
2457 r = dynamic_creds_realize(dcreds, &uid, &gid);
2458 if (r < 0) {
2459 *exit_status = EXIT_USER;
2460 *error_message = strdup("Failed to update dynamic user credentials");
2461 return r;
2462 }
2463
2464 if (!uid_is_valid(uid)) {
2465 *exit_status = EXIT_USER;
2466 (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2467 /* If asprintf fails, here and below, we will just print the generic error message. */
2468 return -ESRCH;
2469 }
2470
2471 if (!gid_is_valid(gid)) {
2472 *exit_status = EXIT_USER;
2473 (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
2474 return -ESRCH;
2475 }
2476
2477 if (dcreds->user)
2478 username = dcreds->user->name;
2479
2480 } else {
2481 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2482 if (r < 0) {
2483 *exit_status = EXIT_USER;
2484 *error_message = strdup("Failed to determine user credentials");
2485 return r;
2486 }
2487
2488 r = get_fixed_group(context, &groupname, &gid);
2489 if (r < 0) {
2490 *exit_status = EXIT_GROUP;
2491 *error_message = strdup("Failed to determine group credentials");
2492 return r;
2493 }
2494 }
2495
2496 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2497 r = get_supplementary_groups(context, username, groupname, gid,
2498 &supplementary_gids, &ngids);
2499 if (r < 0) {
2500 *exit_status = EXIT_GROUP;
2501 *error_message = strdup("Failed to determine supplementary groups");
2502 return r;
2503 }
2504
2505 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2506 if (r < 0) {
2507 *exit_status = EXIT_USER;
2508 *error_message = strdup("Failed to send user credentials to PID1");
2509 return r;
2510 }
2511
2512 user_lookup_fd = safe_close(user_lookup_fd);
2513
2514 r = acquire_home(context, uid, &home, &home_buffer);
2515 if (r < 0) {
2516 *exit_status = EXIT_CHDIR;
2517 *error_message = strdup("Failed to determine $HOME for user");
2518 return r;
2519 }
2520
2521 /* If a socket is connected to STDIN/STDOUT/STDERR, we
2522 * must sure to drop O_NONBLOCK */
2523 if (socket_fd >= 0)
2524 (void) fd_nonblock(socket_fd, false);
2525
2526 r = setup_input(context, params, socket_fd, named_iofds);
2527 if (r < 0) {
2528 *exit_status = EXIT_STDIN;
2529 *error_message = strdup("Failed to set up stdin");
2530 return r;
2531 }
2532
2533 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2534 if (r < 0) {
2535 *exit_status = EXIT_STDOUT;
2536 *error_message = strdup("Failed to set up stdout");
2537 return r;
2538 }
2539
2540 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2541 if (r < 0) {
2542 *exit_status = EXIT_STDERR;
2543 *error_message = strdup("Failed to set up stderr");
2544 return r;
2545 }
2546
2547 if (params->cgroup_path) {
2548 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2549 if (r < 0) {
2550 *exit_status = EXIT_CGROUP;
2551 (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
2552 return r;
2553 }
2554 }
2555
2556 if (context->oom_score_adjust_set) {
2557 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2558
2559 /* When we can't make this change due to EPERM, then
2560 * let's silently skip over it. User namespaces
2561 * prohibit write access to this file, and we
2562 * shouldn't trip up over that. */
2563
2564 sprintf(t, "%i", context->oom_score_adjust);
2565 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2566 if (r == -EPERM || r == -EACCES) {
2567 log_open();
2568 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2569 log_close();
2570 } else if (r < 0) {
2571 *exit_status = EXIT_OOM_ADJUST;
2572 *error_message = strdup("Failed to write /proc/self/oom_score_adj");
2573 return -errno;
2574 }
2575 }
2576
2577 if (context->nice_set)
2578 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2579 *exit_status = EXIT_NICE;
2580 return -errno;
2581 }
2582
2583 if (context->cpu_sched_set) {
2584 struct sched_param param = {
2585 .sched_priority = context->cpu_sched_priority,
2586 };
2587
2588 r = sched_setscheduler(0,
2589 context->cpu_sched_policy |
2590 (context->cpu_sched_reset_on_fork ?
2591 SCHED_RESET_ON_FORK : 0),
2592 &param);
2593 if (r < 0) {
2594 *exit_status = EXIT_SETSCHEDULER;
2595 return -errno;
2596 }
2597 }
2598
2599 if (context->cpuset)
2600 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2601 *exit_status = EXIT_CPUAFFINITY;
2602 return -errno;
2603 }
2604
2605 if (context->ioprio_set)
2606 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2607 *exit_status = EXIT_IOPRIO;
2608 return -errno;
2609 }
2610
2611 if (context->timer_slack_nsec != NSEC_INFINITY)
2612 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2613 *exit_status = EXIT_TIMERSLACK;
2614 return -errno;
2615 }
2616
2617 if (context->personality != PERSONALITY_INVALID) {
2618 r = safe_personality(context->personality);
2619 if (r < 0) {
2620 *exit_status = EXIT_PERSONALITY;
2621 return r;
2622 }
2623 }
2624
2625 if (context->utmp_id)
2626 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2627 context->tty_path,
2628 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
2629 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2630 USER_PROCESS,
2631 username);
2632
2633 if (context->user) {
2634 r = chown_terminal(STDIN_FILENO, uid);
2635 if (r < 0) {
2636 *exit_status = EXIT_STDIN;
2637 return r;
2638 }
2639 }
2640
2641 /* If delegation is enabled we'll pass ownership of the cgroup
2642 * (but only in systemd's own controller hierarchy!) to the
2643 * user of the new process. */
2644 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
2645 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2646 if (r < 0) {
2647 *exit_status = EXIT_CGROUP;
2648 return r;
2649 }
2650
2651
2652 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2653 if (r < 0) {
2654 *exit_status = EXIT_CGROUP;
2655 return r;
2656 }
2657 }
2658
2659 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
2660 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
2661 if (r < 0)
2662 return r;
2663 }
2664
2665 r = build_environment(
2666 unit,
2667 context,
2668 params,
2669 n_fds,
2670 home,
2671 username,
2672 shell,
2673 journal_stream_dev,
2674 journal_stream_ino,
2675 &our_env);
2676 if (r < 0) {
2677 *exit_status = EXIT_MEMORY;
2678 return r;
2679 }
2680
2681 r = build_pass_environment(context, &pass_env);
2682 if (r < 0) {
2683 *exit_status = EXIT_MEMORY;
2684 return r;
2685 }
2686
2687 accum_env = strv_env_merge(5,
2688 params->environment,
2689 our_env,
2690 pass_env,
2691 context->environment,
2692 files_env,
2693 NULL);
2694 if (!accum_env) {
2695 *exit_status = EXIT_MEMORY;
2696 return -ENOMEM;
2697 }
2698 accum_env = strv_env_clean(accum_env);
2699
2700 (void) umask(context->umask);
2701
2702 r = setup_keyring(unit, params, uid, gid);
2703 if (r < 0) {
2704 *exit_status = EXIT_KEYRING;
2705 return r;
2706 }
2707
2708 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
2709 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
2710
2711 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
2712 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
2713
2714 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
2715 if (needs_ambient_hack)
2716 needs_setuid = false;
2717 else
2718 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
2719
2720 if (needs_sandboxing) {
2721 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
2722 * present. The actual MAC context application will happen later, as late as possible, to avoid
2723 * impacting our own code paths. */
2724
2725 #ifdef HAVE_SELINUX
2726 use_selinux = mac_selinux_use();
2727 #endif
2728 #ifdef HAVE_SMACK
2729 use_smack = mac_smack_use();
2730 #endif
2731 #ifdef HAVE_APPARMOR
2732 use_apparmor = mac_apparmor_use();
2733 #endif
2734 }
2735
2736 if (needs_setuid) {
2737 if (context->pam_name && username) {
2738 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2739 if (r < 0) {
2740 *exit_status = EXIT_PAM;
2741 return r;
2742 }
2743 }
2744 }
2745
2746 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2747 r = setup_netns(runtime->netns_storage_socket);
2748 if (r < 0) {
2749 *exit_status = EXIT_NETWORK;
2750 return r;
2751 }
2752 }
2753
2754 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2755 if (needs_mount_namespace) {
2756 r = apply_mount_namespace(unit, command, context, params, runtime);
2757 if (r < 0) {
2758 *exit_status = EXIT_NAMESPACE;
2759 return r;
2760 }
2761 }
2762
2763 /* Apply just after mount namespace setup */
2764 r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2765 if (r < 0)
2766 return r;
2767
2768 /* Drop groups as early as possbile */
2769 if (needs_setuid) {
2770 r = enforce_groups(context, gid, supplementary_gids, ngids);
2771 if (r < 0) {
2772 *exit_status = EXIT_GROUP;
2773 return r;
2774 }
2775 }
2776
2777 if (needs_sandboxing) {
2778 #ifdef HAVE_SELINUX
2779 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
2780 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2781 if (r < 0) {
2782 *exit_status = EXIT_SELINUX_CONTEXT;
2783 return r;
2784 }
2785 }
2786 #endif
2787
2788 if (context->private_users) {
2789 r = setup_private_users(uid, gid);
2790 if (r < 0) {
2791 *exit_status = EXIT_USER;
2792 return r;
2793 }
2794 }
2795 }
2796
2797 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
2798 * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
2799 * was needed to upload the policy and can now be closed as well. */
2800 r = close_all_fds(fds, n_fds);
2801 if (r >= 0)
2802 r = shift_fds(fds, n_fds);
2803 if (r >= 0)
2804 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
2805 if (r < 0) {
2806 *exit_status = EXIT_FDS;
2807 return r;
2808 }
2809
2810 secure_bits = context->secure_bits;
2811
2812 if (needs_sandboxing) {
2813 uint64_t bset;
2814
2815 for (i = 0; i < _RLIMIT_MAX; i++) {
2816
2817 if (!context->rlimit[i])
2818 continue;
2819
2820 r = setrlimit_closest(i, context->rlimit[i]);
2821 if (r < 0) {
2822 *exit_status = EXIT_LIMITS;
2823 return r;
2824 }
2825 }
2826
2827 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2828 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2829 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2830 *exit_status = EXIT_LIMITS;
2831 return -errno;
2832 }
2833 }
2834
2835 bset = context->capability_bounding_set;
2836 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
2837 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
2838 * instead of us doing that */
2839 if (needs_ambient_hack)
2840 bset |= (UINT64_C(1) << CAP_SETPCAP) |
2841 (UINT64_C(1) << CAP_SETUID) |
2842 (UINT64_C(1) << CAP_SETGID);
2843
2844 if (!cap_test_all(bset)) {
2845 r = capability_bounding_set_drop(bset, false);
2846 if (r < 0) {
2847 *exit_status = EXIT_CAPABILITIES;
2848 *error_message = strdup("Failed to drop capabilities");
2849 return r;
2850 }
2851 }
2852
2853 /* This is done before enforce_user, but ambient set
2854 * does not survive over setresuid() if keep_caps is not set. */
2855 if (!needs_ambient_hack &&
2856 context->capability_ambient_set != 0) {
2857 r = capability_ambient_set_apply(context->capability_ambient_set, true);
2858 if (r < 0) {
2859 *exit_status = EXIT_CAPABILITIES;
2860 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
2861 return r;
2862 }
2863 }
2864 }
2865
2866 if (needs_setuid) {
2867 if (context->user) {
2868 r = enforce_user(context, uid);
2869 if (r < 0) {
2870 *exit_status = EXIT_USER;
2871 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
2872 return r;
2873 }
2874
2875 if (!needs_ambient_hack &&
2876 context->capability_ambient_set != 0) {
2877
2878 /* Fix the ambient capabilities after user change. */
2879 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2880 if (r < 0) {
2881 *exit_status = EXIT_CAPABILITIES;
2882 *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
2883 return r;
2884 }
2885
2886 /* If we were asked to change user and ambient capabilities
2887 * were requested, we had to add keep-caps to the securebits
2888 * so that we would maintain the inherited capability set
2889 * through the setresuid(). Make sure that the bit is added
2890 * also to the context secure_bits so that we don't try to
2891 * drop the bit away next. */
2892
2893 secure_bits |= 1<<SECURE_KEEP_CAPS;
2894 }
2895 }
2896 }
2897
2898 if (needs_sandboxing) {
2899 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2900 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2901 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2902 * are restricted. */
2903
2904 #ifdef HAVE_SELINUX
2905 if (use_selinux) {
2906 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2907
2908 if (exec_context) {
2909 r = setexeccon(exec_context);
2910 if (r < 0) {
2911 *exit_status = EXIT_SELINUX_CONTEXT;
2912 (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
2913 return r;
2914 }
2915 }
2916 }
2917 #endif
2918
2919 #ifdef HAVE_SMACK
2920 if (use_smack) {
2921 r = setup_smack(context, command);
2922 if (r < 0) {
2923 *exit_status = EXIT_SMACK_PROCESS_LABEL;
2924 *error_message = strdup("Failed to set SMACK process label");
2925 return r;
2926 }
2927 }
2928 #endif
2929
2930 #ifdef HAVE_APPARMOR
2931 if (use_apparmor && context->apparmor_profile) {
2932 r = aa_change_onexec(context->apparmor_profile);
2933 if (r < 0 && !context->apparmor_profile_ignore) {
2934 *exit_status = EXIT_APPARMOR_PROFILE;
2935 (void) asprintf(error_message,
2936 "Failed to prepare AppArmor profile change to %s",
2937 context->apparmor_profile);
2938 return -errno;
2939 }
2940 }
2941 #endif
2942
2943 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
2944 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
2945 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2946 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
2947 *exit_status = EXIT_SECUREBITS;
2948 *error_message = strdup("Failed to set secure bits");
2949 return -errno;
2950 }
2951
2952 if (context_has_no_new_privileges(context))
2953 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2954 *exit_status = EXIT_NO_NEW_PRIVILEGES;
2955 *error_message = strdup("Failed to disable new privileges");
2956 return -errno;
2957 }
2958
2959 #ifdef HAVE_SECCOMP
2960 r = apply_address_families(unit, context);
2961 if (r < 0) {
2962 *exit_status = EXIT_ADDRESS_FAMILIES;
2963 *error_message = strdup("Failed to restrict address families");
2964 return r;
2965 }
2966
2967 r = apply_memory_deny_write_execute(unit, context);
2968 if (r < 0) {
2969 *exit_status = EXIT_SECCOMP;
2970 *error_message = strdup("Failed to disable writing to executable memory");
2971 return r;
2972 }
2973
2974 r = apply_restrict_realtime(unit, context);
2975 if (r < 0) {
2976 *exit_status = EXIT_SECCOMP;
2977 *error_message = strdup("Failed to apply realtime restrictions");
2978 return r;
2979 }
2980
2981 r = apply_restrict_namespaces(unit, context);
2982 if (r < 0) {
2983 *exit_status = EXIT_SECCOMP;
2984 *error_message = strdup("Failed to apply namespace restrictions");
2985 return r;
2986 }
2987
2988 r = apply_protect_sysctl(unit, context);
2989 if (r < 0) {
2990 *exit_status = EXIT_SECCOMP;
2991 *error_message = strdup("Failed to apply sysctl restrictions");
2992 return r;
2993 }
2994
2995 r = apply_protect_kernel_modules(unit, context);
2996 if (r < 0) {
2997 *exit_status = EXIT_SECCOMP;
2998 *error_message = strdup("Failed to apply module loading restrictions");
2999 return r;
3000 }
3001
3002 r = apply_private_devices(unit, context);
3003 if (r < 0) {
3004 *exit_status = EXIT_SECCOMP;
3005 *error_message = strdup("Failed to set up private devices");
3006 return r;
3007 }
3008
3009 r = apply_syscall_archs(unit, context);
3010 if (r < 0) {
3011 *exit_status = EXIT_SECCOMP;
3012 *error_message = strdup("Failed to apply syscall architecture restrictions");
3013 return r;
3014 }
3015
3016 r = apply_lock_personality(unit, context);
3017 if (r < 0) {
3018 *exit_status = EXIT_SECCOMP;
3019 *error_message = strdup("Failed to lock personalities");
3020 return r;
3021 }
3022
3023 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
3024 * by the filter as little as possible. */
3025 r = apply_syscall_filter(unit, context, needs_ambient_hack);
3026 if (r < 0) {
3027 *exit_status = EXIT_SECCOMP;
3028 *error_message = strdup("Failed to apply syscall filters");
3029 return r;
3030 }
3031 #endif
3032 }
3033
3034 final_argv = replace_env_argv(argv, accum_env);
3035 if (!final_argv) {
3036 *exit_status = EXIT_MEMORY;
3037 *error_message = strdup("Failed to prepare process arguments");
3038 return -ENOMEM;
3039 }
3040
3041 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
3042 _cleanup_free_ char *line;
3043
3044 line = exec_command_line(final_argv);
3045 if (line) {
3046 log_open();
3047 log_struct(LOG_DEBUG,
3048 "EXECUTABLE=%s", command->path,
3049 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
3050 LOG_UNIT_ID(unit),
3051 NULL);
3052 log_close();
3053 }
3054 }
3055
3056 execve(command->path, final_argv, accum_env);
3057 *exit_status = EXIT_EXEC;
3058 return -errno;
3059 }
3060
3061 int exec_spawn(Unit *unit,
3062 ExecCommand *command,
3063 const ExecContext *context,
3064 const ExecParameters *params,
3065 ExecRuntime *runtime,
3066 DynamicCreds *dcreds,
3067 pid_t *ret) {
3068
3069 _cleanup_strv_free_ char **files_env = NULL;
3070 int *fds = NULL;
3071 unsigned n_storage_fds = 0, n_socket_fds = 0;
3072 _cleanup_free_ char *line = NULL;
3073 int socket_fd, r;
3074 int named_iofds[3] = { -1, -1, -1 };
3075 char **argv;
3076 pid_t pid;
3077
3078 assert(unit);
3079 assert(command);
3080 assert(context);
3081 assert(ret);
3082 assert(params);
3083 assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
3084
3085 if (context->std_input == EXEC_INPUT_SOCKET ||
3086 context->std_output == EXEC_OUTPUT_SOCKET ||
3087 context->std_error == EXEC_OUTPUT_SOCKET) {
3088
3089 if (params->n_socket_fds > 1) {
3090 log_unit_error(unit, "Got more than one socket.");
3091 return -EINVAL;
3092 }
3093
3094 if (params->n_socket_fds == 0) {
3095 log_unit_error(unit, "Got no socket.");
3096 return -EINVAL;
3097 }
3098
3099 socket_fd = params->fds[0];
3100 } else {
3101 socket_fd = -1;
3102 fds = params->fds;
3103 n_storage_fds = params->n_storage_fds;
3104 n_socket_fds = params->n_socket_fds;
3105 }
3106
3107 r = exec_context_named_iofds(unit, context, params, named_iofds);
3108 if (r < 0)
3109 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
3110
3111 r = exec_context_load_environment(unit, context, &files_env);
3112 if (r < 0)
3113 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
3114
3115 argv = params->argv ?: command->argv;
3116 line = exec_command_line(argv);
3117 if (!line)
3118 return log_oom();
3119
3120 log_struct(LOG_DEBUG,
3121 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
3122 "EXECUTABLE=%s", command->path,
3123 LOG_UNIT_ID(unit),
3124 NULL);
3125 pid = fork();
3126 if (pid < 0)
3127 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
3128
3129 if (pid == 0) {
3130 int exit_status;
3131 _cleanup_free_ char *error_message = NULL;
3132
3133 r = exec_child(unit,
3134 command,
3135 context,
3136 params,
3137 runtime,
3138 dcreds,
3139 argv,
3140 socket_fd,
3141 named_iofds,
3142 fds,
3143 n_storage_fds,
3144 n_socket_fds,
3145 files_env,
3146 unit->manager->user_lookup_fds[1],
3147 &exit_status,
3148 &error_message);
3149 if (r < 0) {
3150 log_open();
3151 if (error_message)
3152 log_struct_errno(LOG_ERR, r,
3153 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3154 LOG_UNIT_ID(unit),
3155 LOG_UNIT_MESSAGE(unit, "%s: %m",
3156 error_message),
3157 "EXECUTABLE=%s", command->path,
3158 NULL);
3159 else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
3160 log_struct_errno(LOG_INFO, r,
3161 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3162 LOG_UNIT_ID(unit),
3163 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3164 command->path),
3165 "EXECUTABLE=%s", command->path,
3166 NULL);
3167 else
3168 log_struct_errno(LOG_ERR, r,
3169 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3170 LOG_UNIT_ID(unit),
3171 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3172 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3173 command->path),
3174 "EXECUTABLE=%s", command->path,
3175 NULL);
3176 }
3177
3178 _exit(exit_status);
3179 }
3180
3181 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3182
3183 /* We add the new process to the cgroup both in the child (so
3184 * that we can be sure that no user code is ever executed
3185 * outside of the cgroup) and in the parent (so that we can be
3186 * sure that when we kill the cgroup the process will be
3187 * killed too). */
3188 if (params->cgroup_path)
3189 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3190
3191 exec_status_start(&command->exec_status, pid);
3192
3193 *ret = pid;
3194 return 0;
3195 }
3196
3197 void exec_context_init(ExecContext *c) {
3198 ExecDirectoryType i;
3199
3200 assert(c);
3201
3202 c->umask = 0022;
3203 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3204 c->cpu_sched_policy = SCHED_OTHER;
3205 c->syslog_priority = LOG_DAEMON|LOG_INFO;
3206 c->syslog_level_prefix = true;
3207 c->ignore_sigpipe = true;
3208 c->timer_slack_nsec = NSEC_INFINITY;
3209 c->personality = PERSONALITY_INVALID;
3210 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3211 c->directories[i].mode = 0755;
3212 c->capability_bounding_set = CAP_ALL;
3213 c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3214 }
3215
3216 void exec_context_done(ExecContext *c) {
3217 unsigned l;
3218 ExecDirectoryType i;
3219
3220 assert(c);
3221
3222 c->environment = strv_free(c->environment);
3223 c->environment_files = strv_free(c->environment_files);
3224 c->pass_environment = strv_free(c->pass_environment);
3225
3226 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3227 c->rlimit[l] = mfree(c->rlimit[l]);
3228
3229 for (l = 0; l < 3; l++)
3230 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3231
3232 c->working_directory = mfree(c->working_directory);
3233 c->root_directory = mfree(c->root_directory);
3234 c->root_image = mfree(c->root_image);
3235 c->tty_path = mfree(c->tty_path);
3236 c->syslog_identifier = mfree(c->syslog_identifier);
3237 c->user = mfree(c->user);
3238 c->group = mfree(c->group);
3239
3240 c->supplementary_groups = strv_free(c->supplementary_groups);
3241
3242 c->pam_name = mfree(c->pam_name);
3243
3244 c->read_only_paths = strv_free(c->read_only_paths);
3245 c->read_write_paths = strv_free(c->read_write_paths);
3246 c->inaccessible_paths = strv_free(c->inaccessible_paths);
3247
3248 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3249
3250 if (c->cpuset)
3251 CPU_FREE(c->cpuset);
3252
3253 c->utmp_id = mfree(c->utmp_id);
3254 c->selinux_context = mfree(c->selinux_context);
3255 c->apparmor_profile = mfree(c->apparmor_profile);
3256 c->smack_process_label = mfree(c->smack_process_label);
3257
3258 c->syscall_filter = set_free(c->syscall_filter);
3259 c->syscall_archs = set_free(c->syscall_archs);
3260 c->address_families = set_free(c->address_families);
3261
3262 for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
3263 c->directories[i].paths = strv_free(c->directories[i].paths);
3264 }
3265
3266 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3267 char **i;
3268
3269 assert(c);
3270
3271 if (!runtime_prefix)
3272 return 0;
3273
3274 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
3275 _cleanup_free_ char *p;
3276
3277 p = strjoin(runtime_prefix, "/", *i);
3278 if (!p)
3279 return -ENOMEM;
3280
3281 /* We execute this synchronously, since we need to be
3282 * sure this is gone when we start the service
3283 * next. */
3284 (void) rm_rf(p, REMOVE_ROOT);
3285 }
3286
3287 return 0;
3288 }
3289
3290 void exec_command_done(ExecCommand *c) {
3291 assert(c);
3292
3293 c->path = mfree(c->path);
3294
3295 c->argv = strv_free(c->argv);
3296 }
3297
3298 void exec_command_done_array(ExecCommand *c, unsigned n) {
3299 unsigned i;
3300
3301 for (i = 0; i < n; i++)
3302 exec_command_done(c+i);
3303 }
3304
3305 ExecCommand* exec_command_free_list(ExecCommand *c) {
3306 ExecCommand *i;
3307
3308 while ((i = c)) {
3309 LIST_REMOVE(command, c, i);
3310 exec_command_done(i);
3311 free(i);
3312 }
3313
3314 return NULL;
3315 }
3316
3317 void exec_command_free_array(ExecCommand **c, unsigned n) {
3318 unsigned i;
3319
3320 for (i = 0; i < n; i++)
3321 c[i] = exec_command_free_list(c[i]);
3322 }
3323
3324 typedef struct InvalidEnvInfo {
3325 Unit *unit;
3326 const char *path;
3327 } InvalidEnvInfo;
3328
3329 static void invalid_env(const char *p, void *userdata) {
3330 InvalidEnvInfo *info = userdata;
3331
3332 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3333 }
3334
3335 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3336 assert(c);
3337
3338 switch (fd_index) {
3339 case STDIN_FILENO:
3340 if (c->std_input != EXEC_INPUT_NAMED_FD)
3341 return NULL;
3342 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3343 case STDOUT_FILENO:
3344 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3345 return NULL;
3346 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3347 case STDERR_FILENO:
3348 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3349 return NULL;
3350 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3351 default:
3352 return NULL;
3353 }
3354 }
3355
3356 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3357 unsigned i, targets;
3358 const char* stdio_fdname[3];
3359 unsigned n_fds;
3360
3361 assert(c);
3362 assert(p);
3363
3364 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3365 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3366 (c->std_error == EXEC_OUTPUT_NAMED_FD);
3367
3368 for (i = 0; i < 3; i++)
3369 stdio_fdname[i] = exec_context_fdname(c, i);
3370
3371 n_fds = p->n_storage_fds + p->n_socket_fds;
3372
3373 for (i = 0; i < n_fds && targets > 0; i++)
3374 if (named_iofds[STDIN_FILENO] < 0 &&
3375 c->std_input == EXEC_INPUT_NAMED_FD &&
3376 stdio_fdname[STDIN_FILENO] &&
3377 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3378
3379 named_iofds[STDIN_FILENO] = p->fds[i];
3380 targets--;
3381
3382 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3383 c->std_output == EXEC_OUTPUT_NAMED_FD &&
3384 stdio_fdname[STDOUT_FILENO] &&
3385 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3386
3387 named_iofds[STDOUT_FILENO] = p->fds[i];
3388 targets--;
3389
3390 } else if (named_iofds[STDERR_FILENO] < 0 &&
3391 c->std_error == EXEC_OUTPUT_NAMED_FD &&
3392 stdio_fdname[STDERR_FILENO] &&
3393 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3394
3395 named_iofds[STDERR_FILENO] = p->fds[i];
3396 targets--;
3397 }
3398
3399 return targets == 0 ? 0 : -ENOENT;
3400 }
3401
3402 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3403 char **i, **r = NULL;
3404
3405 assert(c);
3406 assert(l);
3407
3408 STRV_FOREACH(i, c->environment_files) {
3409 char *fn;
3410 int k;
3411 unsigned n;
3412 bool ignore = false;
3413 char **p;
3414 _cleanup_globfree_ glob_t pglob = {};
3415
3416 fn = *i;
3417
3418 if (fn[0] == '-') {
3419 ignore = true;
3420 fn++;
3421 }
3422
3423 if (!path_is_absolute(fn)) {
3424 if (ignore)
3425 continue;
3426
3427 strv_free(r);
3428 return -EINVAL;
3429 }
3430
3431 /* Filename supports globbing, take all matching files */
3432 k = safe_glob(fn, 0, &pglob);
3433 if (k < 0) {
3434 if (ignore)
3435 continue;
3436
3437 strv_free(r);
3438 return k;
3439 }
3440
3441 /* When we don't match anything, -ENOENT should be returned */
3442 assert(pglob.gl_pathc > 0);
3443
3444 for (n = 0; n < pglob.gl_pathc; n++) {
3445 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3446 if (k < 0) {
3447 if (ignore)
3448 continue;
3449
3450 strv_free(r);
3451 return k;
3452 }
3453 /* Log invalid environment variables with filename */
3454 if (p) {
3455 InvalidEnvInfo info = {
3456 .unit = unit,
3457 .path = pglob.gl_pathv[n]
3458 };
3459
3460 p = strv_env_clean_with_callback(p, invalid_env, &info);
3461 }
3462
3463 if (r == NULL)
3464 r = p;
3465 else {
3466 char **m;
3467
3468 m = strv_env_merge(2, r, p);
3469 strv_free(r);
3470 strv_free(p);
3471 if (!m)
3472 return -ENOMEM;
3473
3474 r = m;
3475 }
3476 }
3477 }
3478
3479 *l = r;
3480
3481 return 0;
3482 }
3483
3484 static bool tty_may_match_dev_console(const char *tty) {
3485 _cleanup_free_ char *active = NULL;
3486 char *console;
3487
3488 if (!tty)
3489 return true;
3490
3491 tty = skip_dev_prefix(tty);
3492
3493 /* trivial identity? */
3494 if (streq(tty, "console"))
3495 return true;
3496
3497 console = resolve_dev_console(&active);
3498 /* if we could not resolve, assume it may */
3499 if (!console)
3500 return true;
3501
3502 /* "tty0" means the active VC, so it may be the same sometimes */
3503 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3504 }
3505
3506 bool exec_context_may_touch_console(ExecContext *ec) {
3507
3508 return (ec->tty_reset ||
3509 ec->tty_vhangup ||
3510 ec->tty_vt_disallocate ||
3511 is_terminal_input(ec->std_input) ||
3512 is_terminal_output(ec->std_output) ||
3513 is_terminal_output(ec->std_error)) &&
3514 tty_may_match_dev_console(exec_context_tty_path(ec));
3515 }
3516
3517 static void strv_fprintf(FILE *f, char **l) {
3518 char **g;
3519
3520 assert(f);
3521
3522 STRV_FOREACH(g, l)
3523 fprintf(f, " %s", *g);
3524 }
3525
3526 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3527 char **e, **d;
3528 unsigned i;
3529 ExecDirectoryType dt;
3530 int r;
3531
3532 assert(c);
3533 assert(f);
3534
3535 prefix = strempty(prefix);
3536
3537 fprintf(f,
3538 "%sUMask: %04o\n"
3539 "%sWorkingDirectory: %s\n"
3540 "%sRootDirectory: %s\n"
3541 "%sNonBlocking: %s\n"
3542 "%sPrivateTmp: %s\n"
3543 "%sPrivateDevices: %s\n"
3544 "%sProtectKernelTunables: %s\n"
3545 "%sProtectKernelModules: %s\n"
3546 "%sProtectControlGroups: %s\n"
3547 "%sPrivateNetwork: %s\n"
3548 "%sPrivateUsers: %s\n"
3549 "%sProtectHome: %s\n"
3550 "%sProtectSystem: %s\n"
3551 "%sMountAPIVFS: %s\n"
3552 "%sIgnoreSIGPIPE: %s\n"
3553 "%sMemoryDenyWriteExecute: %s\n"
3554 "%sRestrictRealtime: %s\n",
3555 prefix, c->umask,
3556 prefix, c->working_directory ? c->working_directory : "/",
3557 prefix, c->root_directory ? c->root_directory : "/",
3558 prefix, yes_no(c->non_blocking),
3559 prefix, yes_no(c->private_tmp),
3560 prefix, yes_no(c->private_devices),
3561 prefix, yes_no(c->protect_kernel_tunables),
3562 prefix, yes_no(c->protect_kernel_modules),
3563 prefix, yes_no(c->protect_control_groups),
3564 prefix, yes_no(c->private_network),
3565 prefix, yes_no(c->private_users),
3566 prefix, protect_home_to_string(c->protect_home),
3567 prefix, protect_system_to_string(c->protect_system),
3568 prefix, yes_no(c->mount_apivfs),
3569 prefix, yes_no(c->ignore_sigpipe),
3570 prefix, yes_no(c->memory_deny_write_execute),
3571 prefix, yes_no(c->restrict_realtime));
3572
3573 if (c->root_image)
3574 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3575
3576 STRV_FOREACH(e, c->environment)
3577 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3578
3579 STRV_FOREACH(e, c->environment_files)
3580 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3581
3582 STRV_FOREACH(e, c->pass_environment)
3583 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3584
3585 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
3586
3587 for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
3588 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
3589
3590 STRV_FOREACH(d, c->directories[dt].paths)
3591 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
3592 }
3593
3594 if (c->nice_set)
3595 fprintf(f,
3596 "%sNice: %i\n",
3597 prefix, c->nice);
3598
3599 if (c->oom_score_adjust_set)
3600 fprintf(f,
3601 "%sOOMScoreAdjust: %i\n",
3602 prefix, c->oom_score_adjust);
3603
3604 for (i = 0; i < RLIM_NLIMITS; i++)
3605 if (c->rlimit[i]) {
3606 fprintf(f, "%s%s: " RLIM_FMT "\n",
3607 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3608 fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3609 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3610 }
3611
3612 if (c->ioprio_set) {
3613 _cleanup_free_ char *class_str = NULL;
3614
3615 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3616 if (r >= 0)
3617 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
3618
3619 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
3620 }
3621
3622 if (c->cpu_sched_set) {
3623 _cleanup_free_ char *policy_str = NULL;
3624
3625 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3626 if (r >= 0)
3627 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
3628
3629 fprintf(f,
3630 "%sCPUSchedulingPriority: %i\n"
3631 "%sCPUSchedulingResetOnFork: %s\n",
3632 prefix, c->cpu_sched_priority,
3633 prefix, yes_no(c->cpu_sched_reset_on_fork));
3634 }
3635
3636 if (c->cpuset) {
3637 fprintf(f, "%sCPUAffinity:", prefix);
3638 for (i = 0; i < c->cpuset_ncpus; i++)
3639 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3640 fprintf(f, " %u", i);
3641 fputs("\n", f);
3642 }
3643
3644 if (c->timer_slack_nsec != NSEC_INFINITY)
3645 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3646
3647 fprintf(f,
3648 "%sStandardInput: %s\n"
3649 "%sStandardOutput: %s\n"
3650 "%sStandardError: %s\n",
3651 prefix, exec_input_to_string(c->std_input),
3652 prefix, exec_output_to_string(c->std_output),
3653 prefix, exec_output_to_string(c->std_error));
3654
3655 if (c->tty_path)
3656 fprintf(f,
3657 "%sTTYPath: %s\n"
3658 "%sTTYReset: %s\n"
3659 "%sTTYVHangup: %s\n"
3660 "%sTTYVTDisallocate: %s\n",
3661 prefix, c->tty_path,
3662 prefix, yes_no(c->tty_reset),
3663 prefix, yes_no(c->tty_vhangup),
3664 prefix, yes_no(c->tty_vt_disallocate));
3665
3666 if (IN_SET(c->std_output,
3667 EXEC_OUTPUT_SYSLOG,
3668 EXEC_OUTPUT_KMSG,
3669 EXEC_OUTPUT_JOURNAL,
3670 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3671 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3672 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
3673 IN_SET(c->std_error,
3674 EXEC_OUTPUT_SYSLOG,
3675 EXEC_OUTPUT_KMSG,
3676 EXEC_OUTPUT_JOURNAL,
3677 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
3678 EXEC_OUTPUT_KMSG_AND_CONSOLE,
3679 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
3680
3681 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3682
3683 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3684 if (r >= 0)
3685 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
3686
3687 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3688 if (r >= 0)
3689 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
3690 }
3691
3692 if (c->secure_bits) {
3693 _cleanup_free_ char *str = NULL;
3694
3695 r = secure_bits_to_string_alloc(c->secure_bits, &str);
3696 if (r >= 0)
3697 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
3698 }
3699
3700 if (c->capability_bounding_set != CAP_ALL) {
3701 _cleanup_free_ char *str = NULL;
3702
3703 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
3704 if (r >= 0)
3705 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
3706 }
3707
3708 if (c->capability_ambient_set != 0) {
3709 _cleanup_free_ char *str = NULL;
3710
3711 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
3712 if (r >= 0)
3713 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
3714 }
3715
3716 if (c->user)
3717 fprintf(f, "%sUser: %s\n", prefix, c->user);
3718 if (c->group)
3719 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3720
3721 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3722
3723 if (strv_length(c->supplementary_groups) > 0) {
3724 fprintf(f, "%sSupplementaryGroups:", prefix);
3725 strv_fprintf(f, c->supplementary_groups);
3726 fputs("\n", f);
3727 }
3728
3729 if (c->pam_name)
3730 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3731
3732 if (strv_length(c->read_write_paths) > 0) {
3733 fprintf(f, "%sReadWritePaths:", prefix);
3734 strv_fprintf(f, c->read_write_paths);
3735 fputs("\n", f);
3736 }
3737
3738 if (strv_length(c->read_only_paths) > 0) {
3739 fprintf(f, "%sReadOnlyPaths:", prefix);
3740 strv_fprintf(f, c->read_only_paths);
3741 fputs("\n", f);
3742 }
3743
3744 if (strv_length(c->inaccessible_paths) > 0) {
3745 fprintf(f, "%sInaccessiblePaths:", prefix);
3746 strv_fprintf(f, c->inaccessible_paths);
3747 fputs("\n", f);
3748 }
3749
3750 if (c->n_bind_mounts > 0)
3751 for (i = 0; i < c->n_bind_mounts; i++) {
3752 fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3753 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3754 c->bind_mounts[i].source,
3755 c->bind_mounts[i].destination,
3756 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3757 }
3758
3759 if (c->utmp_id)
3760 fprintf(f,
3761 "%sUtmpIdentifier: %s\n",
3762 prefix, c->utmp_id);
3763
3764 if (c->selinux_context)
3765 fprintf(f,
3766 "%sSELinuxContext: %s%s\n",
3767 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3768
3769 if (c->apparmor_profile)
3770 fprintf(f,
3771 "%sAppArmorProfile: %s%s\n",
3772 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3773
3774 if (c->smack_process_label)
3775 fprintf(f,
3776 "%sSmackProcessLabel: %s%s\n",
3777 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
3778
3779 if (c->personality != PERSONALITY_INVALID)
3780 fprintf(f,
3781 "%sPersonality: %s\n",
3782 prefix, strna(personality_to_string(c->personality)));
3783
3784 fprintf(f,
3785 "%sLockPersonality: %s\n",
3786 prefix, yes_no(c->lock_personality));
3787
3788 if (c->syscall_filter) {
3789 #ifdef HAVE_SECCOMP
3790 Iterator j;
3791 void *id;
3792 bool first = true;
3793 #endif
3794
3795 fprintf(f,
3796 "%sSystemCallFilter: ",
3797 prefix);
3798
3799 if (!c->syscall_whitelist)
3800 fputc('~', f);
3801
3802 #ifdef HAVE_SECCOMP
3803 SET_FOREACH(id, c->syscall_filter, j) {
3804 _cleanup_free_ char *name = NULL;
3805
3806 if (first)
3807 first = false;
3808 else
3809 fputc(' ', f);
3810
3811 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3812 fputs(strna(name), f);
3813 }
3814 #endif
3815
3816 fputc('\n', f);
3817 }
3818
3819 if (c->syscall_archs) {
3820 #ifdef HAVE_SECCOMP
3821 Iterator j;
3822 void *id;
3823 #endif
3824
3825 fprintf(f,
3826 "%sSystemCallArchitectures:",
3827 prefix);
3828
3829 #ifdef HAVE_SECCOMP
3830 SET_FOREACH(id, c->syscall_archs, j)
3831 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3832 #endif
3833 fputc('\n', f);
3834 }
3835
3836 if (exec_context_restrict_namespaces_set(c)) {
3837 _cleanup_free_ char *s = NULL;
3838
3839 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3840 if (r >= 0)
3841 fprintf(f, "%sRestrictNamespaces: %s\n",
3842 prefix, s);
3843 }
3844
3845 if (c->syscall_errno > 0)
3846 fprintf(f,
3847 "%sSystemCallErrorNumber: %s\n",
3848 prefix, strna(errno_to_name(c->syscall_errno)));
3849
3850 if (c->apparmor_profile)
3851 fprintf(f,
3852 "%sAppArmorProfile: %s%s\n",
3853 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3854 }
3855
3856 bool exec_context_maintains_privileges(ExecContext *c) {
3857 assert(c);
3858
3859 /* Returns true if the process forked off would run under
3860 * an unchanged UID or as root. */
3861
3862 if (!c->user)
3863 return true;
3864
3865 if (streq(c->user, "root") || streq(c->user, "0"))
3866 return true;
3867
3868 return false;
3869 }
3870
3871 int exec_context_get_effective_ioprio(ExecContext *c) {
3872 int p;
3873
3874 assert(c);
3875
3876 if (c->ioprio_set)
3877 return c->ioprio;
3878
3879 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3880 if (p < 0)
3881 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3882
3883 return p;
3884 }
3885
3886 void exec_status_start(ExecStatus *s, pid_t pid) {
3887 assert(s);
3888
3889 zero(*s);
3890 s->pid = pid;
3891 dual_timestamp_get(&s->start_timestamp);
3892 }
3893
3894 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
3895 assert(s);
3896
3897 if (s->pid && s->pid != pid)
3898 zero(*s);
3899
3900 s->pid = pid;
3901 dual_timestamp_get(&s->exit_timestamp);
3902
3903 s->code = code;
3904 s->status = status;
3905
3906 if (context) {
3907 if (context->utmp_id)
3908 utmp_put_dead_process(context->utmp_id, pid, code, status);
3909
3910 exec_context_tty_reset(context, NULL);
3911 }
3912 }
3913
3914 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3915 char buf[FORMAT_TIMESTAMP_MAX];
3916
3917 assert(s);
3918 assert(f);
3919
3920 if (s->pid <= 0)
3921 return;
3922
3923 prefix = strempty(prefix);
3924
3925 fprintf(f,
3926 "%sPID: "PID_FMT"\n",
3927 prefix, s->pid);
3928
3929 if (dual_timestamp_is_set(&s->start_timestamp))
3930 fprintf(f,
3931 "%sStart Timestamp: %s\n",
3932 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
3933
3934 if (dual_timestamp_is_set(&s->exit_timestamp))
3935 fprintf(f,
3936 "%sExit Timestamp: %s\n"
3937 "%sExit Code: %s\n"
3938 "%sExit Status: %i\n",
3939 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
3940 prefix, sigchld_code_to_string(s->code),
3941 prefix, s->status);
3942 }
3943
3944 char *exec_command_line(char **argv) {
3945 size_t k;
3946 char *n, *p, **a;
3947 bool first = true;
3948
3949 assert(argv);
3950
3951 k = 1;
3952 STRV_FOREACH(a, argv)
3953 k += strlen(*a)+3;
3954
3955 n = new(char, k);
3956 if (!n)
3957 return NULL;
3958
3959 p = n;
3960 STRV_FOREACH(a, argv) {
3961
3962 if (!first)
3963 *(p++) = ' ';
3964 else
3965 first = false;
3966
3967 if (strpbrk(*a, WHITESPACE)) {
3968 *(p++) = '\'';
3969 p = stpcpy(p, *a);
3970 *(p++) = '\'';
3971 } else
3972 p = stpcpy(p, *a);
3973
3974 }
3975
3976 *p = 0;
3977
3978 /* FIXME: this doesn't really handle arguments that have
3979 * spaces and ticks in them */
3980
3981 return n;
3982 }
3983
3984 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
3985 _cleanup_free_ char *cmd = NULL;
3986 const char *prefix2;
3987
3988 assert(c);
3989 assert(f);
3990
3991 prefix = strempty(prefix);
3992 prefix2 = strjoina(prefix, "\t");
3993
3994 cmd = exec_command_line(c->argv);
3995 fprintf(f,
3996 "%sCommand Line: %s\n",
3997 prefix, cmd ? cmd : strerror(ENOMEM));
3998
3999 exec_status_dump(&c->exec_status, f, prefix2);
4000 }
4001
4002 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
4003 assert(f);
4004
4005 prefix = strempty(prefix);
4006
4007 LIST_FOREACH(command, c, c)
4008 exec_command_dump(c, f, prefix);
4009 }
4010
4011 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
4012 ExecCommand *end;
4013
4014 assert(l);
4015 assert(e);
4016
4017 if (*l) {
4018 /* It's kind of important, that we keep the order here */
4019 LIST_FIND_TAIL(command, *l, end);
4020 LIST_INSERT_AFTER(command, *l, end, e);
4021 } else
4022 *l = e;
4023 }
4024
4025 int exec_command_set(ExecCommand *c, const char *path, ...) {
4026 va_list ap;
4027 char **l, *p;
4028
4029 assert(c);
4030 assert(path);
4031
4032 va_start(ap, path);
4033 l = strv_new_ap(path, ap);
4034 va_end(ap);
4035
4036 if (!l)
4037 return -ENOMEM;
4038
4039 p = strdup(path);
4040 if (!p) {
4041 strv_free(l);
4042 return -ENOMEM;
4043 }
4044
4045 free(c->path);
4046 c->path = p;
4047
4048 strv_free(c->argv);
4049 c->argv = l;
4050
4051 return 0;
4052 }
4053
4054 int exec_command_append(ExecCommand *c, const char *path, ...) {
4055 _cleanup_strv_free_ char **l = NULL;
4056 va_list ap;
4057 int r;
4058
4059 assert(c);
4060 assert(path);
4061
4062 va_start(ap, path);
4063 l = strv_new_ap(path, ap);
4064 va_end(ap);
4065
4066 if (!l)
4067 return -ENOMEM;
4068
4069 r = strv_extend_strv(&c->argv, l, false);
4070 if (r < 0)
4071 return r;
4072
4073 return 0;
4074 }
4075
4076
4077 static int exec_runtime_allocate(ExecRuntime **rt) {
4078
4079 if (*rt)
4080 return 0;
4081
4082 *rt = new0(ExecRuntime, 1);
4083 if (!*rt)
4084 return -ENOMEM;
4085
4086 (*rt)->n_ref = 1;
4087 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
4088
4089 return 0;
4090 }
4091
4092 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
4093 int r;
4094
4095 assert(rt);
4096 assert(c);
4097 assert(id);
4098
4099 if (*rt)
4100 return 1;
4101
4102 if (!c->private_network && !c->private_tmp)
4103 return 0;
4104
4105 r = exec_runtime_allocate(rt);
4106 if (r < 0)
4107 return r;
4108
4109 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
4110 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
4111 return -errno;
4112 }
4113
4114 if (c->private_tmp && !(*rt)->tmp_dir) {
4115 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
4116 if (r < 0)
4117 return r;
4118 }
4119
4120 return 1;
4121 }
4122
4123 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
4124 assert(r);
4125 assert(r->n_ref > 0);
4126
4127 r->n_ref++;
4128 return r;
4129 }
4130
4131 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
4132
4133 if (!r)
4134 return NULL;
4135
4136 assert(r->n_ref > 0);
4137
4138 r->n_ref--;
4139 if (r->n_ref > 0)
4140 return NULL;
4141
4142 free(r->tmp_dir);
4143 free(r->var_tmp_dir);
4144 safe_close_pair(r->netns_storage_socket);
4145 return mfree(r);
4146 }
4147
4148 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
4149 assert(u);
4150 assert(f);
4151 assert(fds);
4152
4153 if (!rt)
4154 return 0;
4155
4156 if (rt->tmp_dir)
4157 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
4158
4159 if (rt->var_tmp_dir)
4160 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
4161
4162 if (rt->netns_storage_socket[0] >= 0) {
4163 int copy;
4164
4165 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
4166 if (copy < 0)
4167 return copy;
4168
4169 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4170 }
4171
4172 if (rt->netns_storage_socket[1] >= 0) {
4173 int copy;
4174
4175 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4176 if (copy < 0)
4177 return copy;
4178
4179 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4180 }
4181
4182 return 0;
4183 }
4184
4185 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4186 int r;
4187
4188 assert(rt);
4189 assert(key);
4190 assert(value);
4191
4192 if (streq(key, "tmp-dir")) {
4193 char *copy;
4194
4195 r = exec_runtime_allocate(rt);
4196 if (r < 0)
4197 return log_oom();
4198
4199 copy = strdup(value);
4200 if (!copy)
4201 return log_oom();
4202
4203 free((*rt)->tmp_dir);
4204 (*rt)->tmp_dir = copy;
4205
4206 } else if (streq(key, "var-tmp-dir")) {
4207 char *copy;
4208
4209 r = exec_runtime_allocate(rt);
4210 if (r < 0)
4211 return log_oom();
4212
4213 copy = strdup(value);
4214 if (!copy)
4215 return log_oom();
4216
4217 free((*rt)->var_tmp_dir);
4218 (*rt)->var_tmp_dir = copy;
4219
4220 } else if (streq(key, "netns-socket-0")) {
4221 int fd;
4222
4223 r = exec_runtime_allocate(rt);
4224 if (r < 0)
4225 return log_oom();
4226
4227 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4228 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4229 else {
4230 safe_close((*rt)->netns_storage_socket[0]);
4231 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4232 }
4233 } else if (streq(key, "netns-socket-1")) {
4234 int fd;
4235
4236 r = exec_runtime_allocate(rt);
4237 if (r < 0)
4238 return log_oom();
4239
4240 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4241 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4242 else {
4243 safe_close((*rt)->netns_storage_socket[1]);
4244 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4245 }
4246 } else
4247 return 0;
4248
4249 return 1;
4250 }
4251
4252 static void *remove_tmpdir_thread(void *p) {
4253 _cleanup_free_ char *path = p;
4254
4255 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4256 return NULL;
4257 }
4258
4259 void exec_runtime_destroy(ExecRuntime *rt) {
4260 int r;
4261
4262 if (!rt)
4263 return;
4264
4265 /* If there are multiple users of this, let's leave the stuff around */
4266 if (rt->n_ref > 1)
4267 return;
4268
4269 if (rt->tmp_dir) {
4270 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4271
4272 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4273 if (r < 0) {
4274 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4275 free(rt->tmp_dir);
4276 }
4277
4278 rt->tmp_dir = NULL;
4279 }
4280
4281 if (rt->var_tmp_dir) {
4282 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4283
4284 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4285 if (r < 0) {
4286 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4287 free(rt->var_tmp_dir);
4288 }
4289
4290 rt->var_tmp_dir = NULL;
4291 }
4292
4293 safe_close_pair(rt->netns_storage_socket);
4294 }
4295
4296 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
4297 [EXEC_INPUT_NULL] = "null",
4298 [EXEC_INPUT_TTY] = "tty",
4299 [EXEC_INPUT_TTY_FORCE] = "tty-force",
4300 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
4301 [EXEC_INPUT_SOCKET] = "socket",
4302 [EXEC_INPUT_NAMED_FD] = "fd",
4303 };
4304
4305 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4306
4307 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
4308 [EXEC_OUTPUT_INHERIT] = "inherit",
4309 [EXEC_OUTPUT_NULL] = "null",
4310 [EXEC_OUTPUT_TTY] = "tty",
4311 [EXEC_OUTPUT_SYSLOG] = "syslog",
4312 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
4313 [EXEC_OUTPUT_KMSG] = "kmsg",
4314 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
4315 [EXEC_OUTPUT_JOURNAL] = "journal",
4316 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
4317 [EXEC_OUTPUT_SOCKET] = "socket",
4318 [EXEC_OUTPUT_NAMED_FD] = "fd",
4319 };
4320
4321 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4322
4323 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
4324 [EXEC_UTMP_INIT] = "init",
4325 [EXEC_UTMP_LOGIN] = "login",
4326 [EXEC_UTMP_USER] = "user",
4327 };
4328
4329 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
4330
4331 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
4332 [EXEC_PRESERVE_NO] = "no",
4333 [EXEC_PRESERVE_YES] = "yes",
4334 [EXEC_PRESERVE_RESTART] = "restart",
4335 };
4336
4337 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
4338
4339 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
4340 [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
4341 [EXEC_DIRECTORY_STATE] = "StateDirectory",
4342 [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
4343 [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
4344 [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
4345 };
4346
4347 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);