]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
7796c07fcf59d594bf7c248ba3819a8ecf24c2aa
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <utmpx.h>
35 #include <sys/personality.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "barrier.h"
54 #include "sd-messages.h"
55 #include "rm-rf.h"
56 #include "strv.h"
57 #include "macro.h"
58 #include "capability.h"
59 #include "util.h"
60 #include "log.h"
61 #include "ioprio.h"
62 #include "securebits.h"
63 #include "namespace.h"
64 #include "exit-status.h"
65 #include "missing.h"
66 #include "utmp-wtmp.h"
67 #include "def.h"
68 #include "path-util.h"
69 #include "env-util.h"
70 #include "fileio.h"
71 #include "unit.h"
72 #include "async.h"
73 #include "selinux-util.h"
74 #include "errno-list.h"
75 #include "af-list.h"
76 #include "mkdir.h"
77 #include "smack-util.h"
78 #include "bus-endpoint.h"
79 #include "cap-list.h"
80 #include "formats-util.h"
81 #include "process-util.h"
82 #include "terminal-util.h"
83 #include "signal-util.h"
84
85 #ifdef HAVE_APPARMOR
86 #include "apparmor-util.h"
87 #endif
88
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92
93 #include "execute.h"
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Rearranges the passed file descriptors so that fds[i] ends up being
 * fd number i+3. The array is updated in place to reflect the new fd
 * numbers. Returns 0 on success, or a negative errno if duplicating
 * an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int count = (int) n_fds, first_misplaced = 0;

        if (n_fds == 0)
                return 0;

        /* Note: the caller's array is modified! */

        assert(fds);

        do {
                int pass_start = first_misplaced, idx;

                first_misplaced = -1;

                for (idx = pass_start; idx < count; idx++) {
                        int target = idx + 3, dupfd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dupfd = fcntl(fds[idx], F_DUPFD, target);
                        if (dupfd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dupfd;

                        /* The slot we wanted was still occupied, so we got a
                         * higher fd. Remember the first such index so that
                         * the next pass retries from there. */
                        if (dupfd != target && first_misplaced < 0)
                                first_misplaced = idx;
                }
        } while (first_misplaced >= 0);

        return 0;
}
147
/* Sets or clears O_NONBLOCK (according to 'nonblock') on each of the
 * passed fds, and always clears FD_CLOEXEC on them. Returns 0 on
 * success or the first negative error reported by a helper. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be passed on to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
176
177 _pure_ static const char *tty_path(const ExecContext *context) {
178 assert(context);
179
180 if (context->tty_path)
181 return context->tty_path;
182
183 return "/dev/console";
184 }
185
186 static void exec_context_tty_reset(const ExecContext *context) {
187 assert(context);
188
189 if (context->tty_vhangup)
190 terminal_vhangup(tty_path(context));
191
192 if (context->tty_reset)
193 reset_terminal(tty_path(context));
194
195 if (context->tty_vt_disallocate && context->tty_path)
196 vt_disallocate(context->tty_path);
197 }
198
199 static bool is_terminal_output(ExecOutput o) {
200 return
201 o == EXEC_OUTPUT_TTY ||
202 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
203 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
204 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
205 }
206
/* Opens /dev/null with the specified flags (plus O_NOCTTY) and makes
 * it available as fd number 'nfd'. Returns nfd on success, a negative
 * errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right fd right away */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
224
225 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
226 union sockaddr_union sa = {
227 .un.sun_family = AF_UNIX,
228 .un.sun_path = "/run/systemd/journal/stdout",
229 };
230 uid_t olduid = UID_INVALID;
231 gid_t oldgid = GID_INVALID;
232 int r;
233
234 if (gid != GID_INVALID) {
235 oldgid = getgid();
236
237 r = setegid(gid);
238 if (r < 0)
239 return -errno;
240 }
241
242 if (uid != UID_INVALID) {
243 olduid = getuid();
244
245 r = seteuid(uid);
246 if (r < 0) {
247 r = -errno;
248 goto restore_gid;
249 }
250 }
251
252 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
253 if (r < 0)
254 r = -errno;
255
256 /* If we fail to restore the uid or gid, things will likely
257 fail later on. This should only happen if an LSM interferes. */
258
259 if (uid != UID_INVALID)
260 (void) seteuid(olduid);
261
262 restore_gid:
263 if (gid != GID_INVALID)
264 (void) setegid(oldgid);
265
266 return r;
267 }
268
269 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
270 int fd, r;
271
272 assert(context);
273 assert(output < _EXEC_OUTPUT_MAX);
274 assert(ident);
275 assert(nfd >= 0);
276
277 fd = socket(AF_UNIX, SOCK_STREAM, 0);
278 if (fd < 0)
279 return -errno;
280
281 r = connect_journal_socket(fd, uid, gid);
282 if (r < 0)
283 return r;
284
285 if (shutdown(fd, SHUT_RD) < 0) {
286 safe_close(fd);
287 return -errno;
288 }
289
290 fd_inc_sndbuf(fd, SNDBUF_SIZE);
291
292 dprintf(fd,
293 "%s\n"
294 "%s\n"
295 "%i\n"
296 "%i\n"
297 "%i\n"
298 "%i\n"
299 "%i\n",
300 context->syslog_identifier ? context->syslog_identifier : ident,
301 unit_id,
302 context->syslog_priority,
303 !!context->syslog_level_prefix,
304 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
305 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
306 is_terminal_output(output));
307
308 if (fd != nfd) {
309 r = dup2(fd, nfd) < 0 ? -errno : nfd;
310 safe_close(fd);
311 } else
312 r = nfd;
313
314 return r;
315 }
/* Opens the terminal at 'path' with the specified open mode (plus
 * O_NOCTTY) and makes it available as fd number 'nfd'. Returns nfd on
 * success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already the right fd? Then there's nothing to move. */
        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return r;
}
334
335 static bool is_terminal_input(ExecInput i) {
336 return
337 i == EXEC_INPUT_TTY ||
338 i == EXEC_INPUT_TTY_FORCE ||
339 i == EXEC_INPUT_TTY_FAIL;
340 }
341
342 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
343
344 if (is_terminal_input(std_input) && !apply_tty_stdin)
345 return EXEC_INPUT_NULL;
346
347 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
348 return EXEC_INPUT_NULL;
349
350 return std_input;
351 }
352
353 static int fixup_output(ExecOutput std_output, int socket_fd) {
354
355 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
356 return EXEC_OUTPUT_INHERIT;
357
358 return std_output;
359 }
360
361 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
362 ExecInput i;
363
364 assert(context);
365
366 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367
368 switch (i) {
369
370 case EXEC_INPUT_NULL:
371 return open_null_as(O_RDONLY, STDIN_FILENO);
372
373 case EXEC_INPUT_TTY:
374 case EXEC_INPUT_TTY_FORCE:
375 case EXEC_INPUT_TTY_FAIL: {
376 int fd, r;
377
378 fd = acquire_terminal(tty_path(context),
379 i == EXEC_INPUT_TTY_FAIL,
380 i == EXEC_INPUT_TTY_FORCE,
381 false,
382 USEC_INFINITY);
383 if (fd < 0)
384 return fd;
385
386 if (fd != STDIN_FILENO) {
387 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
388 safe_close(fd);
389 } else
390 r = STDIN_FILENO;
391
392 return r;
393 }
394
395 case EXEC_INPUT_SOCKET:
396 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
397
398 default:
399 assert_not_reached("Unknown input type");
400 }
401 }
402
403 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
404 ExecOutput o;
405 ExecInput i;
406 int r;
407
408 assert(unit);
409 assert(context);
410 assert(ident);
411
412 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
413 o = fixup_output(context->std_output, socket_fd);
414
415 if (fileno == STDERR_FILENO) {
416 ExecOutput e;
417 e = fixup_output(context->std_error, socket_fd);
418
419 /* This expects the input and output are already set up */
420
421 /* Don't change the stderr file descriptor if we inherit all
422 * the way and are not on a tty */
423 if (e == EXEC_OUTPUT_INHERIT &&
424 o == EXEC_OUTPUT_INHERIT &&
425 i == EXEC_INPUT_NULL &&
426 !is_terminal_input(context->std_input) &&
427 getppid () != 1)
428 return fileno;
429
430 /* Duplicate from stdout if possible */
431 if (e == o || e == EXEC_OUTPUT_INHERIT)
432 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
433
434 o = e;
435
436 } else if (o == EXEC_OUTPUT_INHERIT) {
437 /* If input got downgraded, inherit the original value */
438 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
439 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
440
441 /* If the input is connected to anything that's not a /dev/null, inherit that... */
442 if (i != EXEC_INPUT_NULL)
443 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
444
445 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
446 if (getppid() != 1)
447 return fileno;
448
449 /* We need to open /dev/null here anew, to get the right access mode. */
450 return open_null_as(O_WRONLY, fileno);
451 }
452
453 switch (o) {
454
455 case EXEC_OUTPUT_NULL:
456 return open_null_as(O_WRONLY, fileno);
457
458 case EXEC_OUTPUT_TTY:
459 if (is_terminal_input(i))
460 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
461
462 /* We don't reset the terminal if this is just about output */
463 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
464
465 case EXEC_OUTPUT_SYSLOG:
466 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
467 case EXEC_OUTPUT_KMSG:
468 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
469 case EXEC_OUTPUT_JOURNAL:
470 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
471 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
472 if (r < 0) {
473 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
474 r = open_null_as(O_WRONLY, fileno);
475 }
476 return r;
477
478 case EXEC_OUTPUT_SOCKET:
479 assert(socket_fd >= 0);
480 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
481
482 default:
483 assert_not_reached("Unknown error type");
484 }
485 }
486
487 static int chown_terminal(int fd, uid_t uid) {
488 struct stat st;
489
490 assert(fd >= 0);
491
492 /* This might fail. What matters are the results. */
493 (void) fchown(fd, uid, -1);
494 (void) fchmod(fd, TTY_MODE);
495
496 if (fstat(fd, &st) < 0)
497 return -errno;
498
499 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
500 return -EPERM;
501
502 return 0;
503 }
504
505 static int setup_confirm_stdio(int *_saved_stdin,
506 int *_saved_stdout) {
507 int fd = -1, saved_stdin, saved_stdout = -1, r;
508
509 assert(_saved_stdin);
510 assert(_saved_stdout);
511
512 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
513 if (saved_stdin < 0)
514 return -errno;
515
516 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
517 if (saved_stdout < 0) {
518 r = errno;
519 goto fail;
520 }
521
522 fd = acquire_terminal(
523 "/dev/console",
524 false,
525 false,
526 false,
527 DEFAULT_CONFIRM_USEC);
528 if (fd < 0) {
529 r = fd;
530 goto fail;
531 }
532
533 r = chown_terminal(fd, getuid());
534 if (r < 0)
535 goto fail;
536
537 if (dup2(fd, STDIN_FILENO) < 0) {
538 r = -errno;
539 goto fail;
540 }
541
542 if (dup2(fd, STDOUT_FILENO) < 0) {
543 r = -errno;
544 goto fail;
545 }
546
547 if (fd >= 2)
548 safe_close(fd);
549
550 *_saved_stdin = saved_stdin;
551 *_saved_stdout = saved_stdout;
552
553 return 0;
554
555 fail:
556 safe_close(saved_stdout);
557 safe_close(saved_stdin);
558 safe_close(fd);
559
560 return r;
561 }
562
563 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
564 _cleanup_close_ int fd = -1;
565 va_list ap;
566
567 assert(format);
568
569 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
570 if (fd < 0)
571 return fd;
572
573 va_start(ap, format);
574 vdprintf(fd, format, ap);
575 va_end(ap);
576
577 return 0;
578 }
579
/* Releases the terminal and reinstalls the saved stdin/stdout fds (if
 * valid) as STDIN_FILENO/STDOUT_FILENO, then closes the saved fds.
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        /* The saved copies are closed in any case */
        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
603
604 static int ask_for_confirmation(char *response, char **argv) {
605 int saved_stdout = -1, saved_stdin = -1, r;
606 _cleanup_free_ char *line = NULL;
607
608 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
609 if (r < 0)
610 return r;
611
612 line = exec_command_line(argv);
613 if (!line)
614 return -ENOMEM;
615
616 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
617
618 restore_confirm_stdio(&saved_stdin, &saved_stdout);
619
620 return r;
621 }
622
623 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
624 bool keep_groups = false;
625 int r;
626
627 assert(context);
628
629 /* Lookup and set GID and supplementary group list. Here too
630 * we avoid NSS lookups for gid=0. */
631
632 if (context->group || username) {
633 /* First step, initialize groups from /etc/groups */
634 if (username && gid != 0) {
635 if (initgroups(username, gid) < 0)
636 return -errno;
637
638 keep_groups = true;
639 }
640
641 /* Second step, set our gids */
642 if (setresgid(gid, gid, gid) < 0)
643 return -errno;
644 }
645
646 if (context->supplementary_groups) {
647 int ngroups_max, k;
648 gid_t *gids;
649 char **i;
650
651 /* Final step, initialize any manually set supplementary groups */
652 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
653
654 if (!(gids = new(gid_t, ngroups_max)))
655 return -ENOMEM;
656
657 if (keep_groups) {
658 k = getgroups(ngroups_max, gids);
659 if (k < 0) {
660 free(gids);
661 return -errno;
662 }
663 } else
664 k = 0;
665
666 STRV_FOREACH(i, context->supplementary_groups) {
667 const char *g;
668
669 if (k >= ngroups_max) {
670 free(gids);
671 return -E2BIG;
672 }
673
674 g = *i;
675 r = get_group_creds(&g, gids+k);
676 if (r < 0) {
677 free(gids);
678 return r;
679 }
680
681 k++;
682 }
683
684 if (setgroups(k, gids) < 0) {
685 free(gids);
686 return -errno;
687 }
688
689 free(gids);
690 }
691
692 return 0;
693 }
694
695 static int enforce_user(const ExecContext *context, uid_t uid) {
696 assert(context);
697
698 /* Sets (but doesn't lookup) the uid and make sure we keep the
699 * capabilities while doing so. */
700
701 if (context->capabilities) {
702 _cleanup_cap_free_ cap_t d = NULL;
703 static const cap_value_t bits[] = {
704 CAP_SETUID, /* Necessary so that we can run setresuid() below */
705 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
706 };
707
708 /* First step: If we need to keep capabilities but
709 * drop privileges we need to make sure we keep our
710 * caps, while we drop privileges. */
711 if (uid != 0) {
712 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
713
714 if (prctl(PR_GET_SECUREBITS) != sb)
715 if (prctl(PR_SET_SECUREBITS, sb) < 0)
716 return -errno;
717 }
718
719 /* Second step: set the capabilities. This will reduce
720 * the capabilities to the minimum we need. */
721
722 d = cap_dup(context->capabilities);
723 if (!d)
724 return -errno;
725
726 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
727 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
728 return -errno;
729
730 if (cap_set_proc(d) < 0)
731 return -errno;
732 }
733
734 /* Third step: actually set the uids */
735 if (setresuid(uid, uid, uid) < 0)
736 return -errno;
737
738 /* At this point we should have all necessary capabilities but
739 are otherwise a normal user. However, the caps might got
740 corrupted due to the setresuid() so we need clean them up
741 later. This is done outside of this call. */
742
743 return 0;
744 }
745
746 #ifdef HAVE_PAM
747
748 static int null_conv(
749 int num_msg,
750 const struct pam_message **msg,
751 struct pam_response **resp,
752 void *appdata_ptr) {
753
754 /* We don't support conversations */
755
756 return PAM_CONV_ERR;
757 }
758
759 static int setup_pam(
760 const char *name,
761 const char *user,
762 uid_t uid,
763 const char *tty,
764 char ***pam_env,
765 int fds[], unsigned n_fds) {
766
767 static const struct pam_conv conv = {
768 .conv = null_conv,
769 .appdata_ptr = NULL
770 };
771
772 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
773 pam_handle_t *handle = NULL;
774 sigset_t old_ss;
775 int pam_code = PAM_SUCCESS;
776 int err = 0;
777 char **e = NULL;
778 bool close_session = false;
779 pid_t pam_pid = 0, parent_pid;
780 int flags = 0;
781
782 assert(name);
783 assert(user);
784 assert(pam_env);
785
786 /* We set up PAM in the parent process, then fork. The child
787 * will then stay around until killed via PR_GET_PDEATHSIG or
788 * systemd via the cgroup logic. It will then remove the PAM
789 * session again. The parent process will exec() the actual
790 * daemon. We do things this way to ensure that the main PID
791 * of the daemon is the one we initially fork()ed. */
792
793 err = barrier_create(&barrier);
794 if (err < 0)
795 goto fail;
796
797 if (log_get_max_level() < LOG_DEBUG)
798 flags |= PAM_SILENT;
799
800 pam_code = pam_start(name, user, &conv, &handle);
801 if (pam_code != PAM_SUCCESS) {
802 handle = NULL;
803 goto fail;
804 }
805
806 if (tty) {
807 pam_code = pam_set_item(handle, PAM_TTY, tty);
808 if (pam_code != PAM_SUCCESS)
809 goto fail;
810 }
811
812 pam_code = pam_acct_mgmt(handle, flags);
813 if (pam_code != PAM_SUCCESS)
814 goto fail;
815
816 pam_code = pam_open_session(handle, flags);
817 if (pam_code != PAM_SUCCESS)
818 goto fail;
819
820 close_session = true;
821
822 e = pam_getenvlist(handle);
823 if (!e) {
824 pam_code = PAM_BUF_ERR;
825 goto fail;
826 }
827
828 /* Block SIGTERM, so that we know that it won't get lost in
829 * the child */
830
831 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
832
833 parent_pid = getpid();
834
835 pam_pid = fork();
836 if (pam_pid < 0)
837 goto fail;
838
839 if (pam_pid == 0) {
840 int sig;
841 int r = EXIT_PAM;
842
843 /* The child's job is to reset the PAM session on
844 * termination */
845 barrier_set_role(&barrier, BARRIER_CHILD);
846
847 /* This string must fit in 10 chars (i.e. the length
848 * of "/sbin/init"), to look pretty in /bin/ps */
849 rename_process("(sd-pam)");
850
851 /* Make sure we don't keep open the passed fds in this
852 child. We assume that otherwise only those fds are
853 open here that have been opened by PAM. */
854 close_many(fds, n_fds);
855
856 /* Drop privileges - we don't need any to pam_close_session
857 * and this will make PR_SET_PDEATHSIG work in most cases.
858 * If this fails, ignore the error - but expect sd-pam threads
859 * to fail to exit normally */
860 if (setresuid(uid, uid, uid) < 0)
861 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
862
863 (void) ignore_signals(SIGPIPE, -1);
864
865 /* Wait until our parent died. This will only work if
866 * the above setresuid() succeeds, otherwise the kernel
867 * will not allow unprivileged parents kill their privileged
868 * children this way. We rely on the control groups kill logic
869 * to do the rest for us. */
870 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
871 goto child_finish;
872
873 /* Tell the parent that our setup is done. This is especially
874 * important regarding dropping privileges. Otherwise, unit
875 * setup might race against our setresuid(2) call. */
876 barrier_place(&barrier);
877
878 /* Check if our parent process might already have
879 * died? */
880 if (getppid() == parent_pid) {
881 sigset_t ss;
882
883 assert_se(sigemptyset(&ss) >= 0);
884 assert_se(sigaddset(&ss, SIGTERM) >= 0);
885
886 for (;;) {
887 if (sigwait(&ss, &sig) < 0) {
888 if (errno == EINTR)
889 continue;
890
891 goto child_finish;
892 }
893
894 assert(sig == SIGTERM);
895 break;
896 }
897 }
898
899 /* If our parent died we'll end the session */
900 if (getppid() != parent_pid) {
901 pam_code = pam_close_session(handle, flags);
902 if (pam_code != PAM_SUCCESS)
903 goto child_finish;
904 }
905
906 r = 0;
907
908 child_finish:
909 pam_end(handle, pam_code | flags);
910 _exit(r);
911 }
912
913 barrier_set_role(&barrier, BARRIER_PARENT);
914
915 /* If the child was forked off successfully it will do all the
916 * cleanups, so forget about the handle here. */
917 handle = NULL;
918
919 /* Unblock SIGTERM again in the parent */
920 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
921
922 /* We close the log explicitly here, since the PAM modules
923 * might have opened it, but we don't want this fd around. */
924 closelog();
925
926 /* Synchronously wait for the child to initialize. We don't care for
927 * errors as we cannot recover. However, warn loudly if it happens. */
928 if (!barrier_place_and_sync(&barrier))
929 log_error("PAM initialization failed");
930
931 *pam_env = e;
932 e = NULL;
933
934 return 0;
935
936 fail:
937 if (pam_code != PAM_SUCCESS) {
938 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
939 err = -EPERM; /* PAM errors do not map to errno */
940 } else {
941 err = log_error_errno(err < 0 ? err : errno, "PAM failed: %m");
942 }
943
944 if (handle) {
945 if (close_session)
946 pam_code = pam_close_session(handle, flags);
947
948 pam_end(handle, pam_code | flags);
949 }
950
951 strv_free(e);
952
953 closelog();
954
955 if (pam_pid > 1) {
956 kill(pam_pid, SIGTERM);
957 kill(pam_pid, SIGCONT);
958 }
959
960 return err;
961 }
962 #endif
963
/* Renames the process to the file name part of 'path', enclosed in
 * parentheses. If the file name is longer than 8 characters only its
 * last 8 characters are used; if it is empty "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char buf[11], *w = buf;
        const char *tail;
        size_t len;

        /* The resulting name must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Prefer the end of the name, it is usually the more
                 * interesting part, since the beginning might just be
                 * "systemd-" */
                tail += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, tail, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
994
995 #ifdef HAVE_SECCOMP
996
997 static int apply_seccomp(const ExecContext *c) {
998 uint32_t negative_action, action;
999 scmp_filter_ctx *seccomp;
1000 Iterator i;
1001 void *id;
1002 int r;
1003
1004 assert(c);
1005
1006 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1007
1008 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
1009 if (!seccomp)
1010 return -ENOMEM;
1011
1012 if (c->syscall_archs) {
1013
1014 SET_FOREACH(id, c->syscall_archs, i) {
1015 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1016 if (r == -EEXIST)
1017 continue;
1018 if (r < 0)
1019 goto finish;
1020 }
1021
1022 } else {
1023 r = seccomp_add_secondary_archs(seccomp);
1024 if (r < 0)
1025 goto finish;
1026 }
1027
1028 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1029 SET_FOREACH(id, c->syscall_filter, i) {
1030 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1031 if (r < 0)
1032 goto finish;
1033 }
1034
1035 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1036 if (r < 0)
1037 goto finish;
1038
1039 r = seccomp_load(seccomp);
1040
1041 finish:
1042 seccomp_release(seccomp);
1043 return r;
1044 }
1045
1046 static int apply_address_families(const ExecContext *c) {
1047 scmp_filter_ctx *seccomp;
1048 Iterator i;
1049 int r;
1050
1051 assert(c);
1052
1053 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1054 if (!seccomp)
1055 return -ENOMEM;
1056
1057 r = seccomp_add_secondary_archs(seccomp);
1058 if (r < 0)
1059 goto finish;
1060
1061 if (c->address_families_whitelist) {
1062 int af, first = 0, last = 0;
1063 void *afp;
1064
1065 /* If this is a whitelist, we first block the address
1066 * families that are out of range and then everything
1067 * that is not in the set. First, we find the lowest
1068 * and highest address family in the set. */
1069
1070 SET_FOREACH(afp, c->address_families, i) {
1071 af = PTR_TO_INT(afp);
1072
1073 if (af <= 0 || af >= af_max())
1074 continue;
1075
1076 if (first == 0 || af < first)
1077 first = af;
1078
1079 if (last == 0 || af > last)
1080 last = af;
1081 }
1082
1083 assert((first == 0) == (last == 0));
1084
1085 if (first == 0) {
1086
1087 /* No entries in the valid range, block everything */
1088 r = seccomp_rule_add(
1089 seccomp,
1090 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1091 SCMP_SYS(socket),
1092 0);
1093 if (r < 0)
1094 goto finish;
1095
1096 } else {
1097
1098 /* Block everything below the first entry */
1099 r = seccomp_rule_add(
1100 seccomp,
1101 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1102 SCMP_SYS(socket),
1103 1,
1104 SCMP_A0(SCMP_CMP_LT, first));
1105 if (r < 0)
1106 goto finish;
1107
1108 /* Block everything above the last entry */
1109 r = seccomp_rule_add(
1110 seccomp,
1111 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1112 SCMP_SYS(socket),
1113 1,
1114 SCMP_A0(SCMP_CMP_GT, last));
1115 if (r < 0)
1116 goto finish;
1117
1118 /* Block everything between the first and last
1119 * entry */
1120 for (af = 1; af < af_max(); af++) {
1121
1122 if (set_contains(c->address_families, INT_TO_PTR(af)))
1123 continue;
1124
1125 r = seccomp_rule_add(
1126 seccomp,
1127 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1128 SCMP_SYS(socket),
1129 1,
1130 SCMP_A0(SCMP_CMP_EQ, af));
1131 if (r < 0)
1132 goto finish;
1133 }
1134 }
1135
1136 } else {
1137 void *af;
1138
1139 /* If this is a blacklist, then generate one rule for
1140 * each address family that are then combined in OR
1141 * checks. */
1142
1143 SET_FOREACH(af, c->address_families, i) {
1144
1145 r = seccomp_rule_add(
1146 seccomp,
1147 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1148 SCMP_SYS(socket),
1149 1,
1150 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1151 if (r < 0)
1152 goto finish;
1153 }
1154 }
1155
1156 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1157 if (r < 0)
1158 goto finish;
1159
1160 r = seccomp_load(seccomp);
1161
1162 finish:
1163 seccomp_release(seccomp);
1164 return r;
1165 }
1166
1167 #endif
1168
1169 static void do_idle_pipe_dance(int idle_pipe[4]) {
1170 assert(idle_pipe);
1171
1172
1173 idle_pipe[1] = safe_close(idle_pipe[1]);
1174 idle_pipe[2] = safe_close(idle_pipe[2]);
1175
1176 if (idle_pipe[0] >= 0) {
1177 int r;
1178
1179 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1180
1181 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1182 ssize_t n;
1183
1184 /* Signal systemd that we are bored and want to continue. */
1185 n = write(idle_pipe[3], "x", 1);
1186 if (n > 0)
1187 /* Wait for systemd to react to the signal above. */
1188 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1189 }
1190
1191 idle_pipe[0] = safe_close(idle_pipe[0]);
1192
1193 }
1194
1195 idle_pipe[3] = safe_close(idle_pipe[3]);
1196 }
1197
1198 static int build_environment(
1199 const ExecContext *c,
1200 unsigned n_fds,
1201 usec_t watchdog_usec,
1202 const char *home,
1203 const char *username,
1204 const char *shell,
1205 char ***ret) {
1206
1207 _cleanup_strv_free_ char **our_env = NULL;
1208 unsigned n_env = 0;
1209 char *x;
1210
1211 assert(c);
1212 assert(ret);
1213
1214 our_env = new0(char*, 10);
1215 if (!our_env)
1216 return -ENOMEM;
1217
1218 if (n_fds > 0) {
1219 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1220 return -ENOMEM;
1221 our_env[n_env++] = x;
1222
1223 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1224 return -ENOMEM;
1225 our_env[n_env++] = x;
1226 }
1227
1228 if (watchdog_usec > 0) {
1229 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1230 return -ENOMEM;
1231 our_env[n_env++] = x;
1232
1233 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1234 return -ENOMEM;
1235 our_env[n_env++] = x;
1236 }
1237
1238 if (home) {
1239 x = strappend("HOME=", home);
1240 if (!x)
1241 return -ENOMEM;
1242 our_env[n_env++] = x;
1243 }
1244
1245 if (username) {
1246 x = strappend("LOGNAME=", username);
1247 if (!x)
1248 return -ENOMEM;
1249 our_env[n_env++] = x;
1250
1251 x = strappend("USER=", username);
1252 if (!x)
1253 return -ENOMEM;
1254 our_env[n_env++] = x;
1255 }
1256
1257 if (shell) {
1258 x = strappend("SHELL=", shell);
1259 if (!x)
1260 return -ENOMEM;
1261 our_env[n_env++] = x;
1262 }
1263
1264 if (is_terminal_input(c->std_input) ||
1265 c->std_output == EXEC_OUTPUT_TTY ||
1266 c->std_error == EXEC_OUTPUT_TTY ||
1267 c->tty_path) {
1268
1269 x = strdup(default_term_for_tty(tty_path(c)));
1270 if (!x)
1271 return -ENOMEM;
1272 our_env[n_env++] = x;
1273 }
1274
1275 our_env[n_env++] = NULL;
1276 assert(n_env <= 10);
1277
1278 *ret = our_env;
1279 our_env = NULL;
1280
1281 return 0;
1282 }
1283
1284 static bool exec_needs_mount_namespace(
1285 const ExecContext *context,
1286 const ExecParameters *params,
1287 ExecRuntime *runtime) {
1288
1289 assert(context);
1290 assert(params);
1291
1292 if (!strv_isempty(context->read_write_dirs) ||
1293 !strv_isempty(context->read_only_dirs) ||
1294 !strv_isempty(context->inaccessible_dirs))
1295 return true;
1296
1297 if (context->mount_flags != 0)
1298 return true;
1299
1300 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1301 return true;
1302
1303 if (params->bus_endpoint_path)
1304 return true;
1305
1306 if (context->private_devices ||
1307 context->protect_system != PROTECT_SYSTEM_NO ||
1308 context->protect_home != PROTECT_HOME_NO)
1309 return true;
1310
1311 return false;
1312 }
1313
1314 static int exec_child(
1315 Unit *unit,
1316 ExecCommand *command,
1317 const ExecContext *context,
1318 const ExecParameters *params,
1319 ExecRuntime *runtime,
1320 char **argv,
1321 int socket_fd,
1322 int *fds, unsigned n_fds,
1323 char **files_env,
1324 int *exit_status) {
1325
1326 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1327 _cleanup_free_ char *mac_selinux_context_net = NULL;
1328 const char *username = NULL, *home = NULL, *shell = NULL;
1329 unsigned n_dont_close = 0;
1330 int dont_close[n_fds + 4];
1331 uid_t uid = UID_INVALID;
1332 gid_t gid = GID_INVALID;
1333 int i, r;
1334 bool needs_mount_namespace;
1335
1336 assert(unit);
1337 assert(command);
1338 assert(context);
1339 assert(params);
1340 assert(exit_status);
1341
1342 rename_process_from_path(command->path);
1343
1344 /* We reset exactly these signals, since they are the
1345 * only ones we set to SIG_IGN in the main daemon. All
1346 * others we leave untouched because we set them to
1347 * SIG_DFL or a valid handler initially, both of which
1348 * will be demoted to SIG_DFL. */
1349 (void) default_signals(SIGNALS_CRASH_HANDLER,
1350 SIGNALS_IGNORE, -1);
1351
1352 if (context->ignore_sigpipe)
1353 (void) ignore_signals(SIGPIPE, -1);
1354
1355 r = reset_signal_mask();
1356 if (r < 0) {
1357 *exit_status = EXIT_SIGNAL_MASK;
1358 return r;
1359 }
1360
1361 if (params->idle_pipe)
1362 do_idle_pipe_dance(params->idle_pipe);
1363
1364 /* Close sockets very early to make sure we don't
1365 * block init reexecution because it cannot bind its
1366 * sockets */
1367
1368 log_forget_fds();
1369
1370 if (socket_fd >= 0)
1371 dont_close[n_dont_close++] = socket_fd;
1372 if (n_fds > 0) {
1373 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1374 n_dont_close += n_fds;
1375 }
1376 if (params->bus_endpoint_fd >= 0)
1377 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1378 if (runtime) {
1379 if (runtime->netns_storage_socket[0] >= 0)
1380 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1381 if (runtime->netns_storage_socket[1] >= 0)
1382 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1383 }
1384
1385 r = close_all_fds(dont_close, n_dont_close);
1386 if (r < 0) {
1387 *exit_status = EXIT_FDS;
1388 return r;
1389 }
1390
1391 if (!context->same_pgrp)
1392 if (setsid() < 0) {
1393 *exit_status = EXIT_SETSID;
1394 return -errno;
1395 }
1396
1397 exec_context_tty_reset(context);
1398
1399 if (params->confirm_spawn) {
1400 char response;
1401
1402 r = ask_for_confirmation(&response, argv);
1403 if (r == -ETIMEDOUT)
1404 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1405 else if (r < 0)
1406 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1407 else if (response == 's') {
1408 write_confirm_message("Skipping execution.\n");
1409 *exit_status = EXIT_CONFIRM;
1410 return -ECANCELED;
1411 } else if (response == 'n') {
1412 write_confirm_message("Failing execution.\n");
1413 *exit_status = 0;
1414 return 0;
1415 }
1416 }
1417
1418 if (context->user) {
1419 username = context->user;
1420 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1421 if (r < 0) {
1422 *exit_status = EXIT_USER;
1423 return r;
1424 }
1425 }
1426
1427 if (context->group) {
1428 const char *g = context->group;
1429
1430 r = get_group_creds(&g, &gid);
1431 if (r < 0) {
1432 *exit_status = EXIT_GROUP;
1433 return r;
1434 }
1435 }
1436
1437
1438 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1439 * must sure to drop O_NONBLOCK */
1440 if (socket_fd >= 0)
1441 fd_nonblock(socket_fd, false);
1442
1443 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1444 if (r < 0) {
1445 *exit_status = EXIT_STDIN;
1446 return r;
1447 }
1448
1449 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1450 if (r < 0) {
1451 *exit_status = EXIT_STDOUT;
1452 return r;
1453 }
1454
1455 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1456 if (r < 0) {
1457 *exit_status = EXIT_STDERR;
1458 return r;
1459 }
1460
1461 if (params->cgroup_path) {
1462 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1463 if (r < 0) {
1464 *exit_status = EXIT_CGROUP;
1465 return r;
1466 }
1467 }
1468
1469 if (context->oom_score_adjust_set) {
1470 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1471
1472 /* When we can't make this change due to EPERM, then
1473 * let's silently skip over it. User namespaces
1474 * prohibit write access to this file, and we
1475 * shouldn't trip up over that. */
1476
1477 sprintf(t, "%i", context->oom_score_adjust);
1478 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1479 if (r == -EPERM || r == -EACCES) {
1480 log_open();
1481 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1482 log_close();
1483 } else if (r < 0) {
1484 *exit_status = EXIT_OOM_ADJUST;
1485 return -errno;
1486 }
1487 }
1488
1489 if (context->nice_set)
1490 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1491 *exit_status = EXIT_NICE;
1492 return -errno;
1493 }
1494
1495 if (context->cpu_sched_set) {
1496 struct sched_param param = {
1497 .sched_priority = context->cpu_sched_priority,
1498 };
1499
1500 r = sched_setscheduler(0,
1501 context->cpu_sched_policy |
1502 (context->cpu_sched_reset_on_fork ?
1503 SCHED_RESET_ON_FORK : 0),
1504 &param);
1505 if (r < 0) {
1506 *exit_status = EXIT_SETSCHEDULER;
1507 return -errno;
1508 }
1509 }
1510
1511 if (context->cpuset)
1512 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1513 *exit_status = EXIT_CPUAFFINITY;
1514 return -errno;
1515 }
1516
1517 if (context->ioprio_set)
1518 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1519 *exit_status = EXIT_IOPRIO;
1520 return -errno;
1521 }
1522
1523 if (context->timer_slack_nsec != NSEC_INFINITY)
1524 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1525 *exit_status = EXIT_TIMERSLACK;
1526 return -errno;
1527 }
1528
1529 if (context->personality != PERSONALITY_INVALID)
1530 if (personality(context->personality) < 0) {
1531 *exit_status = EXIT_PERSONALITY;
1532 return -errno;
1533 }
1534
1535 if (context->utmp_id)
1536 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1537 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1538 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1539 USER_PROCESS,
1540 username ? "root" : context->user);
1541
1542 if (context->user && is_terminal_input(context->std_input)) {
1543 r = chown_terminal(STDIN_FILENO, uid);
1544 if (r < 0) {
1545 *exit_status = EXIT_STDIN;
1546 return r;
1547 }
1548 }
1549
1550 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1551 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1552
1553 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1554 if (r < 0) {
1555 *exit_status = EXIT_BUS_ENDPOINT;
1556 return r;
1557 }
1558 }
1559
1560 /* If delegation is enabled we'll pass ownership of the cgroup
1561 * (but only in systemd's own controller hierarchy!) to the
1562 * user of the new process. */
1563 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1564 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1565 if (r < 0) {
1566 *exit_status = EXIT_CGROUP;
1567 return r;
1568 }
1569
1570
1571 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1572 if (r < 0) {
1573 *exit_status = EXIT_CGROUP;
1574 return r;
1575 }
1576 }
1577
1578 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1579 char **rt;
1580
1581 STRV_FOREACH(rt, context->runtime_directory) {
1582 _cleanup_free_ char *p;
1583
1584 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1585 if (!p) {
1586 *exit_status = EXIT_RUNTIME_DIRECTORY;
1587 return -ENOMEM;
1588 }
1589
1590 r = mkdir_p_label(p, context->runtime_directory_mode);
1591 if (r < 0) {
1592 *exit_status = EXIT_RUNTIME_DIRECTORY;
1593 return r;
1594 }
1595
1596 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1597 if (r < 0) {
1598 *exit_status = EXIT_RUNTIME_DIRECTORY;
1599 return r;
1600 }
1601 }
1602 }
1603
1604 umask(context->umask);
1605
1606 if (params->apply_permissions) {
1607 r = enforce_groups(context, username, gid);
1608 if (r < 0) {
1609 *exit_status = EXIT_GROUP;
1610 return r;
1611 }
1612 #ifdef HAVE_SMACK
1613 if (context->smack_process_label) {
1614 r = mac_smack_apply_pid(0, context->smack_process_label);
1615 if (r < 0) {
1616 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1617 return r;
1618 }
1619 }
1620 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1621 else {
1622 _cleanup_free_ char *exec_label = NULL;
1623
1624 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1625 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1626 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1627 return r;
1628 }
1629
1630 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1631 if (r < 0) {
1632 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1633 return r;
1634 }
1635 }
1636 #endif
1637 #endif
1638 #ifdef HAVE_PAM
1639 if (context->pam_name && username) {
1640 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1641 if (r < 0) {
1642 *exit_status = EXIT_PAM;
1643 return r;
1644 }
1645 }
1646 #endif
1647 }
1648
1649 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1650 r = setup_netns(runtime->netns_storage_socket);
1651 if (r < 0) {
1652 *exit_status = EXIT_NETWORK;
1653 return r;
1654 }
1655 }
1656
1657 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1658
1659 if (needs_mount_namespace) {
1660 char *tmp = NULL, *var = NULL;
1661
1662 /* The runtime struct only contains the parent
1663 * of the private /tmp, which is
1664 * non-accessible to world users. Inside of it
1665 * there's a /tmp that is sticky, and that's
1666 * the one we want to use here. */
1667
1668 if (context->private_tmp && runtime) {
1669 if (runtime->tmp_dir)
1670 tmp = strjoina(runtime->tmp_dir, "/tmp");
1671 if (runtime->var_tmp_dir)
1672 var = strjoina(runtime->var_tmp_dir, "/tmp");
1673 }
1674
1675 r = setup_namespace(
1676 params->apply_chroot ? context->root_directory : NULL,
1677 context->read_write_dirs,
1678 context->read_only_dirs,
1679 context->inaccessible_dirs,
1680 tmp,
1681 var,
1682 params->bus_endpoint_path,
1683 context->private_devices,
1684 context->protect_home,
1685 context->protect_system,
1686 context->mount_flags);
1687
1688 /* If we couldn't set up the namespace this is
1689 * probably due to a missing capability. In this case,
1690 * silently proceeed. */
1691 if (r == -EPERM || r == -EACCES) {
1692 log_open();
1693 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1694 log_close();
1695 } else if (r < 0) {
1696 *exit_status = EXIT_NAMESPACE;
1697 return r;
1698 }
1699 }
1700
1701 if (params->apply_chroot) {
1702 if (!needs_mount_namespace && context->root_directory)
1703 if (chroot(context->root_directory) < 0) {
1704 *exit_status = EXIT_CHROOT;
1705 return -errno;
1706 }
1707
1708 if (chdir(context->working_directory ?: "/") < 0 &&
1709 !context->working_directory_missing_ok) {
1710 *exit_status = EXIT_CHDIR;
1711 return -errno;
1712 }
1713 } else {
1714 _cleanup_free_ char *d = NULL;
1715
1716 if (asprintf(&d, "%s/%s",
1717 context->root_directory ?: "",
1718 context->working_directory ?: "") < 0) {
1719 *exit_status = EXIT_MEMORY;
1720 return -ENOMEM;
1721 }
1722
1723 if (chdir(d) < 0 &&
1724 !context->working_directory_missing_ok) {
1725 *exit_status = EXIT_CHDIR;
1726 return -errno;
1727 }
1728 }
1729
1730 #ifdef HAVE_SELINUX
1731 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1732 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1733 if (r < 0) {
1734 *exit_status = EXIT_SELINUX_CONTEXT;
1735 return r;
1736 }
1737 }
1738 #endif
1739
1740 /* We repeat the fd closing here, to make sure that
1741 * nothing is leaked from the PAM modules. Note that
1742 * we are more aggressive this time since socket_fd
1743 * and the netns fds we don't need anymore. The custom
1744 * endpoint fd was needed to upload the policy and can
1745 * now be closed as well. */
1746 r = close_all_fds(fds, n_fds);
1747 if (r >= 0)
1748 r = shift_fds(fds, n_fds);
1749 if (r >= 0)
1750 r = flags_fds(fds, n_fds, context->non_blocking);
1751 if (r < 0) {
1752 *exit_status = EXIT_FDS;
1753 return r;
1754 }
1755
1756 if (params->apply_permissions) {
1757
1758 for (i = 0; i < _RLIMIT_MAX; i++) {
1759 if (!context->rlimit[i])
1760 continue;
1761
1762 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1763 *exit_status = EXIT_LIMITS;
1764 return -errno;
1765 }
1766 }
1767
1768 if (context->capability_bounding_set_drop) {
1769 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1770 if (r < 0) {
1771 *exit_status = EXIT_CAPABILITIES;
1772 return r;
1773 }
1774 }
1775
1776 if (context->user) {
1777 r = enforce_user(context, uid);
1778 if (r < 0) {
1779 *exit_status = EXIT_USER;
1780 return r;
1781 }
1782 }
1783
1784 /* PR_GET_SECUREBITS is not privileged, while
1785 * PR_SET_SECUREBITS is. So to suppress
1786 * potential EPERMs we'll try not to call
1787 * PR_SET_SECUREBITS unless necessary. */
1788 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1789 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1790 *exit_status = EXIT_SECUREBITS;
1791 return -errno;
1792 }
1793
1794 if (context->capabilities)
1795 if (cap_set_proc(context->capabilities) < 0) {
1796 *exit_status = EXIT_CAPABILITIES;
1797 return -errno;
1798 }
1799
1800 if (context->no_new_privileges)
1801 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1802 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1803 return -errno;
1804 }
1805
1806 #ifdef HAVE_SECCOMP
1807 if (context->address_families_whitelist ||
1808 !set_isempty(context->address_families)) {
1809 r = apply_address_families(context);
1810 if (r < 0) {
1811 *exit_status = EXIT_ADDRESS_FAMILIES;
1812 return r;
1813 }
1814 }
1815
1816 if (context->syscall_whitelist ||
1817 !set_isempty(context->syscall_filter) ||
1818 !set_isempty(context->syscall_archs)) {
1819 r = apply_seccomp(context);
1820 if (r < 0) {
1821 *exit_status = EXIT_SECCOMP;
1822 return r;
1823 }
1824 }
1825 #endif
1826
1827 #ifdef HAVE_SELINUX
1828 if (mac_selinux_use()) {
1829 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1830
1831 if (exec_context) {
1832 r = setexeccon(exec_context);
1833 if (r < 0) {
1834 *exit_status = EXIT_SELINUX_CONTEXT;
1835 return r;
1836 }
1837 }
1838 }
1839 #endif
1840
1841 #ifdef HAVE_APPARMOR
1842 if (context->apparmor_profile && mac_apparmor_use()) {
1843 r = aa_change_onexec(context->apparmor_profile);
1844 if (r < 0 && !context->apparmor_profile_ignore) {
1845 *exit_status = EXIT_APPARMOR_PROFILE;
1846 return -errno;
1847 }
1848 }
1849 #endif
1850 }
1851
1852 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1853 if (r < 0) {
1854 *exit_status = EXIT_MEMORY;
1855 return r;
1856 }
1857
1858 final_env = strv_env_merge(5,
1859 params->environment,
1860 our_env,
1861 context->environment,
1862 files_env,
1863 pam_env,
1864 NULL);
1865 if (!final_env) {
1866 *exit_status = EXIT_MEMORY;
1867 return -ENOMEM;
1868 }
1869
1870 final_argv = replace_env_argv(argv, final_env);
1871 if (!final_argv) {
1872 *exit_status = EXIT_MEMORY;
1873 return -ENOMEM;
1874 }
1875
1876 final_env = strv_env_clean(final_env);
1877
1878 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1879 _cleanup_free_ char *line;
1880
1881 line = exec_command_line(final_argv);
1882 if (line) {
1883 log_open();
1884 log_struct(LOG_DEBUG,
1885 LOG_UNIT_ID(unit),
1886 "EXECUTABLE=%s", command->path,
1887 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1888 NULL);
1889 log_close();
1890 }
1891 }
1892
1893 execve(command->path, final_argv, final_env);
1894 *exit_status = EXIT_EXEC;
1895 return -errno;
1896 }
1897
1898 int exec_spawn(Unit *unit,
1899 ExecCommand *command,
1900 const ExecContext *context,
1901 const ExecParameters *params,
1902 ExecRuntime *runtime,
1903 pid_t *ret) {
1904
1905 _cleanup_strv_free_ char **files_env = NULL;
1906 int *fds = NULL; unsigned n_fds = 0;
1907 _cleanup_free_ char *line = NULL;
1908 int socket_fd, r;
1909 char **argv;
1910 pid_t pid;
1911
1912 assert(unit);
1913 assert(command);
1914 assert(context);
1915 assert(ret);
1916 assert(params);
1917 assert(params->fds || params->n_fds <= 0);
1918
1919 if (context->std_input == EXEC_INPUT_SOCKET ||
1920 context->std_output == EXEC_OUTPUT_SOCKET ||
1921 context->std_error == EXEC_OUTPUT_SOCKET) {
1922
1923 if (params->n_fds != 1) {
1924 log_unit_error(unit, "Got more than one socket.");
1925 return -EINVAL;
1926 }
1927
1928 socket_fd = params->fds[0];
1929 } else {
1930 socket_fd = -1;
1931 fds = params->fds;
1932 n_fds = params->n_fds;
1933 }
1934
1935 r = exec_context_load_environment(unit, context, &files_env);
1936 if (r < 0)
1937 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1938
1939 argv = params->argv ?: command->argv;
1940 line = exec_command_line(argv);
1941 if (!line)
1942 return log_oom();
1943
1944 log_struct(LOG_DEBUG,
1945 LOG_UNIT_ID(unit),
1946 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1947 "EXECUTABLE=%s", command->path,
1948 NULL);
1949 pid = fork();
1950 if (pid < 0)
1951 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1952
1953 if (pid == 0) {
1954 int exit_status;
1955
1956 r = exec_child(unit,
1957 command,
1958 context,
1959 params,
1960 runtime,
1961 argv,
1962 socket_fd,
1963 fds, n_fds,
1964 files_env,
1965 &exit_status);
1966 if (r < 0) {
1967 log_open();
1968 log_struct_errno(LOG_ERR, r,
1969 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1970 LOG_UNIT_ID(unit),
1971 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1972 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1973 command->path),
1974 "EXECUTABLE=%s", command->path,
1975 NULL);
1976 }
1977
1978 _exit(exit_status);
1979 }
1980
1981 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1982
1983 /* We add the new process to the cgroup both in the child (so
1984 * that we can be sure that no user code is ever executed
1985 * outside of the cgroup) and in the parent (so that we can be
1986 * sure that when we kill the cgroup the process will be
1987 * killed too). */
1988 if (params->cgroup_path)
1989 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1990
1991 exec_status_start(&command->exec_status, pid);
1992
1993 *ret = pid;
1994 return 0;
1995 }
1996
1997 void exec_context_init(ExecContext *c) {
1998 assert(c);
1999
2000 c->umask = 0022;
2001 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
2002 c->cpu_sched_policy = SCHED_OTHER;
2003 c->syslog_priority = LOG_DAEMON|LOG_INFO;
2004 c->syslog_level_prefix = true;
2005 c->ignore_sigpipe = true;
2006 c->timer_slack_nsec = NSEC_INFINITY;
2007 c->personality = PERSONALITY_INVALID;
2008 c->runtime_directory_mode = 0755;
2009 }
2010
2011 void exec_context_done(ExecContext *c) {
2012 unsigned l;
2013
2014 assert(c);
2015
2016 c->environment = strv_free(c->environment);
2017 c->environment_files = strv_free(c->environment_files);
2018
2019 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
2020 c->rlimit[l] = mfree(c->rlimit[l]);
2021
2022 c->working_directory = mfree(c->working_directory);
2023 c->root_directory = mfree(c->root_directory);
2024 c->tty_path = mfree(c->tty_path);
2025 c->syslog_identifier = mfree(c->syslog_identifier);
2026 c->user = mfree(c->user);
2027 c->group = mfree(c->group);
2028
2029 c->supplementary_groups = strv_free(c->supplementary_groups);
2030
2031 c->pam_name = mfree(c->pam_name);
2032
2033 if (c->capabilities) {
2034 cap_free(c->capabilities);
2035 c->capabilities = NULL;
2036 }
2037
2038 c->read_only_dirs = strv_free(c->read_only_dirs);
2039 c->read_write_dirs = strv_free(c->read_write_dirs);
2040 c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
2041
2042 if (c->cpuset)
2043 CPU_FREE(c->cpuset);
2044
2045 c->utmp_id = mfree(c->utmp_id);
2046 c->selinux_context = mfree(c->selinux_context);
2047 c->apparmor_profile = mfree(c->apparmor_profile);
2048
2049 c->syscall_filter = set_free(c->syscall_filter);
2050 c->syscall_archs = set_free(c->syscall_archs);
2051 c->address_families = set_free(c->address_families);
2052
2053 c->runtime_directory = strv_free(c->runtime_directory);
2054
2055 bus_endpoint_free(c->bus_endpoint);
2056 c->bus_endpoint = NULL;
2057 }
2058
2059 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2060 char **i;
2061
2062 assert(c);
2063
2064 if (!runtime_prefix)
2065 return 0;
2066
2067 STRV_FOREACH(i, c->runtime_directory) {
2068 _cleanup_free_ char *p;
2069
2070 p = strjoin(runtime_prefix, "/", *i, NULL);
2071 if (!p)
2072 return -ENOMEM;
2073
2074 /* We execute this synchronously, since we need to be
2075 * sure this is gone when we start the service
2076 * next. */
2077 (void) rm_rf(p, REMOVE_ROOT);
2078 }
2079
2080 return 0;
2081 }
2082
2083 void exec_command_done(ExecCommand *c) {
2084 assert(c);
2085
2086 c->path = mfree(c->path);
2087
2088 c->argv = strv_free(c->argv);
2089 }
2090
2091 void exec_command_done_array(ExecCommand *c, unsigned n) {
2092 unsigned i;
2093
2094 for (i = 0; i < n; i++)
2095 exec_command_done(c+i);
2096 }
2097
2098 ExecCommand* exec_command_free_list(ExecCommand *c) {
2099 ExecCommand *i;
2100
2101 while ((i = c)) {
2102 LIST_REMOVE(command, c, i);
2103 exec_command_done(i);
2104 free(i);
2105 }
2106
2107 return NULL;
2108 }
2109
2110 void exec_command_free_array(ExecCommand **c, unsigned n) {
2111 unsigned i;
2112
2113 for (i = 0; i < n; i++)
2114 c[i] = exec_command_free_list(c[i]);
2115 }
2116
/* Context handed to invalid_env() through its userdata pointer. */
typedef struct InvalidEnvInfo {
        Unit *unit;       /* unit the log message is attributed to */
        const char *path; /* printed after the rejected assignment in the log message */
} InvalidEnvInfo;
2121
2122 static void invalid_env(const char *p, void *userdata) {
2123 InvalidEnvInfo *info = userdata;
2124
2125 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2126 }
2127
2128 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2129 char **i, **r = NULL;
2130
2131 assert(c);
2132 assert(l);
2133
2134 STRV_FOREACH(i, c->environment_files) {
2135 char *fn;
2136 int k;
2137 bool ignore = false;
2138 char **p;
2139 _cleanup_globfree_ glob_t pglob = {};
2140 int count, n;
2141
2142 fn = *i;
2143
2144 if (fn[0] == '-') {
2145 ignore = true;
2146 fn ++;
2147 }
2148
2149 if (!path_is_absolute(fn)) {
2150 if (ignore)
2151 continue;
2152
2153 strv_free(r);
2154 return -EINVAL;
2155 }
2156
2157 /* Filename supports globbing, take all matching files */
2158 errno = 0;
2159 if (glob(fn, 0, NULL, &pglob) != 0) {
2160 if (ignore)
2161 continue;
2162
2163 strv_free(r);
2164 return errno ? -errno : -EINVAL;
2165 }
2166 count = pglob.gl_pathc;
2167 if (count == 0) {
2168 if (ignore)
2169 continue;
2170
2171 strv_free(r);
2172 return -EINVAL;
2173 }
2174 for (n = 0; n < count; n++) {
2175 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2176 if (k < 0) {
2177 if (ignore)
2178 continue;
2179
2180 strv_free(r);
2181 return k;
2182 }
2183 /* Log invalid environment variables with filename */
2184 if (p) {
2185 InvalidEnvInfo info = {
2186 .unit = unit,
2187 .path = pglob.gl_pathv[n]
2188 };
2189
2190 p = strv_env_clean_with_callback(p, invalid_env, &info);
2191 }
2192
2193 if (r == NULL)
2194 r = p;
2195 else {
2196 char **m;
2197
2198 m = strv_env_merge(2, r, p);
2199 strv_free(r);
2200 strv_free(p);
2201 if (!m)
2202 return -ENOMEM;
2203
2204 r = m;
2205 }
2206 }
2207 }
2208
2209 *l = r;
2210
2211 return 0;
2212 }
2213
2214 static bool tty_may_match_dev_console(const char *tty) {
2215 _cleanup_free_ char *active = NULL;
2216 char *console;
2217
2218 if (startswith(tty, "/dev/"))
2219 tty += 5;
2220
2221 /* trivial identity? */
2222 if (streq(tty, "console"))
2223 return true;
2224
2225 console = resolve_dev_console(&active);
2226 /* if we could not resolve, assume it may */
2227 if (!console)
2228 return true;
2229
2230 /* "tty0" means the active VC, so it may be the same sometimes */
2231 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2232 }
2233
2234 bool exec_context_may_touch_console(ExecContext *ec) {
2235 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2236 is_terminal_input(ec->std_input) ||
2237 is_terminal_output(ec->std_output) ||
2238 is_terminal_output(ec->std_error)) &&
2239 tty_may_match_dev_console(tty_path(ec));
2240 }
2241
2242 static void strv_fprintf(FILE *f, char **l) {
2243 char **g;
2244
2245 assert(f);
2246
2247 STRV_FOREACH(g, l)
2248 fprintf(f, " %s", *g);
2249 }
2250
2251 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2252 char **e;
2253 unsigned i;
2254
2255 assert(c);
2256 assert(f);
2257
2258 prefix = strempty(prefix);
2259
2260 fprintf(f,
2261 "%sUMask: %04o\n"
2262 "%sWorkingDirectory: %s\n"
2263 "%sRootDirectory: %s\n"
2264 "%sNonBlocking: %s\n"
2265 "%sPrivateTmp: %s\n"
2266 "%sPrivateNetwork: %s\n"
2267 "%sPrivateDevices: %s\n"
2268 "%sProtectHome: %s\n"
2269 "%sProtectSystem: %s\n"
2270 "%sIgnoreSIGPIPE: %s\n",
2271 prefix, c->umask,
2272 prefix, c->working_directory ? c->working_directory : "/",
2273 prefix, c->root_directory ? c->root_directory : "/",
2274 prefix, yes_no(c->non_blocking),
2275 prefix, yes_no(c->private_tmp),
2276 prefix, yes_no(c->private_network),
2277 prefix, yes_no(c->private_devices),
2278 prefix, protect_home_to_string(c->protect_home),
2279 prefix, protect_system_to_string(c->protect_system),
2280 prefix, yes_no(c->ignore_sigpipe));
2281
2282 STRV_FOREACH(e, c->environment)
2283 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2284
2285 STRV_FOREACH(e, c->environment_files)
2286 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2287
2288 if (c->nice_set)
2289 fprintf(f,
2290 "%sNice: %i\n",
2291 prefix, c->nice);
2292
2293 if (c->oom_score_adjust_set)
2294 fprintf(f,
2295 "%sOOMScoreAdjust: %i\n",
2296 prefix, c->oom_score_adjust);
2297
2298 for (i = 0; i < RLIM_NLIMITS; i++)
2299 if (c->rlimit[i])
2300 fprintf(f, "%s%s: "RLIM_FMT"\n",
2301 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2302
2303 if (c->ioprio_set) {
2304 _cleanup_free_ char *class_str = NULL;
2305
2306 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2307 fprintf(f,
2308 "%sIOSchedulingClass: %s\n"
2309 "%sIOPriority: %i\n",
2310 prefix, strna(class_str),
2311 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2312 }
2313
2314 if (c->cpu_sched_set) {
2315 _cleanup_free_ char *policy_str = NULL;
2316
2317 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2318 fprintf(f,
2319 "%sCPUSchedulingPolicy: %s\n"
2320 "%sCPUSchedulingPriority: %i\n"
2321 "%sCPUSchedulingResetOnFork: %s\n",
2322 prefix, strna(policy_str),
2323 prefix, c->cpu_sched_priority,
2324 prefix, yes_no(c->cpu_sched_reset_on_fork));
2325 }
2326
2327 if (c->cpuset) {
2328 fprintf(f, "%sCPUAffinity:", prefix);
2329 for (i = 0; i < c->cpuset_ncpus; i++)
2330 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2331 fprintf(f, " %u", i);
2332 fputs("\n", f);
2333 }
2334
2335 if (c->timer_slack_nsec != NSEC_INFINITY)
2336 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2337
2338 fprintf(f,
2339 "%sStandardInput: %s\n"
2340 "%sStandardOutput: %s\n"
2341 "%sStandardError: %s\n",
2342 prefix, exec_input_to_string(c->std_input),
2343 prefix, exec_output_to_string(c->std_output),
2344 prefix, exec_output_to_string(c->std_error));
2345
2346 if (c->tty_path)
2347 fprintf(f,
2348 "%sTTYPath: %s\n"
2349 "%sTTYReset: %s\n"
2350 "%sTTYVHangup: %s\n"
2351 "%sTTYVTDisallocate: %s\n",
2352 prefix, c->tty_path,
2353 prefix, yes_no(c->tty_reset),
2354 prefix, yes_no(c->tty_vhangup),
2355 prefix, yes_no(c->tty_vt_disallocate));
2356
2357 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2358 c->std_output == EXEC_OUTPUT_KMSG ||
2359 c->std_output == EXEC_OUTPUT_JOURNAL ||
2360 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2361 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2362 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2363 c->std_error == EXEC_OUTPUT_SYSLOG ||
2364 c->std_error == EXEC_OUTPUT_KMSG ||
2365 c->std_error == EXEC_OUTPUT_JOURNAL ||
2366 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2367 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2368 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2369
2370 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2371
2372 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2373 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2374
2375 fprintf(f,
2376 "%sSyslogFacility: %s\n"
2377 "%sSyslogLevel: %s\n",
2378 prefix, strna(fac_str),
2379 prefix, strna(lvl_str));
2380 }
2381
2382 if (c->capabilities) {
2383 _cleanup_cap_free_charp_ char *t;
2384
2385 t = cap_to_text(c->capabilities, NULL);
2386 if (t)
2387 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2388 }
2389
2390 if (c->secure_bits)
2391 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2392 prefix,
2393 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2394 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2395 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2396 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2397 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2398 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2399
2400 if (c->capability_bounding_set_drop) {
2401 unsigned long l;
2402 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2403
2404 for (l = 0; l <= cap_last_cap(); l++)
2405 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2406 fprintf(f, " %s", strna(capability_to_name(l)));
2407
2408 fputs("\n", f);
2409 }
2410
2411 if (c->user)
2412 fprintf(f, "%sUser: %s\n", prefix, c->user);
2413 if (c->group)
2414 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2415
2416 if (strv_length(c->supplementary_groups) > 0) {
2417 fprintf(f, "%sSupplementaryGroups:", prefix);
2418 strv_fprintf(f, c->supplementary_groups);
2419 fputs("\n", f);
2420 }
2421
2422 if (c->pam_name)
2423 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2424
2425 if (strv_length(c->read_write_dirs) > 0) {
2426 fprintf(f, "%sReadWriteDirs:", prefix);
2427 strv_fprintf(f, c->read_write_dirs);
2428 fputs("\n", f);
2429 }
2430
2431 if (strv_length(c->read_only_dirs) > 0) {
2432 fprintf(f, "%sReadOnlyDirs:", prefix);
2433 strv_fprintf(f, c->read_only_dirs);
2434 fputs("\n", f);
2435 }
2436
2437 if (strv_length(c->inaccessible_dirs) > 0) {
2438 fprintf(f, "%sInaccessibleDirs:", prefix);
2439 strv_fprintf(f, c->inaccessible_dirs);
2440 fputs("\n", f);
2441 }
2442
2443 if (c->utmp_id)
2444 fprintf(f,
2445 "%sUtmpIdentifier: %s\n",
2446 prefix, c->utmp_id);
2447
2448 if (c->selinux_context)
2449 fprintf(f,
2450 "%sSELinuxContext: %s%s\n",
2451 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2452
2453 if (c->personality != PERSONALITY_INVALID)
2454 fprintf(f,
2455 "%sPersonality: %s\n",
2456 prefix, strna(personality_to_string(c->personality)));
2457
2458 if (c->syscall_filter) {
2459 #ifdef HAVE_SECCOMP
2460 Iterator j;
2461 void *id;
2462 bool first = true;
2463 #endif
2464
2465 fprintf(f,
2466 "%sSystemCallFilter: ",
2467 prefix);
2468
2469 if (!c->syscall_whitelist)
2470 fputc('~', f);
2471
2472 #ifdef HAVE_SECCOMP
2473 SET_FOREACH(id, c->syscall_filter, j) {
2474 _cleanup_free_ char *name = NULL;
2475
2476 if (first)
2477 first = false;
2478 else
2479 fputc(' ', f);
2480
2481 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2482 fputs(strna(name), f);
2483 }
2484 #endif
2485
2486 fputc('\n', f);
2487 }
2488
2489 if (c->syscall_archs) {
2490 #ifdef HAVE_SECCOMP
2491 Iterator j;
2492 void *id;
2493 #endif
2494
2495 fprintf(f,
2496 "%sSystemCallArchitectures:",
2497 prefix);
2498
2499 #ifdef HAVE_SECCOMP
2500 SET_FOREACH(id, c->syscall_archs, j)
2501 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2502 #endif
2503 fputc('\n', f);
2504 }
2505
2506 if (c->syscall_errno != 0)
2507 fprintf(f,
2508 "%sSystemCallErrorNumber: %s\n",
2509 prefix, strna(errno_to_name(c->syscall_errno)));
2510
2511 if (c->apparmor_profile)
2512 fprintf(f,
2513 "%sAppArmorProfile: %s%s\n",
2514 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2515 }
2516
2517 bool exec_context_maintains_privileges(ExecContext *c) {
2518 assert(c);
2519
2520 /* Returns true if the process forked off would run run under
2521 * an unchanged UID or as root. */
2522
2523 if (!c->user)
2524 return true;
2525
2526 if (streq(c->user, "root") || streq(c->user, "0"))
2527 return true;
2528
2529 return false;
2530 }
2531
2532 void exec_status_start(ExecStatus *s, pid_t pid) {
2533 assert(s);
2534
2535 zero(*s);
2536 s->pid = pid;
2537 dual_timestamp_get(&s->start_timestamp);
2538 }
2539
2540 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2541 assert(s);
2542
2543 if (s->pid && s->pid != pid)
2544 zero(*s);
2545
2546 s->pid = pid;
2547 dual_timestamp_get(&s->exit_timestamp);
2548
2549 s->code = code;
2550 s->status = status;
2551
2552 if (context) {
2553 if (context->utmp_id)
2554 utmp_put_dead_process(context->utmp_id, pid, code, status);
2555
2556 exec_context_tty_reset(context);
2557 }
2558 }
2559
2560 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2561 char buf[FORMAT_TIMESTAMP_MAX];
2562
2563 assert(s);
2564 assert(f);
2565
2566 if (s->pid <= 0)
2567 return;
2568
2569 prefix = strempty(prefix);
2570
2571 fprintf(f,
2572 "%sPID: "PID_FMT"\n",
2573 prefix, s->pid);
2574
2575 if (s->start_timestamp.realtime > 0)
2576 fprintf(f,
2577 "%sStart Timestamp: %s\n",
2578 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2579
2580 if (s->exit_timestamp.realtime > 0)
2581 fprintf(f,
2582 "%sExit Timestamp: %s\n"
2583 "%sExit Code: %s\n"
2584 "%sExit Status: %i\n",
2585 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2586 prefix, sigchld_code_to_string(s->code),
2587 prefix, s->status);
2588 }
2589
2590 char *exec_command_line(char **argv) {
2591 size_t k;
2592 char *n, *p, **a;
2593 bool first = true;
2594
2595 assert(argv);
2596
2597 k = 1;
2598 STRV_FOREACH(a, argv)
2599 k += strlen(*a)+3;
2600
2601 if (!(n = new(char, k)))
2602 return NULL;
2603
2604 p = n;
2605 STRV_FOREACH(a, argv) {
2606
2607 if (!first)
2608 *(p++) = ' ';
2609 else
2610 first = false;
2611
2612 if (strpbrk(*a, WHITESPACE)) {
2613 *(p++) = '\'';
2614 p = stpcpy(p, *a);
2615 *(p++) = '\'';
2616 } else
2617 p = stpcpy(p, *a);
2618
2619 }
2620
2621 *p = 0;
2622
2623 /* FIXME: this doesn't really handle arguments that have
2624 * spaces and ticks in them */
2625
2626 return n;
2627 }
2628
2629 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2630 _cleanup_free_ char *cmd = NULL;
2631 const char *prefix2;
2632
2633 assert(c);
2634 assert(f);
2635
2636 prefix = strempty(prefix);
2637 prefix2 = strjoina(prefix, "\t");
2638
2639 cmd = exec_command_line(c->argv);
2640 fprintf(f,
2641 "%sCommand Line: %s\n",
2642 prefix, cmd ? cmd : strerror(ENOMEM));
2643
2644 exec_status_dump(&c->exec_status, f, prefix2);
2645 }
2646
2647 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2648 assert(f);
2649
2650 prefix = strempty(prefix);
2651
2652 LIST_FOREACH(command, c, c)
2653 exec_command_dump(c, f, prefix);
2654 }
2655
2656 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2657 ExecCommand *end;
2658
2659 assert(l);
2660 assert(e);
2661
2662 if (*l) {
2663 /* It's kind of important, that we keep the order here */
2664 LIST_FIND_TAIL(command, *l, end);
2665 LIST_INSERT_AFTER(command, *l, end, e);
2666 } else
2667 *l = e;
2668 }
2669
2670 int exec_command_set(ExecCommand *c, const char *path, ...) {
2671 va_list ap;
2672 char **l, *p;
2673
2674 assert(c);
2675 assert(path);
2676
2677 va_start(ap, path);
2678 l = strv_new_ap(path, ap);
2679 va_end(ap);
2680
2681 if (!l)
2682 return -ENOMEM;
2683
2684 p = strdup(path);
2685 if (!p) {
2686 strv_free(l);
2687 return -ENOMEM;
2688 }
2689
2690 free(c->path);
2691 c->path = p;
2692
2693 strv_free(c->argv);
2694 c->argv = l;
2695
2696 return 0;
2697 }
2698
2699 int exec_command_append(ExecCommand *c, const char *path, ...) {
2700 _cleanup_strv_free_ char **l = NULL;
2701 va_list ap;
2702 int r;
2703
2704 assert(c);
2705 assert(path);
2706
2707 va_start(ap, path);
2708 l = strv_new_ap(path, ap);
2709 va_end(ap);
2710
2711 if (!l)
2712 return -ENOMEM;
2713
2714 r = strv_extend_strv(&c->argv, l);
2715 if (r < 0)
2716 return r;
2717
2718 return 0;
2719 }
2720
2721
2722 static int exec_runtime_allocate(ExecRuntime **rt) {
2723
2724 if (*rt)
2725 return 0;
2726
2727 *rt = new0(ExecRuntime, 1);
2728 if (!*rt)
2729 return -ENOMEM;
2730
2731 (*rt)->n_ref = 1;
2732 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2733
2734 return 0;
2735 }
2736
2737 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2738 int r;
2739
2740 assert(rt);
2741 assert(c);
2742 assert(id);
2743
2744 if (*rt)
2745 return 1;
2746
2747 if (!c->private_network && !c->private_tmp)
2748 return 0;
2749
2750 r = exec_runtime_allocate(rt);
2751 if (r < 0)
2752 return r;
2753
2754 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2755 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2756 return -errno;
2757 }
2758
2759 if (c->private_tmp && !(*rt)->tmp_dir) {
2760 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2761 if (r < 0)
2762 return r;
2763 }
2764
2765 return 1;
2766 }
2767
2768 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2769 assert(r);
2770 assert(r->n_ref > 0);
2771
2772 r->n_ref++;
2773 return r;
2774 }
2775
2776 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2777
2778 if (!r)
2779 return NULL;
2780
2781 assert(r->n_ref > 0);
2782
2783 r->n_ref--;
2784 if (r->n_ref > 0)
2785 return NULL;
2786
2787 free(r->tmp_dir);
2788 free(r->var_tmp_dir);
2789 safe_close_pair(r->netns_storage_socket);
2790 free(r);
2791
2792 return NULL;
2793 }
2794
2795 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2796 assert(u);
2797 assert(f);
2798 assert(fds);
2799
2800 if (!rt)
2801 return 0;
2802
2803 if (rt->tmp_dir)
2804 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2805
2806 if (rt->var_tmp_dir)
2807 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2808
2809 if (rt->netns_storage_socket[0] >= 0) {
2810 int copy;
2811
2812 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2813 if (copy < 0)
2814 return copy;
2815
2816 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2817 }
2818
2819 if (rt->netns_storage_socket[1] >= 0) {
2820 int copy;
2821
2822 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2823 if (copy < 0)
2824 return copy;
2825
2826 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2827 }
2828
2829 return 0;
2830 }
2831
2832 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2833 int r;
2834
2835 assert(rt);
2836 assert(key);
2837 assert(value);
2838
2839 if (streq(key, "tmp-dir")) {
2840 char *copy;
2841
2842 r = exec_runtime_allocate(rt);
2843 if (r < 0)
2844 return log_oom();
2845
2846 copy = strdup(value);
2847 if (!copy)
2848 return log_oom();
2849
2850 free((*rt)->tmp_dir);
2851 (*rt)->tmp_dir = copy;
2852
2853 } else if (streq(key, "var-tmp-dir")) {
2854 char *copy;
2855
2856 r = exec_runtime_allocate(rt);
2857 if (r < 0)
2858 return log_oom();
2859
2860 copy = strdup(value);
2861 if (!copy)
2862 return log_oom();
2863
2864 free((*rt)->var_tmp_dir);
2865 (*rt)->var_tmp_dir = copy;
2866
2867 } else if (streq(key, "netns-socket-0")) {
2868 int fd;
2869
2870 r = exec_runtime_allocate(rt);
2871 if (r < 0)
2872 return log_oom();
2873
2874 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2875 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2876 else {
2877 safe_close((*rt)->netns_storage_socket[0]);
2878 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2879 }
2880 } else if (streq(key, "netns-socket-1")) {
2881 int fd;
2882
2883 r = exec_runtime_allocate(rt);
2884 if (r < 0)
2885 return log_oom();
2886
2887 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2888 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2889 else {
2890 safe_close((*rt)->netns_storage_socket[1]);
2891 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2892 }
2893 } else
2894 return 0;
2895
2896 return 1;
2897 }
2898
2899 static void *remove_tmpdir_thread(void *p) {
2900 _cleanup_free_ char *path = p;
2901
2902 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2903 return NULL;
2904 }
2905
2906 void exec_runtime_destroy(ExecRuntime *rt) {
2907 int r;
2908
2909 if (!rt)
2910 return;
2911
2912 /* If there are multiple users of this, let's leave the stuff around */
2913 if (rt->n_ref > 1)
2914 return;
2915
2916 if (rt->tmp_dir) {
2917 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2918
2919 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2920 if (r < 0) {
2921 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2922 free(rt->tmp_dir);
2923 }
2924
2925 rt->tmp_dir = NULL;
2926 }
2927
2928 if (rt->var_tmp_dir) {
2929 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2930
2931 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2932 if (r < 0) {
2933 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2934 free(rt->var_tmp_dir);
2935 }
2936
2937 rt->var_tmp_dir = NULL;
2938 }
2939
2940 safe_close_pair(rt->netns_storage_socket);
2941 }
2942
2943 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2944 [EXEC_INPUT_NULL] = "null",
2945 [EXEC_INPUT_TTY] = "tty",
2946 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2947 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2948 [EXEC_INPUT_SOCKET] = "socket"
2949 };
2950
2951 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2952
2953 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2954 [EXEC_OUTPUT_INHERIT] = "inherit",
2955 [EXEC_OUTPUT_NULL] = "null",
2956 [EXEC_OUTPUT_TTY] = "tty",
2957 [EXEC_OUTPUT_SYSLOG] = "syslog",
2958 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2959 [EXEC_OUTPUT_KMSG] = "kmsg",
2960 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2961 [EXEC_OUTPUT_JOURNAL] = "journal",
2962 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2963 [EXEC_OUTPUT_SOCKET] = "socket"
2964 };
2965
2966 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2967
2968 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2969 [EXEC_UTMP_INIT] = "init",
2970 [EXEC_UTMP_LOGIN] = "login",
2971 [EXEC_UTMP_USER] = "user",
2972 };
2973
2974 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);