]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
tree-wide: update empty-if coccinelle script to cover empty-while and more
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <utmpx.h>
35 #include <sys/personality.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "sd-messages.h"
54 #include "rm-rf.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
83
84 #ifdef HAVE_APPARMOR
85 #include "apparmor-util.h"
86 #endif
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #include "execute.h"
93
/* How long do_idle_pipe_dance() waits on the idle pipe before signalling the
 * manager, and how long it keeps waiting after having done so. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals by chown_terminal().
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the journal stream socket (8 MiB) */
#define SNDBUF_SIZE (8 * 1024 * 1024)
101
/* Moves the passed file descriptors so that fds[k] ends up being fd number
 * k+3, i.e. the descriptors become a contiguous range starting at 3.
 *
 * Modifies the fds array! Each entry is replaced by the number its
 * descriptor was moved to.
 *
 * Returns 0 on success or a negative errno if duplicating a descriptor
 * failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int count = (int) n_fds;
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep making passes until one completes without any descriptor
         * landing on a number other than the one we asked for. */
        while (first >= 0) {
                int retry = -1, idx;

                for (idx = first; idx < count; idx++) {
                        int want = idx + 3, got;

                        /* Already at right index? */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we wanted was still occupied, so F_DUPFD
                         * handed out a higher number. Remember the first
                         * such index and start the next pass from there. */
                        if (got != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        }

        return 0;
}
146
/* Sets or clears O_NONBLOCK (according to 'nonblock') on each of the passed
 * descriptors and clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be passed on to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
175
176 _pure_ static const char *tty_path(const ExecContext *context) {
177 assert(context);
178
179 if (context->tty_path)
180 return context->tty_path;
181
182 return "/dev/console";
183 }
184
185 static void exec_context_tty_reset(const ExecContext *context) {
186 assert(context);
187
188 if (context->tty_vhangup)
189 terminal_vhangup(tty_path(context));
190
191 if (context->tty_reset)
192 reset_terminal(tty_path(context));
193
194 if (context->tty_vt_disallocate && context->tty_path)
195 vt_disallocate(context->tty_path);
196 }
197
198 static bool is_terminal_output(ExecOutput o) {
199 return
200 o == EXEC_OUTPUT_TTY ||
201 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
202 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
203 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
204 }
205
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(fd);
        return r;
}
223
224 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
225 union sockaddr_union sa = {
226 .un.sun_family = AF_UNIX,
227 .un.sun_path = "/run/systemd/journal/stdout",
228 };
229 uid_t olduid = UID_INVALID;
230 gid_t oldgid = GID_INVALID;
231 int r;
232
233 if (gid != GID_INVALID) {
234 oldgid = getgid();
235
236 r = setegid(gid);
237 if (r < 0)
238 return -errno;
239 }
240
241 if (uid != UID_INVALID) {
242 olduid = getuid();
243
244 r = seteuid(uid);
245 if (r < 0) {
246 r = -errno;
247 goto restore_gid;
248 }
249 }
250
251 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
252 if (r < 0)
253 r = -errno;
254
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
257
258 if (uid != UID_INVALID)
259 (void) seteuid(olduid);
260
261 restore_gid:
262 if (gid != GID_INVALID)
263 (void) setegid(oldgid);
264
265 return r;
266 }
267
268 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
269 int fd, r;
270
271 assert(context);
272 assert(output < _EXEC_OUTPUT_MAX);
273 assert(ident);
274 assert(nfd >= 0);
275
276 fd = socket(AF_UNIX, SOCK_STREAM, 0);
277 if (fd < 0)
278 return -errno;
279
280 r = connect_journal_socket(fd, uid, gid);
281 if (r < 0)
282 return r;
283
284 if (shutdown(fd, SHUT_RD) < 0) {
285 safe_close(fd);
286 return -errno;
287 }
288
289 fd_inc_sndbuf(fd, SNDBUF_SIZE);
290
291 dprintf(fd,
292 "%s\n"
293 "%s\n"
294 "%i\n"
295 "%i\n"
296 "%i\n"
297 "%i\n"
298 "%i\n",
299 context->syslog_identifier ? context->syslog_identifier : ident,
300 unit_id,
301 context->syslog_priority,
302 !!context->syslog_level_prefix,
303 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
304 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
305 is_terminal_output(output));
306
307 if (fd != nfd) {
308 r = dup2(fd, nfd) < 0 ? -errno : nfd;
309 safe_close(fd);
310 } else
311 r = nfd;
312
313 return r;
314 }
/* Opens the terminal at 'path' via open_terminal() with 'mode' plus
 * O_NOCTTY and makes it available as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        /* Nothing to move if we got the wanted number right away */
        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(fd);
        return r;
}
333
334 static bool is_terminal_input(ExecInput i) {
335 return
336 i == EXEC_INPUT_TTY ||
337 i == EXEC_INPUT_TTY_FORCE ||
338 i == EXEC_INPUT_TTY_FAIL;
339 }
340
341 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
342
343 if (is_terminal_input(std_input) && !apply_tty_stdin)
344 return EXEC_INPUT_NULL;
345
346 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
347 return EXEC_INPUT_NULL;
348
349 return std_input;
350 }
351
352 static int fixup_output(ExecOutput std_output, int socket_fd) {
353
354 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
355 return EXEC_OUTPUT_INHERIT;
356
357 return std_output;
358 }
359
360 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
361 ExecInput i;
362
363 assert(context);
364
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366
367 switch (i) {
368
369 case EXEC_INPUT_NULL:
370 return open_null_as(O_RDONLY, STDIN_FILENO);
371
372 case EXEC_INPUT_TTY:
373 case EXEC_INPUT_TTY_FORCE:
374 case EXEC_INPUT_TTY_FAIL: {
375 int fd, r;
376
377 fd = acquire_terminal(tty_path(context),
378 i == EXEC_INPUT_TTY_FAIL,
379 i == EXEC_INPUT_TTY_FORCE,
380 false,
381 USEC_INFINITY);
382 if (fd < 0)
383 return fd;
384
385 if (fd != STDIN_FILENO) {
386 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
387 safe_close(fd);
388 } else
389 r = STDIN_FILENO;
390
391 return r;
392 }
393
394 case EXEC_INPUT_SOCKET:
395 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
396
397 default:
398 assert_not_reached("Unknown input type");
399 }
400 }
401
402 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
403 ExecOutput o;
404 ExecInput i;
405 int r;
406
407 assert(unit);
408 assert(context);
409 assert(ident);
410
411 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
412 o = fixup_output(context->std_output, socket_fd);
413
414 if (fileno == STDERR_FILENO) {
415 ExecOutput e;
416 e = fixup_output(context->std_error, socket_fd);
417
418 /* This expects the input and output are already set up */
419
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e == EXEC_OUTPUT_INHERIT &&
423 o == EXEC_OUTPUT_INHERIT &&
424 i == EXEC_INPUT_NULL &&
425 !is_terminal_input(context->std_input) &&
426 getppid () != 1)
427 return fileno;
428
429 /* Duplicate from stdout if possible */
430 if (e == o || e == EXEC_OUTPUT_INHERIT)
431 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
432
433 o = e;
434
435 } else if (o == EXEC_OUTPUT_INHERIT) {
436 /* If input got downgraded, inherit the original value */
437 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
438 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
439
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i != EXEC_INPUT_NULL)
442 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
443
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
445 if (getppid() != 1)
446 return fileno;
447
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY, fileno);
450 }
451
452 switch (o) {
453
454 case EXEC_OUTPUT_NULL:
455 return open_null_as(O_WRONLY, fileno);
456
457 case EXEC_OUTPUT_TTY:
458 if (is_terminal_input(i))
459 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
460
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
463
464 case EXEC_OUTPUT_SYSLOG:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
466 case EXEC_OUTPUT_KMSG:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
468 case EXEC_OUTPUT_JOURNAL:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
470 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
471 if (r < 0) {
472 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
473 r = open_null_as(O_WRONLY, fileno);
474 }
475 return r;
476
477 case EXEC_OUTPUT_SOCKET:
478 assert(socket_fd >= 0);
479 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481 default:
482 assert_not_reached("Unknown error type");
483 }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487 struct stat st;
488
489 assert(fd >= 0);
490
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd, uid, -1);
493 (void) fchmod(fd, TTY_MODE);
494
495 if (fstat(fd, &st) < 0)
496 return -errno;
497
498 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499 return -EPERM;
500
501 return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505 int *_saved_stdout) {
506 int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508 assert(_saved_stdin);
509 assert(_saved_stdout);
510
511 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512 if (saved_stdin < 0)
513 return -errno;
514
515 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516 if (saved_stdout < 0) {
517 r = errno;
518 goto fail;
519 }
520
521 fd = acquire_terminal(
522 "/dev/console",
523 false,
524 false,
525 false,
526 DEFAULT_CONFIRM_USEC);
527 if (fd < 0) {
528 r = fd;
529 goto fail;
530 }
531
532 r = chown_terminal(fd, getuid());
533 if (r < 0)
534 goto fail;
535
536 if (dup2(fd, STDIN_FILENO) < 0) {
537 r = -errno;
538 goto fail;
539 }
540
541 if (dup2(fd, STDOUT_FILENO) < 0) {
542 r = -errno;
543 goto fail;
544 }
545
546 if (fd >= 2)
547 safe_close(fd);
548
549 *_saved_stdin = saved_stdin;
550 *_saved_stdout = saved_stdout;
551
552 return 0;
553
554 fail:
555 safe_close(saved_stdout);
556 safe_close(saved_stdin);
557 safe_close(fd);
558
559 return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563 _cleanup_close_ int fd = -1;
564 va_list ap;
565
566 assert(format);
567
568 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569 if (fd < 0)
570 return fd;
571
572 va_start(ap, format);
573 vdprintf(fd, format, ap);
574 va_end(ap);
575
576 return 0;
577 }
578
/* Counterpart of setup_confirm_stdio(): releases the terminal and puts the
 * saved stdin/stdout descriptors back in place. The saved descriptors are
 * closed in any case.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604 int saved_stdout = -1, saved_stdin = -1, r;
605 _cleanup_free_ char *line = NULL;
606
607 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608 if (r < 0)
609 return r;
610
611 line = exec_command_line(argv);
612 if (!line)
613 return -ENOMEM;
614
615 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617 restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619 return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623 bool keep_groups = false;
624 int r;
625
626 assert(context);
627
628 /* Lookup and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
630
631 if (context->group || username) {
632
633 if (context->group) {
634 const char *g = context->group;
635
636 r = get_group_creds(&g, &gid);
637 if (r < 0)
638 return r;
639 }
640
641 /* First step, initialize groups from /etc/groups */
642 if (username && gid != 0) {
643 if (initgroups(username, gid) < 0)
644 return -errno;
645
646 keep_groups = true;
647 }
648
649 /* Second step, set our gids */
650 if (setresgid(gid, gid, gid) < 0)
651 return -errno;
652 }
653
654 if (context->supplementary_groups) {
655 int ngroups_max, k;
656 gid_t *gids;
657 char **i;
658
659 /* Final step, initialize any manually set supplementary groups */
660 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
661
662 if (!(gids = new(gid_t, ngroups_max)))
663 return -ENOMEM;
664
665 if (keep_groups) {
666 k = getgroups(ngroups_max, gids);
667 if (k < 0) {
668 free(gids);
669 return -errno;
670 }
671 } else
672 k = 0;
673
674 STRV_FOREACH(i, context->supplementary_groups) {
675 const char *g;
676
677 if (k >= ngroups_max) {
678 free(gids);
679 return -E2BIG;
680 }
681
682 g = *i;
683 r = get_group_creds(&g, gids+k);
684 if (r < 0) {
685 free(gids);
686 return r;
687 }
688
689 k++;
690 }
691
692 if (setgroups(k, gids) < 0) {
693 free(gids);
694 return -errno;
695 }
696
697 free(gids);
698 }
699
700 return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704 assert(context);
705
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
708
709 if (context->capabilities) {
710 _cleanup_cap_free_ cap_t d = NULL;
711 static const cap_value_t bits[] = {
712 CAP_SETUID, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
714 };
715
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
719 if (uid != 0) {
720 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722 if (prctl(PR_GET_SECUREBITS) != sb)
723 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724 return -errno;
725 }
726
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
729
730 d = cap_dup(context->capabilities);
731 if (!d)
732 return -errno;
733
734 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736 return -errno;
737
738 if (cap_set_proc(d) < 0)
739 return -errno;
740 }
741
742 /* Third step: actually set the uids */
743 if (setresuid(uid, uid, uid) < 0)
744 return -errno;
745
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might got
748 corrupted due to the setresuid() so we need clean them up
749 later. This is done outside of this call. */
750
751 return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757 int num_msg,
758 const struct pam_message **msg,
759 struct pam_response **resp,
760 void *appdata_ptr) {
761
762 /* We don't support conversations */
763
764 return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768 const char *name,
769 const char *user,
770 uid_t uid,
771 const char *tty,
772 char ***pam_env,
773 int fds[], unsigned n_fds) {
774
775 static const struct pam_conv conv = {
776 .conv = null_conv,
777 .appdata_ptr = NULL
778 };
779
780 pam_handle_t *handle = NULL;
781 sigset_t old_ss;
782 int pam_code = PAM_SUCCESS;
783 int err;
784 char **e = NULL;
785 bool close_session = false;
786 pid_t pam_pid = 0, parent_pid;
787 int flags = 0;
788
789 assert(name);
790 assert(user);
791 assert(pam_env);
792
793 /* We set up PAM in the parent process, then fork. The child
794 * will then stay around until killed via PR_GET_PDEATHSIG or
795 * systemd via the cgroup logic. It will then remove the PAM
796 * session again. The parent process will exec() the actual
797 * daemon. We do things this way to ensure that the main PID
798 * of the daemon is the one we initially fork()ed. */
799
800 if (log_get_max_level() < LOG_DEBUG)
801 flags |= PAM_SILENT;
802
803 pam_code = pam_start(name, user, &conv, &handle);
804 if (pam_code != PAM_SUCCESS) {
805 handle = NULL;
806 goto fail;
807 }
808
809 if (tty) {
810 pam_code = pam_set_item(handle, PAM_TTY, tty);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813 }
814
815 pam_code = pam_acct_mgmt(handle, flags);
816 if (pam_code != PAM_SUCCESS)
817 goto fail;
818
819 pam_code = pam_open_session(handle, flags);
820 if (pam_code != PAM_SUCCESS)
821 goto fail;
822
823 close_session = true;
824
825 e = pam_getenvlist(handle);
826 if (!e) {
827 pam_code = PAM_BUF_ERR;
828 goto fail;
829 }
830
831 /* Block SIGTERM, so that we know that it won't get lost in
832 * the child */
833
834 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
835
836 parent_pid = getpid();
837
838 pam_pid = fork();
839 if (pam_pid < 0)
840 goto fail;
841
842 if (pam_pid == 0) {
843 int sig;
844 int r = EXIT_PAM;
845
846 /* The child's job is to reset the PAM session on
847 * termination */
848
849 /* This string must fit in 10 chars (i.e. the length
850 * of "/sbin/init"), to look pretty in /bin/ps */
851 rename_process("(sd-pam)");
852
853 /* Make sure we don't keep open the passed fds in this
854 child. We assume that otherwise only those fds are
855 open here that have been opened by PAM. */
856 close_many(fds, n_fds);
857
858 /* Drop privileges - we don't need any to pam_close_session
859 * and this will make PR_SET_PDEATHSIG work in most cases.
860 * If this fails, ignore the error - but expect sd-pam threads
861 * to fail to exit normally */
862 if (setresuid(uid, uid, uid) < 0)
863 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
864
865 (void) ignore_signals(SIGPIPE, -1);
866
867 /* Wait until our parent died. This will only work if
868 * the above setresuid() succeeds, otherwise the kernel
869 * will not allow unprivileged parents kill their privileged
870 * children this way. We rely on the control groups kill logic
871 * to do the rest for us. */
872 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873 goto child_finish;
874
875 /* Check if our parent process might already have
876 * died? */
877 if (getppid() == parent_pid) {
878 sigset_t ss;
879
880 assert_se(sigemptyset(&ss) >= 0);
881 assert_se(sigaddset(&ss, SIGTERM) >= 0);
882
883 for (;;) {
884 if (sigwait(&ss, &sig) < 0) {
885 if (errno == EINTR)
886 continue;
887
888 goto child_finish;
889 }
890
891 assert(sig == SIGTERM);
892 break;
893 }
894 }
895
896 /* If our parent died we'll end the session */
897 if (getppid() != parent_pid) {
898 pam_code = pam_close_session(handle, flags);
899 if (pam_code != PAM_SUCCESS)
900 goto child_finish;
901 }
902
903 r = 0;
904
905 child_finish:
906 pam_end(handle, pam_code | flags);
907 _exit(r);
908 }
909
910 /* If the child was forked off successfully it will do all the
911 * cleanups, so forget about the handle here. */
912 handle = NULL;
913
914 /* Unblock SIGTERM again in the parent */
915 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
916
917 /* We close the log explicitly here, since the PAM modules
918 * might have opened it, but we don't want this fd around. */
919 closelog();
920
921 *pam_env = e;
922 e = NULL;
923
924 return 0;
925
926 fail:
927 if (pam_code != PAM_SUCCESS) {
928 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
929 err = -EPERM; /* PAM errors do not map to errno */
930 } else {
931 err = log_error_errno(errno, "PAM failed: %m");
932 }
933
934 if (handle) {
935 if (close_session)
936 pam_code = pam_close_session(handle, flags);
937
938 pam_end(handle, pam_code | flags);
939 }
940
941 strv_free(e);
942
943 closelog();
944
945 if (pam_pid > 1) {
946 kill(pam_pid, SIGTERM);
947 kill(pam_pid, SIGCONT);
948 }
949
950 return err;
951 }
952 #endif
953
/* Renames the calling process to the base name of 'path', put in
 * parentheses. If the base name is empty, "(...)" is used. Base names
 * longer than 8 characters are cut down to their last 8 characters. */
static void rename_process_from_path(const char *path) {
        char buf[11], *w = buf;
        const char *tail;
        size_t len;

        /* This resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* The end of the process name is usually more
                 * interesting, since the first bit might just be
                 * "systemd-" */
                tail += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, tail, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
984
985 #ifdef HAVE_SECCOMP
986
987 static int apply_seccomp(const ExecContext *c) {
988 uint32_t negative_action, action;
989 scmp_filter_ctx *seccomp;
990 Iterator i;
991 void *id;
992 int r;
993
994 assert(c);
995
996 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
997
998 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
999 if (!seccomp)
1000 return -ENOMEM;
1001
1002 if (c->syscall_archs) {
1003
1004 SET_FOREACH(id, c->syscall_archs, i) {
1005 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1006 if (r == -EEXIST)
1007 continue;
1008 if (r < 0)
1009 goto finish;
1010 }
1011
1012 } else {
1013 r = seccomp_add_secondary_archs(seccomp);
1014 if (r < 0)
1015 goto finish;
1016 }
1017
1018 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1019 SET_FOREACH(id, c->syscall_filter, i) {
1020 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1021 if (r < 0)
1022 goto finish;
1023 }
1024
1025 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1026 if (r < 0)
1027 goto finish;
1028
1029 r = seccomp_load(seccomp);
1030
1031 finish:
1032 seccomp_release(seccomp);
1033 return r;
1034 }
1035
1036 static int apply_address_families(const ExecContext *c) {
1037 scmp_filter_ctx *seccomp;
1038 Iterator i;
1039 int r;
1040
1041 assert(c);
1042
1043 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1044 if (!seccomp)
1045 return -ENOMEM;
1046
1047 r = seccomp_add_secondary_archs(seccomp);
1048 if (r < 0)
1049 goto finish;
1050
1051 if (c->address_families_whitelist) {
1052 int af, first = 0, last = 0;
1053 void *afp;
1054
1055 /* If this is a whitelist, we first block the address
1056 * families that are out of range and then everything
1057 * that is not in the set. First, we find the lowest
1058 * and highest address family in the set. */
1059
1060 SET_FOREACH(afp, c->address_families, i) {
1061 af = PTR_TO_INT(afp);
1062
1063 if (af <= 0 || af >= af_max())
1064 continue;
1065
1066 if (first == 0 || af < first)
1067 first = af;
1068
1069 if (last == 0 || af > last)
1070 last = af;
1071 }
1072
1073 assert((first == 0) == (last == 0));
1074
1075 if (first == 0) {
1076
1077 /* No entries in the valid range, block everything */
1078 r = seccomp_rule_add(
1079 seccomp,
1080 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1081 SCMP_SYS(socket),
1082 0);
1083 if (r < 0)
1084 goto finish;
1085
1086 } else {
1087
1088 /* Block everything below the first entry */
1089 r = seccomp_rule_add(
1090 seccomp,
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092 SCMP_SYS(socket),
1093 1,
1094 SCMP_A0(SCMP_CMP_LT, first));
1095 if (r < 0)
1096 goto finish;
1097
1098 /* Block everything above the last entry */
1099 r = seccomp_rule_add(
1100 seccomp,
1101 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1102 SCMP_SYS(socket),
1103 1,
1104 SCMP_A0(SCMP_CMP_GT, last));
1105 if (r < 0)
1106 goto finish;
1107
1108 /* Block everything between the first and last
1109 * entry */
1110 for (af = 1; af < af_max(); af++) {
1111
1112 if (set_contains(c->address_families, INT_TO_PTR(af)))
1113 continue;
1114
1115 r = seccomp_rule_add(
1116 seccomp,
1117 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1118 SCMP_SYS(socket),
1119 1,
1120 SCMP_A0(SCMP_CMP_EQ, af));
1121 if (r < 0)
1122 goto finish;
1123 }
1124 }
1125
1126 } else {
1127 void *af;
1128
1129 /* If this is a blacklist, then generate one rule for
1130 * each address family that are then combined in OR
1131 * checks. */
1132
1133 SET_FOREACH(af, c->address_families, i) {
1134
1135 r = seccomp_rule_add(
1136 seccomp,
1137 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1138 SCMP_SYS(socket),
1139 1,
1140 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1141 if (r < 0)
1142 goto finish;
1143 }
1144 }
1145
1146 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1147 if (r < 0)
1148 goto finish;
1149
1150 r = seccomp_load(seccomp);
1151
1152 finish:
1153 seccomp_release(seccomp);
1154 return r;
1155 }
1156
1157 #endif
1158
1159 static void do_idle_pipe_dance(int idle_pipe[4]) {
1160 assert(idle_pipe);
1161
1162
1163 safe_close(idle_pipe[1]);
1164 safe_close(idle_pipe[2]);
1165
1166 if (idle_pipe[0] >= 0) {
1167 int r;
1168
1169 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1170
1171 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1172 /* Signal systemd that we are bored and want to continue. */
1173 r = write(idle_pipe[3], "x", 1);
1174 if (r > 0)
1175 /* Wait for systemd to react to the signal above. */
1176 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1177 }
1178
1179 safe_close(idle_pipe[0]);
1180
1181 }
1182
1183 safe_close(idle_pipe[3]);
1184 }
1185
1186 static int build_environment(
1187 const ExecContext *c,
1188 unsigned n_fds,
1189 usec_t watchdog_usec,
1190 const char *home,
1191 const char *username,
1192 const char *shell,
1193 char ***ret) {
1194
1195 _cleanup_strv_free_ char **our_env = NULL;
1196 unsigned n_env = 0;
1197 char *x;
1198
1199 assert(c);
1200 assert(ret);
1201
1202 our_env = new0(char*, 10);
1203 if (!our_env)
1204 return -ENOMEM;
1205
1206 if (n_fds > 0) {
1207 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1208 return -ENOMEM;
1209 our_env[n_env++] = x;
1210
1211 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1212 return -ENOMEM;
1213 our_env[n_env++] = x;
1214 }
1215
1216 if (watchdog_usec > 0) {
1217 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1218 return -ENOMEM;
1219 our_env[n_env++] = x;
1220
1221 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1222 return -ENOMEM;
1223 our_env[n_env++] = x;
1224 }
1225
1226 if (home) {
1227 x = strappend("HOME=", home);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231 }
1232
1233 if (username) {
1234 x = strappend("LOGNAME=", username);
1235 if (!x)
1236 return -ENOMEM;
1237 our_env[n_env++] = x;
1238
1239 x = strappend("USER=", username);
1240 if (!x)
1241 return -ENOMEM;
1242 our_env[n_env++] = x;
1243 }
1244
1245 if (shell) {
1246 x = strappend("SHELL=", shell);
1247 if (!x)
1248 return -ENOMEM;
1249 our_env[n_env++] = x;
1250 }
1251
1252 if (is_terminal_input(c->std_input) ||
1253 c->std_output == EXEC_OUTPUT_TTY ||
1254 c->std_error == EXEC_OUTPUT_TTY ||
1255 c->tty_path) {
1256
1257 x = strdup(default_term_for_tty(tty_path(c)));
1258 if (!x)
1259 return -ENOMEM;
1260 our_env[n_env++] = x;
1261 }
1262
1263 our_env[n_env++] = NULL;
1264 assert(n_env <= 10);
1265
1266 *ret = our_env;
1267 our_env = NULL;
1268
1269 return 0;
1270 }
1271
1272 static bool exec_needs_mount_namespace(
1273 const ExecContext *context,
1274 const ExecParameters *params,
1275 ExecRuntime *runtime) {
1276
1277 assert(context);
1278 assert(params);
1279
1280 if (!strv_isempty(context->read_write_dirs) ||
1281 !strv_isempty(context->read_only_dirs) ||
1282 !strv_isempty(context->inaccessible_dirs))
1283 return true;
1284
1285 if (context->mount_flags != 0)
1286 return true;
1287
1288 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1289 return true;
1290
1291 if (params->bus_endpoint_path)
1292 return true;
1293
1294 if (context->private_devices ||
1295 context->protect_system != PROTECT_SYSTEM_NO ||
1296 context->protect_home != PROTECT_HOME_NO)
1297 return true;
1298
1299 return false;
1300 }
1301
1302 static int exec_child(
1303 Unit *unit,
1304 ExecCommand *command,
1305 const ExecContext *context,
1306 const ExecParameters *params,
1307 ExecRuntime *runtime,
1308 char **argv,
1309 int socket_fd,
1310 int *fds, unsigned n_fds,
1311 char **files_env,
1312 int *exit_status) {
1313
1314 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1315 _cleanup_free_ char *mac_selinux_context_net = NULL;
1316 const char *username = NULL, *home = NULL, *shell = NULL;
1317 unsigned n_dont_close = 0;
1318 int dont_close[n_fds + 4];
1319 uid_t uid = UID_INVALID;
1320 gid_t gid = GID_INVALID;
1321 int i, r;
1322 bool needs_mount_namespace;
1323
1324 assert(unit);
1325 assert(command);
1326 assert(context);
1327 assert(params);
1328 assert(exit_status);
1329
1330 rename_process_from_path(command->path);
1331
1332 /* We reset exactly these signals, since they are the
1333 * only ones we set to SIG_IGN in the main daemon. All
1334 * others we leave untouched because we set them to
1335 * SIG_DFL or a valid handler initially, both of which
1336 * will be demoted to SIG_DFL. */
1337 (void) default_signals(SIGNALS_CRASH_HANDLER,
1338 SIGNALS_IGNORE, -1);
1339
1340 if (context->ignore_sigpipe)
1341 (void) ignore_signals(SIGPIPE, -1);
1342
1343 r = reset_signal_mask();
1344 if (r < 0) {
1345 *exit_status = EXIT_SIGNAL_MASK;
1346 return r;
1347 }
1348
1349 if (params->idle_pipe)
1350 do_idle_pipe_dance(params->idle_pipe);
1351
1352 /* Close sockets very early to make sure we don't
1353 * block init reexecution because it cannot bind its
1354 * sockets */
1355
1356 log_forget_fds();
1357
1358 if (socket_fd >= 0)
1359 dont_close[n_dont_close++] = socket_fd;
1360 if (n_fds > 0) {
1361 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1362 n_dont_close += n_fds;
1363 }
1364 if (params->bus_endpoint_fd >= 0)
1365 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1366 if (runtime) {
1367 if (runtime->netns_storage_socket[0] >= 0)
1368 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1369 if (runtime->netns_storage_socket[1] >= 0)
1370 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1371 }
1372
1373 r = close_all_fds(dont_close, n_dont_close);
1374 if (r < 0) {
1375 *exit_status = EXIT_FDS;
1376 return r;
1377 }
1378
1379 if (!context->same_pgrp)
1380 if (setsid() < 0) {
1381 *exit_status = EXIT_SETSID;
1382 return -errno;
1383 }
1384
1385 exec_context_tty_reset(context);
1386
1387 if (params->confirm_spawn) {
1388 char response;
1389
1390 r = ask_for_confirmation(&response, argv);
1391 if (r == -ETIMEDOUT)
1392 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1393 else if (r < 0)
1394 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1395 else if (response == 's') {
1396 write_confirm_message("Skipping execution.\n");
1397 *exit_status = EXIT_CONFIRM;
1398 return -ECANCELED;
1399 } else if (response == 'n') {
1400 write_confirm_message("Failing execution.\n");
1401 *exit_status = 0;
1402 return 0;
1403 }
1404 }
1405
1406 if (context->user) {
1407 username = context->user;
1408 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1409 if (r < 0) {
1410 *exit_status = EXIT_USER;
1411 return r;
1412 }
1413 }
1414
1415 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1416 * must sure to drop O_NONBLOCK */
1417 if (socket_fd >= 0)
1418 fd_nonblock(socket_fd, false);
1419
1420 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1421 if (r < 0) {
1422 *exit_status = EXIT_STDIN;
1423 return r;
1424 }
1425
1426 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1427 if (r < 0) {
1428 *exit_status = EXIT_STDOUT;
1429 return r;
1430 }
1431
1432 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1433 if (r < 0) {
1434 *exit_status = EXIT_STDERR;
1435 return r;
1436 }
1437
1438 if (params->cgroup_path) {
1439 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1440 if (r < 0) {
1441 *exit_status = EXIT_CGROUP;
1442 return r;
1443 }
1444 }
1445
1446 if (context->oom_score_adjust_set) {
1447 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1448
1449 /* When we can't make this change due to EPERM, then
1450 * let's silently skip over it. User namespaces
1451 * prohibit write access to this file, and we
1452 * shouldn't trip up over that. */
1453
1454 sprintf(t, "%i", context->oom_score_adjust);
1455 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1456 if (r == -EPERM || r == -EACCES) {
1457 log_open();
1458 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1459 log_close();
1460 } else if (r < 0) {
1461 *exit_status = EXIT_OOM_ADJUST;
1462 return -errno;
1463 }
1464 }
1465
1466 if (context->nice_set)
1467 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1468 *exit_status = EXIT_NICE;
1469 return -errno;
1470 }
1471
1472 if (context->cpu_sched_set) {
1473 struct sched_param param = {
1474 .sched_priority = context->cpu_sched_priority,
1475 };
1476
1477 r = sched_setscheduler(0,
1478 context->cpu_sched_policy |
1479 (context->cpu_sched_reset_on_fork ?
1480 SCHED_RESET_ON_FORK : 0),
1481 &param);
1482 if (r < 0) {
1483 *exit_status = EXIT_SETSCHEDULER;
1484 return -errno;
1485 }
1486 }
1487
1488 if (context->cpuset)
1489 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1490 *exit_status = EXIT_CPUAFFINITY;
1491 return -errno;
1492 }
1493
1494 if (context->ioprio_set)
1495 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1496 *exit_status = EXIT_IOPRIO;
1497 return -errno;
1498 }
1499
1500 if (context->timer_slack_nsec != NSEC_INFINITY)
1501 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1502 *exit_status = EXIT_TIMERSLACK;
1503 return -errno;
1504 }
1505
1506 if (context->personality != PERSONALITY_INVALID)
1507 if (personality(context->personality) < 0) {
1508 *exit_status = EXIT_PERSONALITY;
1509 return -errno;
1510 }
1511
1512 if (context->utmp_id)
1513 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1514 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1515 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1516 USER_PROCESS,
1517 username ? "root" : context->user);
1518
1519 if (context->user && is_terminal_input(context->std_input)) {
1520 r = chown_terminal(STDIN_FILENO, uid);
1521 if (r < 0) {
1522 *exit_status = EXIT_STDIN;
1523 return r;
1524 }
1525 }
1526
1527 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1528 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1529
1530 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1531 if (r < 0) {
1532 *exit_status = EXIT_BUS_ENDPOINT;
1533 return r;
1534 }
1535 }
1536
1537 /* If delegation is enabled we'll pass ownership of the cgroup
1538 * (but only in systemd's own controller hierarchy!) to the
1539 * user of the new process. */
1540 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1541 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1542 if (r < 0) {
1543 *exit_status = EXIT_CGROUP;
1544 return r;
1545 }
1546
1547
1548 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1549 if (r < 0) {
1550 *exit_status = EXIT_CGROUP;
1551 return r;
1552 }
1553 }
1554
1555 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1556 char **rt;
1557
1558 STRV_FOREACH(rt, context->runtime_directory) {
1559 _cleanup_free_ char *p;
1560
1561 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1562 if (!p) {
1563 *exit_status = EXIT_RUNTIME_DIRECTORY;
1564 return -ENOMEM;
1565 }
1566
1567 r = mkdir_p_label(p, context->runtime_directory_mode);
1568 if (r < 0) {
1569 *exit_status = EXIT_RUNTIME_DIRECTORY;
1570 return r;
1571 }
1572
1573 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1574 if (r < 0) {
1575 *exit_status = EXIT_RUNTIME_DIRECTORY;
1576 return r;
1577 }
1578 }
1579 }
1580
1581 if (params->apply_permissions) {
1582 r = enforce_groups(context, username, gid);
1583 if (r < 0) {
1584 *exit_status = EXIT_GROUP;
1585 return r;
1586 }
1587 }
1588
1589 umask(context->umask);
1590
1591 #ifdef HAVE_PAM
1592 if (params->apply_permissions && context->pam_name && username) {
1593 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1594 if (r < 0) {
1595 *exit_status = EXIT_PAM;
1596 return r;
1597 }
1598 }
1599 #endif
1600
1601 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1602 r = setup_netns(runtime->netns_storage_socket);
1603 if (r < 0) {
1604 *exit_status = EXIT_NETWORK;
1605 return r;
1606 }
1607 }
1608
1609 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1610
1611 if (needs_mount_namespace) {
1612 char *tmp = NULL, *var = NULL;
1613
1614 /* The runtime struct only contains the parent
1615 * of the private /tmp, which is
1616 * non-accessible to world users. Inside of it
1617 * there's a /tmp that is sticky, and that's
1618 * the one we want to use here. */
1619
1620 if (context->private_tmp && runtime) {
1621 if (runtime->tmp_dir)
1622 tmp = strjoina(runtime->tmp_dir, "/tmp");
1623 if (runtime->var_tmp_dir)
1624 var = strjoina(runtime->var_tmp_dir, "/tmp");
1625 }
1626
1627 r = setup_namespace(
1628 params->apply_chroot ? context->root_directory : NULL,
1629 context->read_write_dirs,
1630 context->read_only_dirs,
1631 context->inaccessible_dirs,
1632 tmp,
1633 var,
1634 params->bus_endpoint_path,
1635 context->private_devices,
1636 context->protect_home,
1637 context->protect_system,
1638 context->mount_flags);
1639
1640 /* If we couldn't set up the namespace this is
1641 * probably due to a missing capability. In this case,
1642 * silently proceeed. */
1643 if (r == -EPERM || r == -EACCES) {
1644 log_open();
1645 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1646 log_close();
1647 } else if (r < 0) {
1648 *exit_status = EXIT_NAMESPACE;
1649 return r;
1650 }
1651 }
1652
1653 if (params->apply_chroot) {
1654 if (!needs_mount_namespace && context->root_directory)
1655 if (chroot(context->root_directory) < 0) {
1656 *exit_status = EXIT_CHROOT;
1657 return -errno;
1658 }
1659
1660 if (chdir(context->working_directory ?: "/") < 0 &&
1661 !context->working_directory_missing_ok) {
1662 *exit_status = EXIT_CHDIR;
1663 return -errno;
1664 }
1665 } else {
1666 _cleanup_free_ char *d = NULL;
1667
1668 if (asprintf(&d, "%s/%s",
1669 context->root_directory ?: "",
1670 context->working_directory ?: "") < 0) {
1671 *exit_status = EXIT_MEMORY;
1672 return -ENOMEM;
1673 }
1674
1675 if (chdir(d) < 0 &&
1676 !context->working_directory_missing_ok) {
1677 *exit_status = EXIT_CHDIR;
1678 return -errno;
1679 }
1680 }
1681
1682 #ifdef HAVE_SELINUX
1683 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1684 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1685 if (r < 0) {
1686 *exit_status = EXIT_SELINUX_CONTEXT;
1687 return r;
1688 }
1689 }
1690 #endif
1691
1692 /* We repeat the fd closing here, to make sure that
1693 * nothing is leaked from the PAM modules. Note that
1694 * we are more aggressive this time since socket_fd
1695 * and the netns fds we don't need anymore. The custom
1696 * endpoint fd was needed to upload the policy and can
1697 * now be closed as well. */
1698 r = close_all_fds(fds, n_fds);
1699 if (r >= 0)
1700 r = shift_fds(fds, n_fds);
1701 if (r >= 0)
1702 r = flags_fds(fds, n_fds, context->non_blocking);
1703 if (r < 0) {
1704 *exit_status = EXIT_FDS;
1705 return r;
1706 }
1707
1708 if (params->apply_permissions) {
1709
1710 for (i = 0; i < _RLIMIT_MAX; i++) {
1711 if (!context->rlimit[i])
1712 continue;
1713
1714 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1715 *exit_status = EXIT_LIMITS;
1716 return -errno;
1717 }
1718 }
1719
1720 if (context->capability_bounding_set_drop) {
1721 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1722 if (r < 0) {
1723 *exit_status = EXIT_CAPABILITIES;
1724 return r;
1725 }
1726 }
1727
1728 #ifdef HAVE_SMACK
1729 if (context->smack_process_label) {
1730 r = mac_smack_apply_pid(0, context->smack_process_label);
1731 if (r < 0) {
1732 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1733 return r;
1734 }
1735 }
1736 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1737 else {
1738 _cleanup_free_ char *exec_label = NULL;
1739
1740 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1741 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1742 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1743 return r;
1744 }
1745
1746 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1747 if (r < 0) {
1748 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1749 return r;
1750 }
1751 }
1752 #endif
1753 #endif
1754
1755 if (context->user) {
1756 r = enforce_user(context, uid);
1757 if (r < 0) {
1758 *exit_status = EXIT_USER;
1759 return r;
1760 }
1761 }
1762
1763 /* PR_GET_SECUREBITS is not privileged, while
1764 * PR_SET_SECUREBITS is. So to suppress
1765 * potential EPERMs we'll try not to call
1766 * PR_SET_SECUREBITS unless necessary. */
1767 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1768 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1769 *exit_status = EXIT_SECUREBITS;
1770 return -errno;
1771 }
1772
1773 if (context->capabilities)
1774 if (cap_set_proc(context->capabilities) < 0) {
1775 *exit_status = EXIT_CAPABILITIES;
1776 return -errno;
1777 }
1778
1779 if (context->no_new_privileges)
1780 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1781 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1782 return -errno;
1783 }
1784
1785 #ifdef HAVE_SECCOMP
1786 if (context->address_families_whitelist ||
1787 !set_isempty(context->address_families)) {
1788 r = apply_address_families(context);
1789 if (r < 0) {
1790 *exit_status = EXIT_ADDRESS_FAMILIES;
1791 return r;
1792 }
1793 }
1794
1795 if (context->syscall_whitelist ||
1796 !set_isempty(context->syscall_filter) ||
1797 !set_isempty(context->syscall_archs)) {
1798 r = apply_seccomp(context);
1799 if (r < 0) {
1800 *exit_status = EXIT_SECCOMP;
1801 return r;
1802 }
1803 }
1804 #endif
1805
1806 #ifdef HAVE_SELINUX
1807 if (mac_selinux_use()) {
1808 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1809
1810 if (exec_context) {
1811 r = setexeccon(exec_context);
1812 if (r < 0) {
1813 *exit_status = EXIT_SELINUX_CONTEXT;
1814 return r;
1815 }
1816 }
1817 }
1818 #endif
1819
1820 #ifdef HAVE_APPARMOR
1821 if (context->apparmor_profile && mac_apparmor_use()) {
1822 r = aa_change_onexec(context->apparmor_profile);
1823 if (r < 0 && !context->apparmor_profile_ignore) {
1824 *exit_status = EXIT_APPARMOR_PROFILE;
1825 return -errno;
1826 }
1827 }
1828 #endif
1829 }
1830
1831 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1832 if (r < 0) {
1833 *exit_status = EXIT_MEMORY;
1834 return r;
1835 }
1836
1837 final_env = strv_env_merge(5,
1838 params->environment,
1839 our_env,
1840 context->environment,
1841 files_env,
1842 pam_env,
1843 NULL);
1844 if (!final_env) {
1845 *exit_status = EXIT_MEMORY;
1846 return -ENOMEM;
1847 }
1848
1849 final_argv = replace_env_argv(argv, final_env);
1850 if (!final_argv) {
1851 *exit_status = EXIT_MEMORY;
1852 return -ENOMEM;
1853 }
1854
1855 final_env = strv_env_clean(final_env);
1856
1857 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1858 _cleanup_free_ char *line;
1859
1860 line = exec_command_line(final_argv);
1861 if (line) {
1862 log_open();
1863 log_struct(LOG_DEBUG,
1864 LOG_UNIT_ID(unit),
1865 "EXECUTABLE=%s", command->path,
1866 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1867 NULL);
1868 log_close();
1869 }
1870 }
1871
1872 execve(command->path, final_argv, final_env);
1873 *exit_status = EXIT_EXEC;
1874 return -errno;
1875 }
1876
1877 int exec_spawn(Unit *unit,
1878 ExecCommand *command,
1879 const ExecContext *context,
1880 const ExecParameters *params,
1881 ExecRuntime *runtime,
1882 pid_t *ret) {
1883
1884 _cleanup_strv_free_ char **files_env = NULL;
1885 int *fds = NULL; unsigned n_fds = 0;
1886 _cleanup_free_ char *line = NULL;
1887 int socket_fd, r;
1888 char **argv;
1889 pid_t pid;
1890
1891 assert(unit);
1892 assert(command);
1893 assert(context);
1894 assert(ret);
1895 assert(params);
1896 assert(params->fds || params->n_fds <= 0);
1897
1898 if (context->std_input == EXEC_INPUT_SOCKET ||
1899 context->std_output == EXEC_OUTPUT_SOCKET ||
1900 context->std_error == EXEC_OUTPUT_SOCKET) {
1901
1902 if (params->n_fds != 1) {
1903 log_unit_error(unit, "Got more than one socket.");
1904 return -EINVAL;
1905 }
1906
1907 socket_fd = params->fds[0];
1908 } else {
1909 socket_fd = -1;
1910 fds = params->fds;
1911 n_fds = params->n_fds;
1912 }
1913
1914 r = exec_context_load_environment(unit, context, &files_env);
1915 if (r < 0)
1916 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1917
1918 argv = params->argv ?: command->argv;
1919 line = exec_command_line(argv);
1920 if (!line)
1921 return log_oom();
1922
1923 log_struct(LOG_DEBUG,
1924 LOG_UNIT_ID(unit),
1925 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1926 "EXECUTABLE=%s", command->path,
1927 NULL);
1928 pid = fork();
1929 if (pid < 0)
1930 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1931
1932 if (pid == 0) {
1933 int exit_status;
1934
1935 r = exec_child(unit,
1936 command,
1937 context,
1938 params,
1939 runtime,
1940 argv,
1941 socket_fd,
1942 fds, n_fds,
1943 files_env,
1944 &exit_status);
1945 if (r < 0) {
1946 log_open();
1947 log_struct_errno(LOG_ERR, r,
1948 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1949 LOG_UNIT_ID(unit),
1950 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1951 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1952 command->path),
1953 "EXECUTABLE=%s", command->path,
1954 NULL);
1955 }
1956
1957 _exit(exit_status);
1958 }
1959
1960 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1961
1962 /* We add the new process to the cgroup both in the child (so
1963 * that we can be sure that no user code is ever executed
1964 * outside of the cgroup) and in the parent (so that we can be
1965 * sure that when we kill the cgroup the process will be
1966 * killed too). */
1967 if (params->cgroup_path)
1968 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1969
1970 exec_status_start(&command->exec_status, pid);
1971
1972 *ret = pid;
1973 return 0;
1974 }
1975
1976 void exec_context_init(ExecContext *c) {
1977 assert(c);
1978
1979 c->umask = 0022;
1980 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1981 c->cpu_sched_policy = SCHED_OTHER;
1982 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1983 c->syslog_level_prefix = true;
1984 c->ignore_sigpipe = true;
1985 c->timer_slack_nsec = NSEC_INFINITY;
1986 c->personality = PERSONALITY_INVALID;
1987 c->runtime_directory_mode = 0755;
1988 }
1989
1990 void exec_context_done(ExecContext *c) {
1991 unsigned l;
1992
1993 assert(c);
1994
1995 strv_free(c->environment);
1996 c->environment = NULL;
1997
1998 strv_free(c->environment_files);
1999 c->environment_files = NULL;
2000
2001 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
2002 c->rlimit[l] = mfree(c->rlimit[l]);
2003
2004 c->working_directory = mfree(c->working_directory);
2005 c->root_directory = mfree(c->root_directory);
2006 c->tty_path = mfree(c->tty_path);
2007 c->syslog_identifier = mfree(c->syslog_identifier);
2008 c->user = mfree(c->user);
2009 c->group = mfree(c->group);
2010
2011 strv_free(c->supplementary_groups);
2012 c->supplementary_groups = NULL;
2013
2014 c->pam_name = mfree(c->pam_name);
2015
2016 if (c->capabilities) {
2017 cap_free(c->capabilities);
2018 c->capabilities = NULL;
2019 }
2020
2021 strv_free(c->read_only_dirs);
2022 c->read_only_dirs = NULL;
2023
2024 strv_free(c->read_write_dirs);
2025 c->read_write_dirs = NULL;
2026
2027 strv_free(c->inaccessible_dirs);
2028 c->inaccessible_dirs = NULL;
2029
2030 if (c->cpuset)
2031 CPU_FREE(c->cpuset);
2032
2033 c->utmp_id = mfree(c->utmp_id);
2034 c->selinux_context = mfree(c->selinux_context);
2035 c->apparmor_profile = mfree(c->apparmor_profile);
2036
2037 set_free(c->syscall_filter);
2038 c->syscall_filter = NULL;
2039
2040 set_free(c->syscall_archs);
2041 c->syscall_archs = NULL;
2042
2043 set_free(c->address_families);
2044 c->address_families = NULL;
2045
2046 strv_free(c->runtime_directory);
2047 c->runtime_directory = NULL;
2048
2049 bus_endpoint_free(c->bus_endpoint);
2050 c->bus_endpoint = NULL;
2051 }
2052
2053 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2054 char **i;
2055
2056 assert(c);
2057
2058 if (!runtime_prefix)
2059 return 0;
2060
2061 STRV_FOREACH(i, c->runtime_directory) {
2062 _cleanup_free_ char *p;
2063
2064 p = strjoin(runtime_prefix, "/", *i, NULL);
2065 if (!p)
2066 return -ENOMEM;
2067
2068 /* We execute this synchronously, since we need to be
2069 * sure this is gone when we start the service
2070 * next. */
2071 (void) rm_rf(p, REMOVE_ROOT);
2072 }
2073
2074 return 0;
2075 }
2076
2077 void exec_command_done(ExecCommand *c) {
2078 assert(c);
2079
2080 c->path = mfree(c->path);
2081
2082 strv_free(c->argv);
2083 c->argv = NULL;
2084 }
2085
2086 void exec_command_done_array(ExecCommand *c, unsigned n) {
2087 unsigned i;
2088
2089 for (i = 0; i < n; i++)
2090 exec_command_done(c+i);
2091 }
2092
2093 ExecCommand* exec_command_free_list(ExecCommand *c) {
2094 ExecCommand *i;
2095
2096 while ((i = c)) {
2097 LIST_REMOVE(command, c, i);
2098 exec_command_done(i);
2099 free(i);
2100 }
2101
2102 return NULL;
2103 }
2104
2105 void exec_command_free_array(ExecCommand **c, unsigned n) {
2106 unsigned i;
2107
2108 for (i = 0; i < n; i++)
2109 c[i] = exec_command_free_list(c[i]);
2110 }
2111
2112 typedef struct InvalidEnvInfo {
2113 Unit *unit;
2114 const char *path;
2115 } InvalidEnvInfo;
2116
2117 static void invalid_env(const char *p, void *userdata) {
2118 InvalidEnvInfo *info = userdata;
2119
2120 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2121 }
2122
2123 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2124 char **i, **r = NULL;
2125
2126 assert(c);
2127 assert(l);
2128
2129 STRV_FOREACH(i, c->environment_files) {
2130 char *fn;
2131 int k;
2132 bool ignore = false;
2133 char **p;
2134 _cleanup_globfree_ glob_t pglob = {};
2135 int count, n;
2136
2137 fn = *i;
2138
2139 if (fn[0] == '-') {
2140 ignore = true;
2141 fn ++;
2142 }
2143
2144 if (!path_is_absolute(fn)) {
2145 if (ignore)
2146 continue;
2147
2148 strv_free(r);
2149 return -EINVAL;
2150 }
2151
2152 /* Filename supports globbing, take all matching files */
2153 errno = 0;
2154 if (glob(fn, 0, NULL, &pglob) != 0) {
2155 if (ignore)
2156 continue;
2157
2158 strv_free(r);
2159 return errno ? -errno : -EINVAL;
2160 }
2161 count = pglob.gl_pathc;
2162 if (count == 0) {
2163 if (ignore)
2164 continue;
2165
2166 strv_free(r);
2167 return -EINVAL;
2168 }
2169 for (n = 0; n < count; n++) {
2170 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2171 if (k < 0) {
2172 if (ignore)
2173 continue;
2174
2175 strv_free(r);
2176 return k;
2177 }
2178 /* Log invalid environment variables with filename */
2179 if (p) {
2180 InvalidEnvInfo info = {
2181 .unit = unit,
2182 .path = pglob.gl_pathv[n]
2183 };
2184
2185 p = strv_env_clean_with_callback(p, invalid_env, &info);
2186 }
2187
2188 if (r == NULL)
2189 r = p;
2190 else {
2191 char **m;
2192
2193 m = strv_env_merge(2, r, p);
2194 strv_free(r);
2195 strv_free(p);
2196 if (!m)
2197 return -ENOMEM;
2198
2199 r = m;
2200 }
2201 }
2202 }
2203
2204 *l = r;
2205
2206 return 0;
2207 }
2208
2209 static bool tty_may_match_dev_console(const char *tty) {
2210 _cleanup_free_ char *active = NULL;
2211 char *console;
2212
2213 if (startswith(tty, "/dev/"))
2214 tty += 5;
2215
2216 /* trivial identity? */
2217 if (streq(tty, "console"))
2218 return true;
2219
2220 console = resolve_dev_console(&active);
2221 /* if we could not resolve, assume it may */
2222 if (!console)
2223 return true;
2224
2225 /* "tty0" means the active VC, so it may be the same sometimes */
2226 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2227 }
2228
2229 bool exec_context_may_touch_console(ExecContext *ec) {
2230 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2231 is_terminal_input(ec->std_input) ||
2232 is_terminal_output(ec->std_output) ||
2233 is_terminal_output(ec->std_error)) &&
2234 tty_may_match_dev_console(tty_path(ec));
2235 }
2236
2237 static void strv_fprintf(FILE *f, char **l) {
2238 char **g;
2239
2240 assert(f);
2241
2242 STRV_FOREACH(g, l)
2243 fprintf(f, " %s", *g);
2244 }
2245
2246 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2247 char **e;
2248 unsigned i;
2249
2250 assert(c);
2251 assert(f);
2252
2253 prefix = strempty(prefix);
2254
2255 fprintf(f,
2256 "%sUMask: %04o\n"
2257 "%sWorkingDirectory: %s\n"
2258 "%sRootDirectory: %s\n"
2259 "%sNonBlocking: %s\n"
2260 "%sPrivateTmp: %s\n"
2261 "%sPrivateNetwork: %s\n"
2262 "%sPrivateDevices: %s\n"
2263 "%sProtectHome: %s\n"
2264 "%sProtectSystem: %s\n"
2265 "%sIgnoreSIGPIPE: %s\n",
2266 prefix, c->umask,
2267 prefix, c->working_directory ? c->working_directory : "/",
2268 prefix, c->root_directory ? c->root_directory : "/",
2269 prefix, yes_no(c->non_blocking),
2270 prefix, yes_no(c->private_tmp),
2271 prefix, yes_no(c->private_network),
2272 prefix, yes_no(c->private_devices),
2273 prefix, protect_home_to_string(c->protect_home),
2274 prefix, protect_system_to_string(c->protect_system),
2275 prefix, yes_no(c->ignore_sigpipe));
2276
2277 STRV_FOREACH(e, c->environment)
2278 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2279
2280 STRV_FOREACH(e, c->environment_files)
2281 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2282
2283 if (c->nice_set)
2284 fprintf(f,
2285 "%sNice: %i\n",
2286 prefix, c->nice);
2287
2288 if (c->oom_score_adjust_set)
2289 fprintf(f,
2290 "%sOOMScoreAdjust: %i\n",
2291 prefix, c->oom_score_adjust);
2292
2293 for (i = 0; i < RLIM_NLIMITS; i++)
2294 if (c->rlimit[i])
2295 fprintf(f, "%s%s: "RLIM_FMT"\n",
2296 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2297
2298 if (c->ioprio_set) {
2299 _cleanup_free_ char *class_str = NULL;
2300
2301 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2302 fprintf(f,
2303 "%sIOSchedulingClass: %s\n"
2304 "%sIOPriority: %i\n",
2305 prefix, strna(class_str),
2306 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2307 }
2308
2309 if (c->cpu_sched_set) {
2310 _cleanup_free_ char *policy_str = NULL;
2311
2312 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2313 fprintf(f,
2314 "%sCPUSchedulingPolicy: %s\n"
2315 "%sCPUSchedulingPriority: %i\n"
2316 "%sCPUSchedulingResetOnFork: %s\n",
2317 prefix, strna(policy_str),
2318 prefix, c->cpu_sched_priority,
2319 prefix, yes_no(c->cpu_sched_reset_on_fork));
2320 }
2321
2322 if (c->cpuset) {
2323 fprintf(f, "%sCPUAffinity:", prefix);
2324 for (i = 0; i < c->cpuset_ncpus; i++)
2325 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2326 fprintf(f, " %u", i);
2327 fputs("\n", f);
2328 }
2329
2330 if (c->timer_slack_nsec != NSEC_INFINITY)
2331 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2332
2333 fprintf(f,
2334 "%sStandardInput: %s\n"
2335 "%sStandardOutput: %s\n"
2336 "%sStandardError: %s\n",
2337 prefix, exec_input_to_string(c->std_input),
2338 prefix, exec_output_to_string(c->std_output),
2339 prefix, exec_output_to_string(c->std_error));
2340
2341 if (c->tty_path)
2342 fprintf(f,
2343 "%sTTYPath: %s\n"
2344 "%sTTYReset: %s\n"
2345 "%sTTYVHangup: %s\n"
2346 "%sTTYVTDisallocate: %s\n",
2347 prefix, c->tty_path,
2348 prefix, yes_no(c->tty_reset),
2349 prefix, yes_no(c->tty_vhangup),
2350 prefix, yes_no(c->tty_vt_disallocate));
2351
2352 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2353 c->std_output == EXEC_OUTPUT_KMSG ||
2354 c->std_output == EXEC_OUTPUT_JOURNAL ||
2355 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2356 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2357 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2358 c->std_error == EXEC_OUTPUT_SYSLOG ||
2359 c->std_error == EXEC_OUTPUT_KMSG ||
2360 c->std_error == EXEC_OUTPUT_JOURNAL ||
2361 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2362 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2363 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2364
2365 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2366
2367 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2368 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2369
2370 fprintf(f,
2371 "%sSyslogFacility: %s\n"
2372 "%sSyslogLevel: %s\n",
2373 prefix, strna(fac_str),
2374 prefix, strna(lvl_str));
2375 }
2376
2377 if (c->capabilities) {
2378 _cleanup_cap_free_charp_ char *t;
2379
2380 t = cap_to_text(c->capabilities, NULL);
2381 if (t)
2382 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2383 }
2384
2385 if (c->secure_bits)
2386 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2387 prefix,
2388 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2389 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2390 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2391 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2392 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2393 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2394
2395 if (c->capability_bounding_set_drop) {
2396 unsigned long l;
2397 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2398
2399 for (l = 0; l <= cap_last_cap(); l++)
2400 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2401 fprintf(f, " %s", strna(capability_to_name(l)));
2402
2403 fputs("\n", f);
2404 }
2405
2406 if (c->user)
2407 fprintf(f, "%sUser: %s\n", prefix, c->user);
2408 if (c->group)
2409 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2410
2411 if (strv_length(c->supplementary_groups) > 0) {
2412 fprintf(f, "%sSupplementaryGroups:", prefix);
2413 strv_fprintf(f, c->supplementary_groups);
2414 fputs("\n", f);
2415 }
2416
2417 if (c->pam_name)
2418 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2419
2420 if (strv_length(c->read_write_dirs) > 0) {
2421 fprintf(f, "%sReadWriteDirs:", prefix);
2422 strv_fprintf(f, c->read_write_dirs);
2423 fputs("\n", f);
2424 }
2425
2426 if (strv_length(c->read_only_dirs) > 0) {
2427 fprintf(f, "%sReadOnlyDirs:", prefix);
2428 strv_fprintf(f, c->read_only_dirs);
2429 fputs("\n", f);
2430 }
2431
2432 if (strv_length(c->inaccessible_dirs) > 0) {
2433 fprintf(f, "%sInaccessibleDirs:", prefix);
2434 strv_fprintf(f, c->inaccessible_dirs);
2435 fputs("\n", f);
2436 }
2437
2438 if (c->utmp_id)
2439 fprintf(f,
2440 "%sUtmpIdentifier: %s\n",
2441 prefix, c->utmp_id);
2442
2443 if (c->selinux_context)
2444 fprintf(f,
2445 "%sSELinuxContext: %s%s\n",
2446 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2447
2448 if (c->personality != PERSONALITY_INVALID)
2449 fprintf(f,
2450 "%sPersonality: %s\n",
2451 prefix, strna(personality_to_string(c->personality)));
2452
2453 if (c->syscall_filter) {
2454 #ifdef HAVE_SECCOMP
2455 Iterator j;
2456 void *id;
2457 bool first = true;
2458 #endif
2459
2460 fprintf(f,
2461 "%sSystemCallFilter: ",
2462 prefix);
2463
2464 if (!c->syscall_whitelist)
2465 fputc('~', f);
2466
2467 #ifdef HAVE_SECCOMP
2468 SET_FOREACH(id, c->syscall_filter, j) {
2469 _cleanup_free_ char *name = NULL;
2470
2471 if (first)
2472 first = false;
2473 else
2474 fputc(' ', f);
2475
2476 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2477 fputs(strna(name), f);
2478 }
2479 #endif
2480
2481 fputc('\n', f);
2482 }
2483
2484 if (c->syscall_archs) {
2485 #ifdef HAVE_SECCOMP
2486 Iterator j;
2487 void *id;
2488 #endif
2489
2490 fprintf(f,
2491 "%sSystemCallArchitectures:",
2492 prefix);
2493
2494 #ifdef HAVE_SECCOMP
2495 SET_FOREACH(id, c->syscall_archs, j)
2496 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2497 #endif
2498 fputc('\n', f);
2499 }
2500
2501 if (c->syscall_errno != 0)
2502 fprintf(f,
2503 "%sSystemCallErrorNumber: %s\n",
2504 prefix, strna(errno_to_name(c->syscall_errno)));
2505
2506 if (c->apparmor_profile)
2507 fprintf(f,
2508 "%sAppArmorProfile: %s%s\n",
2509 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2510 }
2511
2512 bool exec_context_maintains_privileges(ExecContext *c) {
2513 assert(c);
2514
2515 /* Returns true if the process forked off would run run under
2516 * an unchanged UID or as root. */
2517
2518 if (!c->user)
2519 return true;
2520
2521 if (streq(c->user, "root") || streq(c->user, "0"))
2522 return true;
2523
2524 return false;
2525 }
2526
2527 void exec_status_start(ExecStatus *s, pid_t pid) {
2528 assert(s);
2529
2530 zero(*s);
2531 s->pid = pid;
2532 dual_timestamp_get(&s->start_timestamp);
2533 }
2534
2535 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2536 assert(s);
2537
2538 if (s->pid && s->pid != pid)
2539 zero(*s);
2540
2541 s->pid = pid;
2542 dual_timestamp_get(&s->exit_timestamp);
2543
2544 s->code = code;
2545 s->status = status;
2546
2547 if (context) {
2548 if (context->utmp_id)
2549 utmp_put_dead_process(context->utmp_id, pid, code, status);
2550
2551 exec_context_tty_reset(context);
2552 }
2553 }
2554
2555 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2556 char buf[FORMAT_TIMESTAMP_MAX];
2557
2558 assert(s);
2559 assert(f);
2560
2561 if (s->pid <= 0)
2562 return;
2563
2564 prefix = strempty(prefix);
2565
2566 fprintf(f,
2567 "%sPID: "PID_FMT"\n",
2568 prefix, s->pid);
2569
2570 if (s->start_timestamp.realtime > 0)
2571 fprintf(f,
2572 "%sStart Timestamp: %s\n",
2573 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2574
2575 if (s->exit_timestamp.realtime > 0)
2576 fprintf(f,
2577 "%sExit Timestamp: %s\n"
2578 "%sExit Code: %s\n"
2579 "%sExit Status: %i\n",
2580 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2581 prefix, sigchld_code_to_string(s->code),
2582 prefix, s->status);
2583 }
2584
2585 char *exec_command_line(char **argv) {
2586 size_t k;
2587 char *n, *p, **a;
2588 bool first = true;
2589
2590 assert(argv);
2591
2592 k = 1;
2593 STRV_FOREACH(a, argv)
2594 k += strlen(*a)+3;
2595
2596 if (!(n = new(char, k)))
2597 return NULL;
2598
2599 p = n;
2600 STRV_FOREACH(a, argv) {
2601
2602 if (!first)
2603 *(p++) = ' ';
2604 else
2605 first = false;
2606
2607 if (strpbrk(*a, WHITESPACE)) {
2608 *(p++) = '\'';
2609 p = stpcpy(p, *a);
2610 *(p++) = '\'';
2611 } else
2612 p = stpcpy(p, *a);
2613
2614 }
2615
2616 *p = 0;
2617
2618 /* FIXME: this doesn't really handle arguments that have
2619 * spaces and ticks in them */
2620
2621 return n;
2622 }
2623
2624 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2625 _cleanup_free_ char *cmd = NULL;
2626 const char *prefix2;
2627
2628 assert(c);
2629 assert(f);
2630
2631 prefix = strempty(prefix);
2632 prefix2 = strjoina(prefix, "\t");
2633
2634 cmd = exec_command_line(c->argv);
2635 fprintf(f,
2636 "%sCommand Line: %s\n",
2637 prefix, cmd ? cmd : strerror(ENOMEM));
2638
2639 exec_status_dump(&c->exec_status, f, prefix2);
2640 }
2641
2642 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2643 assert(f);
2644
2645 prefix = strempty(prefix);
2646
2647 LIST_FOREACH(command, c, c)
2648 exec_command_dump(c, f, prefix);
2649 }
2650
2651 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2652 ExecCommand *end;
2653
2654 assert(l);
2655 assert(e);
2656
2657 if (*l) {
2658 /* It's kind of important, that we keep the order here */
2659 LIST_FIND_TAIL(command, *l, end);
2660 LIST_INSERT_AFTER(command, *l, end, e);
2661 } else
2662 *l = e;
2663 }
2664
2665 int exec_command_set(ExecCommand *c, const char *path, ...) {
2666 va_list ap;
2667 char **l, *p;
2668
2669 assert(c);
2670 assert(path);
2671
2672 va_start(ap, path);
2673 l = strv_new_ap(path, ap);
2674 va_end(ap);
2675
2676 if (!l)
2677 return -ENOMEM;
2678
2679 p = strdup(path);
2680 if (!p) {
2681 strv_free(l);
2682 return -ENOMEM;
2683 }
2684
2685 free(c->path);
2686 c->path = p;
2687
2688 strv_free(c->argv);
2689 c->argv = l;
2690
2691 return 0;
2692 }
2693
2694 int exec_command_append(ExecCommand *c, const char *path, ...) {
2695 _cleanup_strv_free_ char **l = NULL;
2696 va_list ap;
2697 int r;
2698
2699 assert(c);
2700 assert(path);
2701
2702 va_start(ap, path);
2703 l = strv_new_ap(path, ap);
2704 va_end(ap);
2705
2706 if (!l)
2707 return -ENOMEM;
2708
2709 r = strv_extend_strv(&c->argv, l);
2710 if (r < 0)
2711 return r;
2712
2713 return 0;
2714 }
2715
2716
2717 static int exec_runtime_allocate(ExecRuntime **rt) {
2718
2719 if (*rt)
2720 return 0;
2721
2722 *rt = new0(ExecRuntime, 1);
2723 if (!*rt)
2724 return -ENOMEM;
2725
2726 (*rt)->n_ref = 1;
2727 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2728
2729 return 0;
2730 }
2731
2732 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2733 int r;
2734
2735 assert(rt);
2736 assert(c);
2737 assert(id);
2738
2739 if (*rt)
2740 return 1;
2741
2742 if (!c->private_network && !c->private_tmp)
2743 return 0;
2744
2745 r = exec_runtime_allocate(rt);
2746 if (r < 0)
2747 return r;
2748
2749 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2750 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2751 return -errno;
2752 }
2753
2754 if (c->private_tmp && !(*rt)->tmp_dir) {
2755 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2756 if (r < 0)
2757 return r;
2758 }
2759
2760 return 1;
2761 }
2762
2763 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2764 assert(r);
2765 assert(r->n_ref > 0);
2766
2767 r->n_ref++;
2768 return r;
2769 }
2770
2771 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2772
2773 if (!r)
2774 return NULL;
2775
2776 assert(r->n_ref > 0);
2777
2778 r->n_ref--;
2779 if (r->n_ref > 0)
2780 return NULL;
2781
2782 free(r->tmp_dir);
2783 free(r->var_tmp_dir);
2784 safe_close_pair(r->netns_storage_socket);
2785 free(r);
2786
2787 return NULL;
2788 }
2789
2790 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2791 assert(u);
2792 assert(f);
2793 assert(fds);
2794
2795 if (!rt)
2796 return 0;
2797
2798 if (rt->tmp_dir)
2799 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2800
2801 if (rt->var_tmp_dir)
2802 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2803
2804 if (rt->netns_storage_socket[0] >= 0) {
2805 int copy;
2806
2807 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2808 if (copy < 0)
2809 return copy;
2810
2811 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2812 }
2813
2814 if (rt->netns_storage_socket[1] >= 0) {
2815 int copy;
2816
2817 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2818 if (copy < 0)
2819 return copy;
2820
2821 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2822 }
2823
2824 return 0;
2825 }
2826
2827 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2828 int r;
2829
2830 assert(rt);
2831 assert(key);
2832 assert(value);
2833
2834 if (streq(key, "tmp-dir")) {
2835 char *copy;
2836
2837 r = exec_runtime_allocate(rt);
2838 if (r < 0)
2839 return log_oom();
2840
2841 copy = strdup(value);
2842 if (!copy)
2843 return log_oom();
2844
2845 free((*rt)->tmp_dir);
2846 (*rt)->tmp_dir = copy;
2847
2848 } else if (streq(key, "var-tmp-dir")) {
2849 char *copy;
2850
2851 r = exec_runtime_allocate(rt);
2852 if (r < 0)
2853 return log_oom();
2854
2855 copy = strdup(value);
2856 if (!copy)
2857 return log_oom();
2858
2859 free((*rt)->var_tmp_dir);
2860 (*rt)->var_tmp_dir = copy;
2861
2862 } else if (streq(key, "netns-socket-0")) {
2863 int fd;
2864
2865 r = exec_runtime_allocate(rt);
2866 if (r < 0)
2867 return log_oom();
2868
2869 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2870 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2871 else {
2872 safe_close((*rt)->netns_storage_socket[0]);
2873 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2874 }
2875 } else if (streq(key, "netns-socket-1")) {
2876 int fd;
2877
2878 r = exec_runtime_allocate(rt);
2879 if (r < 0)
2880 return log_oom();
2881
2882 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2883 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2884 else {
2885 safe_close((*rt)->netns_storage_socket[1]);
2886 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2887 }
2888 } else
2889 return 0;
2890
2891 return 1;
2892 }
2893
2894 static void *remove_tmpdir_thread(void *p) {
2895 _cleanup_free_ char *path = p;
2896
2897 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2898 return NULL;
2899 }
2900
2901 void exec_runtime_destroy(ExecRuntime *rt) {
2902 int r;
2903
2904 if (!rt)
2905 return;
2906
2907 /* If there are multiple users of this, let's leave the stuff around */
2908 if (rt->n_ref > 1)
2909 return;
2910
2911 if (rt->tmp_dir) {
2912 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2913
2914 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2915 if (r < 0) {
2916 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2917 free(rt->tmp_dir);
2918 }
2919
2920 rt->tmp_dir = NULL;
2921 }
2922
2923 if (rt->var_tmp_dir) {
2924 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2925
2926 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2927 if (r < 0) {
2928 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2929 free(rt->var_tmp_dir);
2930 }
2931
2932 rt->var_tmp_dir = NULL;
2933 }
2934
2935 safe_close_pair(rt->netns_storage_socket);
2936 }
2937
2938 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2939 [EXEC_INPUT_NULL] = "null",
2940 [EXEC_INPUT_TTY] = "tty",
2941 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2942 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2943 [EXEC_INPUT_SOCKET] = "socket"
2944 };
2945
2946 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2947
2948 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2949 [EXEC_OUTPUT_INHERIT] = "inherit",
2950 [EXEC_OUTPUT_NULL] = "null",
2951 [EXEC_OUTPUT_TTY] = "tty",
2952 [EXEC_OUTPUT_SYSLOG] = "syslog",
2953 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2954 [EXEC_OUTPUT_KMSG] = "kmsg",
2955 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2956 [EXEC_OUTPUT_JOURNAL] = "journal",
2957 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2958 [EXEC_OUTPUT_SOCKET] = "socket"
2959 };
2960
2961 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2962
2963 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2964 [EXEC_UTMP_INIT] = "init",
2965 [EXEC_UTMP_LOGIN] = "login",
2966 [EXEC_UTMP_USER] = "user",
2967 };
2968
2969 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);