]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
core: always initialize ExecParamters.bus_endpoint_fd to -1
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "rm-rf.h"
53 #include "execute.h"
54 #include "strv.h"
55 #include "macro.h"
56 #include "capability.h"
57 #include "util.h"
58 #include "log.h"
59 #include "sd-messages.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82
83 #ifdef HAVE_APPARMOR
84 #include "apparmor-util.h"
85 #endif
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
/* First wait on the idle pipe before we tell the manager we are bored */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
/* Second wait, giving the manager time to react to our notification */
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals we take over.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the journal stream socket */
#define SNDBUF_SIZE (8 * 1024 * 1024)
98
/* Moves the passed file descriptors so that fds[i] ends up at number i+3,
 * i.e. directly after stdin/stdout/stderr.
 *
 * Note that this modifies the fds array in place! Returns 0 on success or a
 * negative errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int moved;

                        /* Nothing to do if the fd already sits at its slot */
                        if (fds[idx] == wanted)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we asked for was still occupied, so we got
                         * a higher number. Remember the first such index and
                         * do another pass starting there. */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
142
/* Sets or clears O_NONBLOCK (according to 'nonblock') and always clears
 * FD_CLOEXEC on each of the passed fds. Returns 0 on success or the first
 * negative error reported by the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < n_fds; idx++) {
                int r;

                r = fd_nonblock(fds[idx], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are meant
                 * to be handed over to the children we execute */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171 assert(context);
172
173 if (context->tty_path)
174 return context->tty_path;
175
176 return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180 assert(context);
181
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
184
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
187
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193 return
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given flags and makes it available as fd number
 * 'nfd'. Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* We were lucky and got the right number right away */
        if (fd == nfd)
                return nfd;

        r = nfd;
        if (dup2(fd, nfd) < 0)
                r = -errno;

        safe_close(fd);
        return r;
}
217
218 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
219 union sockaddr_union sa = {
220 .un.sun_family = AF_UNIX,
221 .un.sun_path = "/run/systemd/journal/stdout",
222 };
223 uid_t olduid = UID_INVALID;
224 gid_t oldgid = GID_INVALID;
225 int r;
226
227 if (gid != GID_INVALID) {
228 oldgid = getgid();
229
230 r = setegid(gid);
231 if (r < 0)
232 return -errno;
233 }
234
235 if (uid != UID_INVALID) {
236 olduid = getuid();
237
238 r = seteuid(uid);
239 if (r < 0) {
240 r = -errno;
241 goto restore_gid;
242 }
243 }
244
245 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
246 if (r < 0)
247 r = -errno;
248
249 /* If we fail to restore the uid or gid, things will likely
250 fail later on. This should only happen if an LSM interferes. */
251
252 if (uid != UID_INVALID)
253 (void) seteuid(olduid);
254
255 restore_gid:
256 if (gid != GID_INVALID)
257 (void) setegid(oldgid);
258
259 return r;
260 }
261
262 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
263 int fd, r;
264
265 assert(context);
266 assert(output < _EXEC_OUTPUT_MAX);
267 assert(ident);
268 assert(nfd >= 0);
269
270 fd = socket(AF_UNIX, SOCK_STREAM, 0);
271 if (fd < 0)
272 return -errno;
273
274 r = connect_journal_socket(fd, uid, gid);
275 if (r < 0)
276 return r;
277
278 if (shutdown(fd, SHUT_RD) < 0) {
279 safe_close(fd);
280 return -errno;
281 }
282
283 fd_inc_sndbuf(fd, SNDBUF_SIZE);
284
285 dprintf(fd,
286 "%s\n"
287 "%s\n"
288 "%i\n"
289 "%i\n"
290 "%i\n"
291 "%i\n"
292 "%i\n",
293 context->syslog_identifier ? context->syslog_identifier : ident,
294 unit_id,
295 context->syslog_priority,
296 !!context->syslog_level_prefix,
297 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
298 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
299 is_terminal_output(output));
300
301 if (fd != nfd) {
302 r = dup2(fd, nfd) < 0 ? -errno : nfd;
303 safe_close(fd);
304 } else
305 r = nfd;
306
307 return r;
308 }
/* Opens the terminal at 'path' (never as controlling TTY) and makes it
 * available as fd number 'nfd'. Returns nfd on success or a negative error. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        r = nfd;
        if (dup2(fd, nfd) < 0)
                r = -errno;

        safe_close(fd);
        return r;
}
326
327 static bool is_terminal_input(ExecInput i) {
328 return
329 i == EXEC_INPUT_TTY ||
330 i == EXEC_INPUT_TTY_FORCE ||
331 i == EXEC_INPUT_TTY_FAIL;
332 }
333
334 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
335
336 if (is_terminal_input(std_input) && !apply_tty_stdin)
337 return EXEC_INPUT_NULL;
338
339 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
340 return EXEC_INPUT_NULL;
341
342 return std_input;
343 }
344
345 static int fixup_output(ExecOutput std_output, int socket_fd) {
346
347 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
348 return EXEC_OUTPUT_INHERIT;
349
350 return std_output;
351 }
352
353 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
354 ExecInput i;
355
356 assert(context);
357
358 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
359
360 switch (i) {
361
362 case EXEC_INPUT_NULL:
363 return open_null_as(O_RDONLY, STDIN_FILENO);
364
365 case EXEC_INPUT_TTY:
366 case EXEC_INPUT_TTY_FORCE:
367 case EXEC_INPUT_TTY_FAIL: {
368 int fd, r;
369
370 fd = acquire_terminal(tty_path(context),
371 i == EXEC_INPUT_TTY_FAIL,
372 i == EXEC_INPUT_TTY_FORCE,
373 false,
374 USEC_INFINITY);
375 if (fd < 0)
376 return fd;
377
378 if (fd != STDIN_FILENO) {
379 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
380 safe_close(fd);
381 } else
382 r = STDIN_FILENO;
383
384 return r;
385 }
386
387 case EXEC_INPUT_SOCKET:
388 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
389
390 default:
391 assert_not_reached("Unknown input type");
392 }
393 }
394
395 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
396 ExecOutput o;
397 ExecInput i;
398 int r;
399
400 assert(context);
401 assert(ident);
402
403 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
404 o = fixup_output(context->std_output, socket_fd);
405
406 if (fileno == STDERR_FILENO) {
407 ExecOutput e;
408 e = fixup_output(context->std_error, socket_fd);
409
410 /* This expects the input and output are already set up */
411
412 /* Don't change the stderr file descriptor if we inherit all
413 * the way and are not on a tty */
414 if (e == EXEC_OUTPUT_INHERIT &&
415 o == EXEC_OUTPUT_INHERIT &&
416 i == EXEC_INPUT_NULL &&
417 !is_terminal_input(context->std_input) &&
418 getppid () != 1)
419 return fileno;
420
421 /* Duplicate from stdout if possible */
422 if (e == o || e == EXEC_OUTPUT_INHERIT)
423 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
424
425 o = e;
426
427 } else if (o == EXEC_OUTPUT_INHERIT) {
428 /* If input got downgraded, inherit the original value */
429 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
430 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
431
432 /* If the input is connected to anything that's not a /dev/null, inherit that... */
433 if (i != EXEC_INPUT_NULL)
434 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
435
436 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
437 if (getppid() != 1)
438 return fileno;
439
440 /* We need to open /dev/null here anew, to get the right access mode. */
441 return open_null_as(O_WRONLY, fileno);
442 }
443
444 switch (o) {
445
446 case EXEC_OUTPUT_NULL:
447 return open_null_as(O_WRONLY, fileno);
448
449 case EXEC_OUTPUT_TTY:
450 if (is_terminal_input(i))
451 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
452
453 /* We don't reset the terminal if this is just about output */
454 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
455
456 case EXEC_OUTPUT_SYSLOG:
457 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
458 case EXEC_OUTPUT_KMSG:
459 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
460 case EXEC_OUTPUT_JOURNAL:
461 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
462 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
463 if (r < 0) {
464 log_unit_struct(unit_id,
465 LOG_ERR,
466 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
467 fileno == STDOUT_FILENO ? "stdout" : "stderr",
468 unit_id, strerror(-r)),
469 LOG_ERRNO(-r),
470 NULL);
471 r = open_null_as(O_WRONLY, fileno);
472 }
473 return r;
474
475 case EXEC_OUTPUT_SOCKET:
476 assert(socket_fd >= 0);
477 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
478
479 default:
480 assert_not_reached("Unknown error type");
481 }
482 }
483
484 static int chown_terminal(int fd, uid_t uid) {
485 struct stat st;
486
487 assert(fd >= 0);
488
489 /* This might fail. What matters are the results. */
490 (void) fchown(fd, uid, -1);
491 (void) fchmod(fd, TTY_MODE);
492
493 if (fstat(fd, &st) < 0)
494 return -errno;
495
496 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
497 return -EPERM;
498
499 return 0;
500 }
501
502 static int setup_confirm_stdio(int *_saved_stdin,
503 int *_saved_stdout) {
504 int fd = -1, saved_stdin, saved_stdout = -1, r;
505
506 assert(_saved_stdin);
507 assert(_saved_stdout);
508
509 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
510 if (saved_stdin < 0)
511 return -errno;
512
513 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
514 if (saved_stdout < 0) {
515 r = errno;
516 goto fail;
517 }
518
519 fd = acquire_terminal(
520 "/dev/console",
521 false,
522 false,
523 false,
524 DEFAULT_CONFIRM_USEC);
525 if (fd < 0) {
526 r = fd;
527 goto fail;
528 }
529
530 r = chown_terminal(fd, getuid());
531 if (r < 0)
532 goto fail;
533
534 if (dup2(fd, STDIN_FILENO) < 0) {
535 r = -errno;
536 goto fail;
537 }
538
539 if (dup2(fd, STDOUT_FILENO) < 0) {
540 r = -errno;
541 goto fail;
542 }
543
544 if (fd >= 2)
545 safe_close(fd);
546
547 *_saved_stdin = saved_stdin;
548 *_saved_stdout = saved_stdout;
549
550 return 0;
551
552 fail:
553 safe_close(saved_stdout);
554 safe_close(saved_stdin);
555 safe_close(fd);
556
557 return r;
558 }
559
560 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
561 _cleanup_close_ int fd = -1;
562 va_list ap;
563
564 assert(format);
565
566 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
567 if (fd < 0)
568 return fd;
569
570 va_start(ap, format);
571 vdprintf(fd, format, ap);
572 va_end(ap);
573
574 return 0;
575 }
576
/* Undoes setup_confirm_stdio(): releases the terminal, moves the saved fds
 * back to stdin/stdout and closes the saved copies. Returns 0 on success or
 * the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
600
601 static int ask_for_confirmation(char *response, char **argv) {
602 int saved_stdout = -1, saved_stdin = -1, r;
603 _cleanup_free_ char *line = NULL;
604
605 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
606 if (r < 0)
607 return r;
608
609 line = exec_command_line(argv);
610 if (!line)
611 return -ENOMEM;
612
613 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
614
615 restore_confirm_stdio(&saved_stdin, &saved_stdout);
616
617 return r;
618 }
619
620 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
621 bool keep_groups = false;
622 int r;
623
624 assert(context);
625
626 /* Lookup and set GID and supplementary group list. Here too
627 * we avoid NSS lookups for gid=0. */
628
629 if (context->group || username) {
630
631 if (context->group) {
632 const char *g = context->group;
633
634 if ((r = get_group_creds(&g, &gid)) < 0)
635 return r;
636 }
637
638 /* First step, initialize groups from /etc/groups */
639 if (username && gid != 0) {
640 if (initgroups(username, gid) < 0)
641 return -errno;
642
643 keep_groups = true;
644 }
645
646 /* Second step, set our gids */
647 if (setresgid(gid, gid, gid) < 0)
648 return -errno;
649 }
650
651 if (context->supplementary_groups) {
652 int ngroups_max, k;
653 gid_t *gids;
654 char **i;
655
656 /* Final step, initialize any manually set supplementary groups */
657 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
658
659 if (!(gids = new(gid_t, ngroups_max)))
660 return -ENOMEM;
661
662 if (keep_groups) {
663 if ((k = getgroups(ngroups_max, gids)) < 0) {
664 free(gids);
665 return -errno;
666 }
667 } else
668 k = 0;
669
670 STRV_FOREACH(i, context->supplementary_groups) {
671 const char *g;
672
673 if (k >= ngroups_max) {
674 free(gids);
675 return -E2BIG;
676 }
677
678 g = *i;
679 r = get_group_creds(&g, gids+k);
680 if (r < 0) {
681 free(gids);
682 return r;
683 }
684
685 k++;
686 }
687
688 if (setgroups(k, gids) < 0) {
689 free(gids);
690 return -errno;
691 }
692
693 free(gids);
694 }
695
696 return 0;
697 }
698
699 static int enforce_user(const ExecContext *context, uid_t uid) {
700 assert(context);
701
702 /* Sets (but doesn't lookup) the uid and make sure we keep the
703 * capabilities while doing so. */
704
705 if (context->capabilities) {
706 _cleanup_cap_free_ cap_t d = NULL;
707 static const cap_value_t bits[] = {
708 CAP_SETUID, /* Necessary so that we can run setresuid() below */
709 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
710 };
711
712 /* First step: If we need to keep capabilities but
713 * drop privileges we need to make sure we keep our
714 * caps, while we drop privileges. */
715 if (uid != 0) {
716 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
717
718 if (prctl(PR_GET_SECUREBITS) != sb)
719 if (prctl(PR_SET_SECUREBITS, sb) < 0)
720 return -errno;
721 }
722
723 /* Second step: set the capabilities. This will reduce
724 * the capabilities to the minimum we need. */
725
726 d = cap_dup(context->capabilities);
727 if (!d)
728 return -errno;
729
730 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
731 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
732 return -errno;
733
734 if (cap_set_proc(d) < 0)
735 return -errno;
736 }
737
738 /* Third step: actually set the uids */
739 if (setresuid(uid, uid, uid) < 0)
740 return -errno;
741
742 /* At this point we should have all necessary capabilities but
743 are otherwise a normal user. However, the caps might got
744 corrupted due to the setresuid() so we need clean them up
745 later. This is done outside of this call. */
746
747 return 0;
748 }
749
750 #ifdef HAVE_PAM
751
752 static int null_conv(
753 int num_msg,
754 const struct pam_message **msg,
755 struct pam_response **resp,
756 void *appdata_ptr) {
757
758 /* We don't support conversations */
759
760 return PAM_CONV_ERR;
761 }
762
763 static int setup_pam(
764 const char *name,
765 const char *user,
766 uid_t uid,
767 const char *tty,
768 char ***pam_env,
769 int fds[], unsigned n_fds) {
770
771 static const struct pam_conv conv = {
772 .conv = null_conv,
773 .appdata_ptr = NULL
774 };
775
776 pam_handle_t *handle = NULL;
777 sigset_t ss, old_ss;
778 int pam_code = PAM_SUCCESS;
779 int err;
780 char **e = NULL;
781 bool close_session = false;
782 pid_t pam_pid = 0, parent_pid;
783 int flags = 0;
784
785 assert(name);
786 assert(user);
787 assert(pam_env);
788
789 /* We set up PAM in the parent process, then fork. The child
790 * will then stay around until killed via PR_GET_PDEATHSIG or
791 * systemd via the cgroup logic. It will then remove the PAM
792 * session again. The parent process will exec() the actual
793 * daemon. We do things this way to ensure that the main PID
794 * of the daemon is the one we initially fork()ed. */
795
796 if (log_get_max_level() < LOG_DEBUG)
797 flags |= PAM_SILENT;
798
799 pam_code = pam_start(name, user, &conv, &handle);
800 if (pam_code != PAM_SUCCESS) {
801 handle = NULL;
802 goto fail;
803 }
804
805 if (tty) {
806 pam_code = pam_set_item(handle, PAM_TTY, tty);
807 if (pam_code != PAM_SUCCESS)
808 goto fail;
809 }
810
811 pam_code = pam_acct_mgmt(handle, flags);
812 if (pam_code != PAM_SUCCESS)
813 goto fail;
814
815 pam_code = pam_open_session(handle, flags);
816 if (pam_code != PAM_SUCCESS)
817 goto fail;
818
819 close_session = true;
820
821 e = pam_getenvlist(handle);
822 if (!e) {
823 pam_code = PAM_BUF_ERR;
824 goto fail;
825 }
826
827 /* Block SIGTERM, so that we know that it won't get lost in
828 * the child */
829 if (sigemptyset(&ss) < 0 ||
830 sigaddset(&ss, SIGTERM) < 0 ||
831 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
832 goto fail;
833
834 parent_pid = getpid();
835
836 pam_pid = fork();
837 if (pam_pid < 0)
838 goto fail;
839
840 if (pam_pid == 0) {
841 int sig;
842 int r = EXIT_PAM;
843
844 /* The child's job is to reset the PAM session on
845 * termination */
846
847 /* This string must fit in 10 chars (i.e. the length
848 * of "/sbin/init"), to look pretty in /bin/ps */
849 rename_process("(sd-pam)");
850
851 /* Make sure we don't keep open the passed fds in this
852 child. We assume that otherwise only those fds are
853 open here that have been opened by PAM. */
854 close_many(fds, n_fds);
855
856 /* Drop privileges - we don't need any to pam_close_session
857 * and this will make PR_SET_PDEATHSIG work in most cases.
858 * If this fails, ignore the error - but expect sd-pam threads
859 * to fail to exit normally */
860 if (setresuid(uid, uid, uid) < 0)
861 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
862
863 /* Wait until our parent died. This will only work if
864 * the above setresuid() succeeds, otherwise the kernel
865 * will not allow unprivileged parents kill their privileged
866 * children this way. We rely on the control groups kill logic
867 * to do the rest for us. */
868 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
869 goto child_finish;
870
871 /* Check if our parent process might already have
872 * died? */
873 if (getppid() == parent_pid) {
874 for (;;) {
875 if (sigwait(&ss, &sig) < 0) {
876 if (errno == EINTR)
877 continue;
878
879 goto child_finish;
880 }
881
882 assert(sig == SIGTERM);
883 break;
884 }
885 }
886
887 /* If our parent died we'll end the session */
888 if (getppid() != parent_pid) {
889 pam_code = pam_close_session(handle, flags);
890 if (pam_code != PAM_SUCCESS)
891 goto child_finish;
892 }
893
894 r = 0;
895
896 child_finish:
897 pam_end(handle, pam_code | flags);
898 _exit(r);
899 }
900
901 /* If the child was forked off successfully it will do all the
902 * cleanups, so forget about the handle here. */
903 handle = NULL;
904
905 /* Unblock SIGTERM again in the parent */
906 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
907 goto fail;
908
909 /* We close the log explicitly here, since the PAM modules
910 * might have opened it, but we don't want this fd around. */
911 closelog();
912
913 *pam_env = e;
914 e = NULL;
915
916 return 0;
917
918 fail:
919 if (pam_code != PAM_SUCCESS) {
920 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
921 err = -EPERM; /* PAM errors do not map to errno */
922 } else {
923 log_error_errno(errno, "PAM failed: %m");
924 err = -errno;
925 }
926
927 if (handle) {
928 if (close_session)
929 pam_code = pam_close_session(handle, flags);
930
931 pam_end(handle, pam_code | flags);
932 }
933
934 strv_free(e);
935
936 closelog();
937
938 if (pam_pid > 1) {
939 kill(pam_pid, SIGTERM);
940 kill(pam_pid, SIGCONT);
941 }
942
943 return err;
944 }
945 #endif
946
/* Renames the current process to "(<name>)", where <name> is made up of the
 * last (up to) 8 characters of the file name of 'path'. If the file name is
 * empty "(...)" is used instead.
 *
 * The result has to fit in 10 chars (i.e. the length of "/sbin/init") to
 * look pretty in /bin/ps. */
static void rename_process_from_path(const char *path) {
        char buffer[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: the end of the name is usually the more
                 * interesting part, as the beginning might just be
                 * "systemd-" */
                name += len - 8;
                len = 8;
        }

        buffer[0] = '(';
        memcpy(buffer + 1, name, len);
        buffer[len + 1] = ')';
        buffer[len + 2] = 0;

        rename_process(buffer);
}
977
978 #ifdef HAVE_SECCOMP
979
980 static int apply_seccomp(const ExecContext *c) {
981 uint32_t negative_action, action;
982 scmp_filter_ctx *seccomp;
983 Iterator i;
984 void *id;
985 int r;
986
987 assert(c);
988
989 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
990
991 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
992 if (!seccomp)
993 return -ENOMEM;
994
995 if (c->syscall_archs) {
996
997 SET_FOREACH(id, c->syscall_archs, i) {
998 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
999 if (r == -EEXIST)
1000 continue;
1001 if (r < 0)
1002 goto finish;
1003 }
1004
1005 } else {
1006 r = seccomp_add_secondary_archs(seccomp);
1007 if (r < 0)
1008 goto finish;
1009 }
1010
1011 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1012 SET_FOREACH(id, c->syscall_filter, i) {
1013 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1014 if (r < 0)
1015 goto finish;
1016 }
1017
1018 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1019 if (r < 0)
1020 goto finish;
1021
1022 r = seccomp_load(seccomp);
1023
1024 finish:
1025 seccomp_release(seccomp);
1026 return r;
1027 }
1028
1029 static int apply_address_families(const ExecContext *c) {
1030 scmp_filter_ctx *seccomp;
1031 Iterator i;
1032 int r;
1033
1034 assert(c);
1035
1036 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1037 if (!seccomp)
1038 return -ENOMEM;
1039
1040 r = seccomp_add_secondary_archs(seccomp);
1041 if (r < 0)
1042 goto finish;
1043
1044 if (c->address_families_whitelist) {
1045 int af, first = 0, last = 0;
1046 void *afp;
1047
1048 /* If this is a whitelist, we first block the address
1049 * families that are out of range and then everything
1050 * that is not in the set. First, we find the lowest
1051 * and highest address family in the set. */
1052
1053 SET_FOREACH(afp, c->address_families, i) {
1054 af = PTR_TO_INT(afp);
1055
1056 if (af <= 0 || af >= af_max())
1057 continue;
1058
1059 if (first == 0 || af < first)
1060 first = af;
1061
1062 if (last == 0 || af > last)
1063 last = af;
1064 }
1065
1066 assert((first == 0) == (last == 0));
1067
1068 if (first == 0) {
1069
1070 /* No entries in the valid range, block everything */
1071 r = seccomp_rule_add(
1072 seccomp,
1073 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1074 SCMP_SYS(socket),
1075 0);
1076 if (r < 0)
1077 goto finish;
1078
1079 } else {
1080
1081 /* Block everything below the first entry */
1082 r = seccomp_rule_add(
1083 seccomp,
1084 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1085 SCMP_SYS(socket),
1086 1,
1087 SCMP_A0(SCMP_CMP_LT, first));
1088 if (r < 0)
1089 goto finish;
1090
1091 /* Block everything above the last entry */
1092 r = seccomp_rule_add(
1093 seccomp,
1094 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1095 SCMP_SYS(socket),
1096 1,
1097 SCMP_A0(SCMP_CMP_GT, last));
1098 if (r < 0)
1099 goto finish;
1100
1101 /* Block everything between the first and last
1102 * entry */
1103 for (af = 1; af < af_max(); af++) {
1104
1105 if (set_contains(c->address_families, INT_TO_PTR(af)))
1106 continue;
1107
1108 r = seccomp_rule_add(
1109 seccomp,
1110 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1111 SCMP_SYS(socket),
1112 1,
1113 SCMP_A0(SCMP_CMP_EQ, af));
1114 if (r < 0)
1115 goto finish;
1116 }
1117 }
1118
1119 } else {
1120 void *af;
1121
1122 /* If this is a blacklist, then generate one rule for
1123 * each address family that are then combined in OR
1124 * checks. */
1125
1126 SET_FOREACH(af, c->address_families, i) {
1127
1128 r = seccomp_rule_add(
1129 seccomp,
1130 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1131 SCMP_SYS(socket),
1132 1,
1133 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1134 if (r < 0)
1135 goto finish;
1136 }
1137 }
1138
1139 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1140 if (r < 0)
1141 goto finish;
1142
1143 r = seccomp_load(seccomp);
1144
1145 finish:
1146 seccomp_release(seccomp);
1147 return r;
1148 }
1149
1150 #endif
1151
1152 static void do_idle_pipe_dance(int idle_pipe[4]) {
1153 assert(idle_pipe);
1154
1155
1156 safe_close(idle_pipe[1]);
1157 safe_close(idle_pipe[2]);
1158
1159 if (idle_pipe[0] >= 0) {
1160 int r;
1161
1162 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1163
1164 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1165 /* Signal systemd that we are bored and want to continue. */
1166 r = write(idle_pipe[3], "x", 1);
1167 if (r > 0)
1168 /* Wait for systemd to react to the signal above. */
1169 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1170 }
1171
1172 safe_close(idle_pipe[0]);
1173
1174 }
1175
1176 safe_close(idle_pipe[3]);
1177 }
1178
1179 static int build_environment(
1180 const ExecContext *c,
1181 unsigned n_fds,
1182 usec_t watchdog_usec,
1183 const char *home,
1184 const char *username,
1185 const char *shell,
1186 char ***ret) {
1187
1188 _cleanup_strv_free_ char **our_env = NULL;
1189 unsigned n_env = 0;
1190 char *x;
1191
1192 assert(c);
1193 assert(ret);
1194
1195 our_env = new0(char*, 10);
1196 if (!our_env)
1197 return -ENOMEM;
1198
1199 if (n_fds > 0) {
1200 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1201 return -ENOMEM;
1202 our_env[n_env++] = x;
1203
1204 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1205 return -ENOMEM;
1206 our_env[n_env++] = x;
1207 }
1208
1209 if (watchdog_usec > 0) {
1210 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1211 return -ENOMEM;
1212 our_env[n_env++] = x;
1213
1214 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1215 return -ENOMEM;
1216 our_env[n_env++] = x;
1217 }
1218
1219 if (home) {
1220 x = strappend("HOME=", home);
1221 if (!x)
1222 return -ENOMEM;
1223 our_env[n_env++] = x;
1224 }
1225
1226 if (username) {
1227 x = strappend("LOGNAME=", username);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231
1232 x = strappend("USER=", username);
1233 if (!x)
1234 return -ENOMEM;
1235 our_env[n_env++] = x;
1236 }
1237
1238 if (shell) {
1239 x = strappend("SHELL=", shell);
1240 if (!x)
1241 return -ENOMEM;
1242 our_env[n_env++] = x;
1243 }
1244
1245 if (is_terminal_input(c->std_input) ||
1246 c->std_output == EXEC_OUTPUT_TTY ||
1247 c->std_error == EXEC_OUTPUT_TTY ||
1248 c->tty_path) {
1249
1250 x = strdup(default_term_for_tty(tty_path(c)));
1251 if (!x)
1252 return -ENOMEM;
1253 our_env[n_env++] = x;
1254 }
1255
1256 our_env[n_env++] = NULL;
1257 assert(n_env <= 10);
1258
1259 *ret = our_env;
1260 our_env = NULL;
1261
1262 return 0;
1263 }
1264
1265 static int exec_child(
1266 ExecCommand *command,
1267 const ExecContext *context,
1268 const ExecParameters *params,
1269 ExecRuntime *runtime,
1270 char **argv,
1271 int socket_fd,
1272 int *fds, unsigned n_fds,
1273 char **files_env,
1274 int *exit_status) {
1275
1276 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1277 _cleanup_free_ char *mac_selinux_context_net = NULL;
1278 const char *username = NULL, *home = NULL, *shell = NULL;
1279 unsigned n_dont_close = 0;
1280 int dont_close[n_fds + 4];
1281 uid_t uid = UID_INVALID;
1282 gid_t gid = GID_INVALID;
1283 int i, r;
1284
1285 assert(command);
1286 assert(context);
1287 assert(params);
1288 assert(exit_status);
1289
1290 rename_process_from_path(command->path);
1291
1292 /* We reset exactly these signals, since they are the
1293 * only ones we set to SIG_IGN in the main daemon. All
1294 * others we leave untouched because we set them to
1295 * SIG_DFL or a valid handler initially, both of which
1296 * will be demoted to SIG_DFL. */
1297 default_signals(SIGNALS_CRASH_HANDLER,
1298 SIGNALS_IGNORE, -1);
1299
1300 if (context->ignore_sigpipe)
1301 ignore_signals(SIGPIPE, -1);
1302
1303 r = reset_signal_mask();
1304 if (r < 0) {
1305 *exit_status = EXIT_SIGNAL_MASK;
1306 return r;
1307 }
1308
1309 if (params->idle_pipe)
1310 do_idle_pipe_dance(params->idle_pipe);
1311
1312 /* Close sockets very early to make sure we don't
1313 * block init reexecution because it cannot bind its
1314 * sockets */
1315
1316 log_forget_fds();
1317
1318 if (socket_fd >= 0)
1319 dont_close[n_dont_close++] = socket_fd;
1320 if (n_fds > 0) {
1321 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1322 n_dont_close += n_fds;
1323 }
1324 if (params->bus_endpoint_fd >= 0)
1325 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1326 if (runtime) {
1327 if (runtime->netns_storage_socket[0] >= 0)
1328 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1329 if (runtime->netns_storage_socket[1] >= 0)
1330 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1331 }
1332
1333 r = close_all_fds(dont_close, n_dont_close);
1334 if (r < 0) {
1335 *exit_status = EXIT_FDS;
1336 return r;
1337 }
1338
1339 if (!context->same_pgrp)
1340 if (setsid() < 0) {
1341 *exit_status = EXIT_SETSID;
1342 return -errno;
1343 }
1344
1345 exec_context_tty_reset(context);
1346
1347 if (params->confirm_spawn) {
1348 char response;
1349
1350 r = ask_for_confirmation(&response, argv);
1351 if (r == -ETIMEDOUT)
1352 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1353 else if (r < 0)
1354 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1355 else if (response == 's') {
1356 write_confirm_message("Skipping execution.\n");
1357 *exit_status = EXIT_CONFIRM;
1358 return -ECANCELED;
1359 } else if (response == 'n') {
1360 write_confirm_message("Failing execution.\n");
1361 *exit_status = 0;
1362 return 0;
1363 }
1364 }
1365
1366 if (context->user) {
1367 username = context->user;
1368 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1369 if (r < 0) {
1370 *exit_status = EXIT_USER;
1371 return r;
1372 }
1373 }
1374
1375 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1376 * must make sure to drop O_NONBLOCK */
1377 if (socket_fd >= 0)
1378 fd_nonblock(socket_fd, false);
1379
1380 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1381 if (r < 0) {
1382 *exit_status = EXIT_STDIN;
1383 return r;
1384 }
1385
1386 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1387 if (r < 0) {
1388 *exit_status = EXIT_STDOUT;
1389 return r;
1390 }
1391
1392 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1393 if (r < 0) {
1394 *exit_status = EXIT_STDERR;
1395 return r;
1396 }
1397
1398 if (params->cgroup_path) {
1399 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1400 if (r < 0) {
1401 *exit_status = EXIT_CGROUP;
1402 return r;
1403 }
1404 }
1405
1406 if (context->oom_score_adjust_set) {
1407 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1408
1409 /* When we can't make this change due to EPERM, then
1410 * let's silently skip over it. User namespaces
1411 * prohibit write access to this file, and we
1412 * shouldn't trip up over that. */
1413
1414 sprintf(t, "%i", context->oom_score_adjust);
1415 r = write_string_file("/proc/self/oom_score_adj", t);
1416 if (r == -EPERM || r == -EACCES) {
1417 log_open();
1418 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1419 log_close();
1420 } else if (r < 0) {
1421 *exit_status = EXIT_OOM_ADJUST;
1422 return -errno;
1423 }
1424 }
1425
1426 if (context->nice_set)
1427 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1428 *exit_status = EXIT_NICE;
1429 return -errno;
1430 }
1431
1432 if (context->cpu_sched_set) {
1433 struct sched_param param = {
1434 .sched_priority = context->cpu_sched_priority,
1435 };
1436
1437 r = sched_setscheduler(0,
1438 context->cpu_sched_policy |
1439 (context->cpu_sched_reset_on_fork ?
1440 SCHED_RESET_ON_FORK : 0),
1441 &param);
1442 if (r < 0) {
1443 *exit_status = EXIT_SETSCHEDULER;
1444 return -errno;
1445 }
1446 }
1447
1448 if (context->cpuset)
1449 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1450 *exit_status = EXIT_CPUAFFINITY;
1451 return -errno;
1452 }
1453
1454 if (context->ioprio_set)
1455 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1456 *exit_status = EXIT_IOPRIO;
1457 return -errno;
1458 }
1459
1460 if (context->timer_slack_nsec != NSEC_INFINITY)
1461 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1462 *exit_status = EXIT_TIMERSLACK;
1463 return -errno;
1464 }
1465
1466 if (context->personality != 0xffffffffUL)
1467 if (personality(context->personality) < 0) {
1468 *exit_status = EXIT_PERSONALITY;
1469 return -errno;
1470 }
1471
1472 if (context->utmp_id)
1473 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1474
1475 if (context->user && is_terminal_input(context->std_input)) {
1476 r = chown_terminal(STDIN_FILENO, uid);
1477 if (r < 0) {
1478 *exit_status = EXIT_STDIN;
1479 return r;
1480 }
1481 }
1482
1483 #ifdef ENABLE_KDBUS
1484 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1485 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1486
1487 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1488 if (r < 0) {
1489 *exit_status = EXIT_BUS_ENDPOINT;
1490 return r;
1491 }
1492 }
1493 #endif
1494
1495 /* If delegation is enabled we'll pass ownership of the cgroup
1496 * (but only in systemd's own controller hierarchy!) to the
1497 * user of the new process. */
1498 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1499 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1500 if (r < 0) {
1501 *exit_status = EXIT_CGROUP;
1502 return r;
1503 }
1504
1505
1506 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1507 if (r < 0) {
1508 *exit_status = EXIT_CGROUP;
1509 return r;
1510 }
1511 }
1512
1513 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1514 char **rt;
1515
1516 STRV_FOREACH(rt, context->runtime_directory) {
1517 _cleanup_free_ char *p;
1518
1519 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1520 if (!p) {
1521 *exit_status = EXIT_RUNTIME_DIRECTORY;
1522 return -ENOMEM;
1523 }
1524
1525 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1526 if (r < 0) {
1527 *exit_status = EXIT_RUNTIME_DIRECTORY;
1528 return r;
1529 }
1530 }
1531 }
1532
1533 if (params->apply_permissions) {
1534 r = enforce_groups(context, username, gid);
1535 if (r < 0) {
1536 *exit_status = EXIT_GROUP;
1537 return r;
1538 }
1539 }
1540
1541 umask(context->umask);
1542
1543 #ifdef HAVE_PAM
1544 if (params->apply_permissions && context->pam_name && username) {
1545 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1546 if (r < 0) {
1547 *exit_status = EXIT_PAM;
1548 return r;
1549 }
1550 }
1551 #endif
1552
1553 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1554 r = setup_netns(runtime->netns_storage_socket);
1555 if (r < 0) {
1556 *exit_status = EXIT_NETWORK;
1557 return r;
1558 }
1559 }
1560
1561 if (!strv_isempty(context->read_write_dirs) ||
1562 !strv_isempty(context->read_only_dirs) ||
1563 !strv_isempty(context->inaccessible_dirs) ||
1564 context->mount_flags != 0 ||
1565 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1566 params->bus_endpoint_path ||
1567 context->private_devices ||
1568 context->protect_system != PROTECT_SYSTEM_NO ||
1569 context->protect_home != PROTECT_HOME_NO) {
1570
1571 char *tmp = NULL, *var = NULL;
1572
1573 /* The runtime struct only contains the parent
1574 * of the private /tmp, which is
1575 * non-accessible to world users. Inside of it
1576 * there's a /tmp that is sticky, and that's
1577 * the one we want to use here. */
1578
1579 if (context->private_tmp && runtime) {
1580 if (runtime->tmp_dir)
1581 tmp = strjoina(runtime->tmp_dir, "/tmp");
1582 if (runtime->var_tmp_dir)
1583 var = strjoina(runtime->var_tmp_dir, "/tmp");
1584 }
1585
1586 r = setup_namespace(
1587 context->read_write_dirs,
1588 context->read_only_dirs,
1589 context->inaccessible_dirs,
1590 tmp,
1591 var,
1592 params->bus_endpoint_path,
1593 context->private_devices,
1594 context->protect_home,
1595 context->protect_system,
1596 context->mount_flags);
1597
1598 /* If we couldn't set up the namespace this is
1599 * probably due to a missing capability. In this case,
1600 * silently proceed. */
1601 if (r == -EPERM || r == -EACCES) {
1602 log_open();
1603 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1604 log_close();
1605 } else if (r < 0) {
1606 *exit_status = EXIT_NAMESPACE;
1607 return r;
1608 }
1609 }
1610
1611 if (params->apply_chroot) {
1612 if (context->root_directory)
1613 if (chroot(context->root_directory) < 0) {
1614 *exit_status = EXIT_CHROOT;
1615 return -errno;
1616 }
1617
1618 if (chdir(context->working_directory ?: "/") < 0 &&
1619 !context->working_directory_missing_ok) {
1620 *exit_status = EXIT_CHDIR;
1621 return -errno;
1622 }
1623 } else {
1624 _cleanup_free_ char *d = NULL;
1625
1626 if (asprintf(&d, "%s/%s",
1627 context->root_directory ?: "",
1628 context->working_directory ?: "") < 0) {
1629 *exit_status = EXIT_MEMORY;
1630 return -ENOMEM;
1631 }
1632
1633 if (chdir(d) < 0 &&
1634 !context->working_directory_missing_ok) {
1635 *exit_status = EXIT_CHDIR;
1636 return -errno;
1637 }
1638 }
1639
1640 #ifdef HAVE_SELINUX
1641 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1642 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1643 if (r < 0) {
1644 *exit_status = EXIT_SELINUX_CONTEXT;
1645 return r;
1646 }
1647 }
1648 #endif
1649
1650 /* We repeat the fd closing here, to make sure that
1651 * nothing is leaked from the PAM modules. Note that
1652 * we are more aggressive this time since socket_fd
1653 * and the netns fds we don't need anymore. The custom
1654 * endpoint fd was needed to upload the policy and can
1655 * now be closed as well. */
1656 r = close_all_fds(fds, n_fds);
1657 if (r >= 0)
1658 r = shift_fds(fds, n_fds);
1659 if (r >= 0)
1660 r = flags_fds(fds, n_fds, context->non_blocking);
1661 if (r < 0) {
1662 *exit_status = EXIT_FDS;
1663 return r;
1664 }
1665
1666 if (params->apply_permissions) {
1667
1668 for (i = 0; i < _RLIMIT_MAX; i++) {
1669 if (!context->rlimit[i])
1670 continue;
1671
1672 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1673 *exit_status = EXIT_LIMITS;
1674 return -errno;
1675 }
1676 }
1677
1678 if (context->capability_bounding_set_drop) {
1679 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1680 if (r < 0) {
1681 *exit_status = EXIT_CAPABILITIES;
1682 return r;
1683 }
1684 }
1685
1686 #ifdef HAVE_SMACK
1687 if (context->smack_process_label) {
1688 r = mac_smack_apply_pid(0, context->smack_process_label);
1689 if (r < 0) {
1690 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1691 return r;
1692 }
1693 }
1694 #endif
1695
1696 if (context->user) {
1697 r = enforce_user(context, uid);
1698 if (r < 0) {
1699 *exit_status = EXIT_USER;
1700 return r;
1701 }
1702 }
1703
1704 /* PR_GET_SECUREBITS is not privileged, while
1705 * PR_SET_SECUREBITS is. So to suppress
1706 * potential EPERMs we'll try not to call
1707 * PR_SET_SECUREBITS unless necessary. */
1708 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1709 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1710 *exit_status = EXIT_SECUREBITS;
1711 return -errno;
1712 }
1713
1714 if (context->capabilities)
1715 if (cap_set_proc(context->capabilities) < 0) {
1716 *exit_status = EXIT_CAPABILITIES;
1717 return -errno;
1718 }
1719
1720 if (context->no_new_privileges)
1721 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1722 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1723 return -errno;
1724 }
1725
1726 #ifdef HAVE_SECCOMP
1727 if (context->address_families_whitelist ||
1728 !set_isempty(context->address_families)) {
1729 r = apply_address_families(context);
1730 if (r < 0) {
1731 *exit_status = EXIT_ADDRESS_FAMILIES;
1732 return r;
1733 }
1734 }
1735
1736 if (context->syscall_whitelist ||
1737 !set_isempty(context->syscall_filter) ||
1738 !set_isempty(context->syscall_archs)) {
1739 r = apply_seccomp(context);
1740 if (r < 0) {
1741 *exit_status = EXIT_SECCOMP;
1742 return r;
1743 }
1744 }
1745 #endif
1746
1747 #ifdef HAVE_SELINUX
1748 if (mac_selinux_use()) {
1749 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1750
1751 if (exec_context) {
1752 r = setexeccon(exec_context);
1753 if (r < 0) {
1754 *exit_status = EXIT_SELINUX_CONTEXT;
1755 return r;
1756 }
1757 }
1758 }
1759 #endif
1760
1761 #ifdef HAVE_APPARMOR
1762 if (context->apparmor_profile && mac_apparmor_use()) {
1763 r = aa_change_onexec(context->apparmor_profile);
1764 if (r < 0 && !context->apparmor_profile_ignore) {
1765 *exit_status = EXIT_APPARMOR_PROFILE;
1766 return -errno;
1767 }
1768 }
1769 #endif
1770 }
1771
1772 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1773 if (r < 0) {
1774 *exit_status = EXIT_MEMORY;
1775 return r;
1776 }
1777
1778 final_env = strv_env_merge(5,
1779 params->environment,
1780 our_env,
1781 context->environment,
1782 files_env,
1783 pam_env,
1784 NULL);
1785 if (!final_env) {
1786 *exit_status = EXIT_MEMORY;
1787 return -ENOMEM;
1788 }
1789
1790 final_argv = replace_env_argv(argv, final_env);
1791 if (!final_argv) {
1792 *exit_status = EXIT_MEMORY;
1793 return -ENOMEM;
1794 }
1795
1796 final_env = strv_env_clean(final_env);
1797
1798 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1799 _cleanup_free_ char *line;
1800
1801 line = exec_command_line(final_argv);
1802 if (line) {
1803 log_open();
1804 log_unit_struct(params->unit_id,
1805 LOG_DEBUG,
1806 "EXECUTABLE=%s", command->path,
1807 LOG_MESSAGE("Executing: %s", line),
1808 NULL);
1809 log_close();
1810 }
1811 }
1812
1813 execve(command->path, final_argv, final_env);
1814 *exit_status = EXIT_EXEC;
1815 return -errno;
1816 }
1817
1818 int exec_spawn(ExecCommand *command,
1819 const ExecContext *context,
1820 const ExecParameters *params,
1821 ExecRuntime *runtime,
1822 pid_t *ret) {
1823
1824 _cleanup_strv_free_ char **files_env = NULL;
1825 int *fds = NULL; unsigned n_fds = 0;
1826 _cleanup_free_ char *line = NULL;
1827 int socket_fd, r;
1828 char **argv;
1829 pid_t pid;
1830
1831 assert(command);
1832 assert(context);
1833 assert(ret);
1834 assert(params);
1835 assert(params->fds || params->n_fds <= 0);
1836
1837 if (context->std_input == EXEC_INPUT_SOCKET ||
1838 context->std_output == EXEC_OUTPUT_SOCKET ||
1839 context->std_error == EXEC_OUTPUT_SOCKET) {
1840
1841 if (params->n_fds != 1) {
1842 log_unit_error(params->unit_id, "Got more than one socket.");
1843 return -EINVAL;
1844 }
1845
1846 socket_fd = params->fds[0];
1847 } else {
1848 socket_fd = -1;
1849 fds = params->fds;
1850 n_fds = params->n_fds;
1851 }
1852
1853 r = exec_context_load_environment(context, params->unit_id, &files_env);
1854 if (r < 0)
1855 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1856
1857 argv = params->argv ?: command->argv;
1858 line = exec_command_line(argv);
1859 if (!line)
1860 return log_oom();
1861
1862 log_unit_struct(params->unit_id,
1863 LOG_DEBUG,
1864 "EXECUTABLE=%s", command->path,
1865 LOG_MESSAGE("About to execute: %s", line),
1866 NULL);
1867 pid = fork();
1868 if (pid < 0)
1869 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1870
1871 if (pid == 0) {
1872 int exit_status;
1873
1874 r = exec_child(command,
1875 context,
1876 params,
1877 runtime,
1878 argv,
1879 socket_fd,
1880 fds, n_fds,
1881 files_env,
1882 &exit_status);
1883 if (r < 0) {
1884 log_open();
1885 log_unit_struct(params->unit_id,
1886 LOG_ERR,
1887 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1888 "EXECUTABLE=%s", command->path,
1889 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1890 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1891 command->path, strerror(-r)),
1892 LOG_ERRNO(r),
1893 NULL);
1894 }
1895
1896 _exit(exit_status);
1897 }
1898
1899 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1900
1901 /* We add the new process to the cgroup both in the child (so
1902 * that we can be sure that no user code is ever executed
1903 * outside of the cgroup) and in the parent (so that we can be
1904 * sure that when we kill the cgroup the process will be
1905 * killed too). */
1906 if (params->cgroup_path)
1907 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1908
1909 exec_status_start(&command->exec_status, pid);
1910
1911 *ret = pid;
1912 return 0;
1913 }
1914
1915 void exec_context_init(ExecContext *c) {
1916 assert(c);
1917
1918 c->umask = 0022;
1919 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1920 c->cpu_sched_policy = SCHED_OTHER;
1921 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1922 c->syslog_level_prefix = true;
1923 c->ignore_sigpipe = true;
1924 c->timer_slack_nsec = NSEC_INFINITY;
1925 c->personality = 0xffffffffUL;
1926 c->runtime_directory_mode = 0755;
1927 }
1928
1929 void exec_context_done(ExecContext *c) {
1930 unsigned l;
1931
1932 assert(c);
1933
1934 strv_free(c->environment);
1935 c->environment = NULL;
1936
1937 strv_free(c->environment_files);
1938 c->environment_files = NULL;
1939
1940 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1941 free(c->rlimit[l]);
1942 c->rlimit[l] = NULL;
1943 }
1944
1945 free(c->working_directory);
1946 c->working_directory = NULL;
1947 free(c->root_directory);
1948 c->root_directory = NULL;
1949
1950 free(c->tty_path);
1951 c->tty_path = NULL;
1952
1953 free(c->syslog_identifier);
1954 c->syslog_identifier = NULL;
1955
1956 free(c->user);
1957 c->user = NULL;
1958
1959 free(c->group);
1960 c->group = NULL;
1961
1962 strv_free(c->supplementary_groups);
1963 c->supplementary_groups = NULL;
1964
1965 free(c->pam_name);
1966 c->pam_name = NULL;
1967
1968 if (c->capabilities) {
1969 cap_free(c->capabilities);
1970 c->capabilities = NULL;
1971 }
1972
1973 strv_free(c->read_only_dirs);
1974 c->read_only_dirs = NULL;
1975
1976 strv_free(c->read_write_dirs);
1977 c->read_write_dirs = NULL;
1978
1979 strv_free(c->inaccessible_dirs);
1980 c->inaccessible_dirs = NULL;
1981
1982 if (c->cpuset)
1983 CPU_FREE(c->cpuset);
1984
1985 free(c->utmp_id);
1986 c->utmp_id = NULL;
1987
1988 free(c->selinux_context);
1989 c->selinux_context = NULL;
1990
1991 free(c->apparmor_profile);
1992 c->apparmor_profile = NULL;
1993
1994 set_free(c->syscall_filter);
1995 c->syscall_filter = NULL;
1996
1997 set_free(c->syscall_archs);
1998 c->syscall_archs = NULL;
1999
2000 set_free(c->address_families);
2001 c->address_families = NULL;
2002
2003 strv_free(c->runtime_directory);
2004 c->runtime_directory = NULL;
2005
2006 bus_endpoint_free(c->bus_endpoint);
2007 c->bus_endpoint = NULL;
2008 }
2009
2010 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2011 char **i;
2012
2013 assert(c);
2014
2015 if (!runtime_prefix)
2016 return 0;
2017
2018 STRV_FOREACH(i, c->runtime_directory) {
2019 _cleanup_free_ char *p;
2020
2021 p = strjoin(runtime_prefix, "/", *i, NULL);
2022 if (!p)
2023 return -ENOMEM;
2024
2025 /* We execute this synchronously, since we need to be
2026 * sure this is gone when we start the service
2027 * next. */
2028 (void) rm_rf(p, REMOVE_ROOT);
2029 }
2030
2031 return 0;
2032 }
2033
2034 void exec_command_done(ExecCommand *c) {
2035 assert(c);
2036
2037 free(c->path);
2038 c->path = NULL;
2039
2040 strv_free(c->argv);
2041 c->argv = NULL;
2042 }
2043
2044 void exec_command_done_array(ExecCommand *c, unsigned n) {
2045 unsigned i;
2046
2047 for (i = 0; i < n; i++)
2048 exec_command_done(c+i);
2049 }
2050
2051 ExecCommand* exec_command_free_list(ExecCommand *c) {
2052 ExecCommand *i;
2053
2054 while ((i = c)) {
2055 LIST_REMOVE(command, c, i);
2056 exec_command_done(i);
2057 free(i);
2058 }
2059
2060 return NULL;
2061 }
2062
2063 void exec_command_free_array(ExecCommand **c, unsigned n) {
2064 unsigned i;
2065
2066 for (i = 0; i < n; i++)
2067 c[i] = exec_command_free_list(c[i]);
2068 }
2069
/* Context handed to invalid_env(): identifies the unit and the
 * environment file an invalid assignment was found in. */
typedef struct InvalidEnvInfo {
        const char *unit_id;
        const char *path;
} InvalidEnvInfo;

/* Callback for strv_env_clean_with_callback(): logs the rejected
 * assignment p together with the file it came from. userdata must point
 * to an InvalidEnvInfo. */
static void invalid_env(const char *p, void *userdata) {
        const InvalidEnvInfo *origin = userdata;

        log_unit_error(origin->unit_id, "Ignoring invalid environment assignment '%s': %s", p, origin->path);
}
2080
2081 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2082 char **i, **r = NULL;
2083
2084 assert(c);
2085 assert(l);
2086
2087 STRV_FOREACH(i, c->environment_files) {
2088 char *fn;
2089 int k;
2090 bool ignore = false;
2091 char **p;
2092 _cleanup_globfree_ glob_t pglob = {};
2093 int count, n;
2094
2095 fn = *i;
2096
2097 if (fn[0] == '-') {
2098 ignore = true;
2099 fn ++;
2100 }
2101
2102 if (!path_is_absolute(fn)) {
2103 if (ignore)
2104 continue;
2105
2106 strv_free(r);
2107 return -EINVAL;
2108 }
2109
2110 /* Filename supports globbing, take all matching files */
2111 errno = 0;
2112 if (glob(fn, 0, NULL, &pglob) != 0) {
2113 if (ignore)
2114 continue;
2115
2116 strv_free(r);
2117 return errno ? -errno : -EINVAL;
2118 }
2119 count = pglob.gl_pathc;
2120 if (count == 0) {
2121 if (ignore)
2122 continue;
2123
2124 strv_free(r);
2125 return -EINVAL;
2126 }
2127 for (n = 0; n < count; n++) {
2128 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2129 if (k < 0) {
2130 if (ignore)
2131 continue;
2132
2133 strv_free(r);
2134 return k;
2135 }
2136 /* Log invalid environment variables with filename */
2137 if (p) {
2138 InvalidEnvInfo info = {
2139 .unit_id = unit_id,
2140 .path = pglob.gl_pathv[n]
2141 };
2142
2143 p = strv_env_clean_with_callback(p, invalid_env, &info);
2144 }
2145
2146 if (r == NULL)
2147 r = p;
2148 else {
2149 char **m;
2150
2151 m = strv_env_merge(2, r, p);
2152 strv_free(r);
2153 strv_free(p);
2154 if (!m)
2155 return -ENOMEM;
2156
2157 r = m;
2158 }
2159 }
2160 }
2161
2162 *l = r;
2163
2164 return 0;
2165 }
2166
2167 static bool tty_may_match_dev_console(const char *tty) {
2168 _cleanup_free_ char *active = NULL;
2169 char *console;
2170
2171 if (startswith(tty, "/dev/"))
2172 tty += 5;
2173
2174 /* trivial identity? */
2175 if (streq(tty, "console"))
2176 return true;
2177
2178 console = resolve_dev_console(&active);
2179 /* if we could not resolve, assume it may */
2180 if (!console)
2181 return true;
2182
2183 /* "tty0" means the active VC, so it may be the same sometimes */
2184 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2185 }
2186
2187 bool exec_context_may_touch_console(ExecContext *ec) {
2188 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2189 is_terminal_input(ec->std_input) ||
2190 is_terminal_output(ec->std_output) ||
2191 is_terminal_output(ec->std_error)) &&
2192 tty_may_match_dev_console(tty_path(ec));
2193 }
2194
2195 static void strv_fprintf(FILE *f, char **l) {
2196 char **g;
2197
2198 assert(f);
2199
2200 STRV_FOREACH(g, l)
2201 fprintf(f, " %s", *g);
2202 }
2203
2204 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2205 char **e;
2206 unsigned i;
2207
2208 assert(c);
2209 assert(f);
2210
2211 prefix = strempty(prefix);
2212
2213 fprintf(f,
2214 "%sUMask: %04o\n"
2215 "%sWorkingDirectory: %s\n"
2216 "%sRootDirectory: %s\n"
2217 "%sNonBlocking: %s\n"
2218 "%sPrivateTmp: %s\n"
2219 "%sPrivateNetwork: %s\n"
2220 "%sPrivateDevices: %s\n"
2221 "%sProtectHome: %s\n"
2222 "%sProtectSystem: %s\n"
2223 "%sIgnoreSIGPIPE: %s\n",
2224 prefix, c->umask,
2225 prefix, c->working_directory ? c->working_directory : "/",
2226 prefix, c->root_directory ? c->root_directory : "/",
2227 prefix, yes_no(c->non_blocking),
2228 prefix, yes_no(c->private_tmp),
2229 prefix, yes_no(c->private_network),
2230 prefix, yes_no(c->private_devices),
2231 prefix, protect_home_to_string(c->protect_home),
2232 prefix, protect_system_to_string(c->protect_system),
2233 prefix, yes_no(c->ignore_sigpipe));
2234
2235 STRV_FOREACH(e, c->environment)
2236 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2237
2238 STRV_FOREACH(e, c->environment_files)
2239 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2240
2241 if (c->nice_set)
2242 fprintf(f,
2243 "%sNice: %i\n",
2244 prefix, c->nice);
2245
2246 if (c->oom_score_adjust_set)
2247 fprintf(f,
2248 "%sOOMScoreAdjust: %i\n",
2249 prefix, c->oom_score_adjust);
2250
2251 for (i = 0; i < RLIM_NLIMITS; i++)
2252 if (c->rlimit[i])
2253 fprintf(f, "%s%s: "RLIM_FMT"\n",
2254 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2255
2256 if (c->ioprio_set) {
2257 _cleanup_free_ char *class_str = NULL;
2258
2259 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2260 fprintf(f,
2261 "%sIOSchedulingClass: %s\n"
2262 "%sIOPriority: %i\n",
2263 prefix, strna(class_str),
2264 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2265 }
2266
2267 if (c->cpu_sched_set) {
2268 _cleanup_free_ char *policy_str = NULL;
2269
2270 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2271 fprintf(f,
2272 "%sCPUSchedulingPolicy: %s\n"
2273 "%sCPUSchedulingPriority: %i\n"
2274 "%sCPUSchedulingResetOnFork: %s\n",
2275 prefix, strna(policy_str),
2276 prefix, c->cpu_sched_priority,
2277 prefix, yes_no(c->cpu_sched_reset_on_fork));
2278 }
2279
2280 if (c->cpuset) {
2281 fprintf(f, "%sCPUAffinity:", prefix);
2282 for (i = 0; i < c->cpuset_ncpus; i++)
2283 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2284 fprintf(f, " %u", i);
2285 fputs("\n", f);
2286 }
2287
2288 if (c->timer_slack_nsec != NSEC_INFINITY)
2289 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2290
2291 fprintf(f,
2292 "%sStandardInput: %s\n"
2293 "%sStandardOutput: %s\n"
2294 "%sStandardError: %s\n",
2295 prefix, exec_input_to_string(c->std_input),
2296 prefix, exec_output_to_string(c->std_output),
2297 prefix, exec_output_to_string(c->std_error));
2298
2299 if (c->tty_path)
2300 fprintf(f,
2301 "%sTTYPath: %s\n"
2302 "%sTTYReset: %s\n"
2303 "%sTTYVHangup: %s\n"
2304 "%sTTYVTDisallocate: %s\n",
2305 prefix, c->tty_path,
2306 prefix, yes_no(c->tty_reset),
2307 prefix, yes_no(c->tty_vhangup),
2308 prefix, yes_no(c->tty_vt_disallocate));
2309
2310 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2311 c->std_output == EXEC_OUTPUT_KMSG ||
2312 c->std_output == EXEC_OUTPUT_JOURNAL ||
2313 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2314 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2315 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2316 c->std_error == EXEC_OUTPUT_SYSLOG ||
2317 c->std_error == EXEC_OUTPUT_KMSG ||
2318 c->std_error == EXEC_OUTPUT_JOURNAL ||
2319 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2320 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2321 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2322
2323 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2324
2325 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2326 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2327
2328 fprintf(f,
2329 "%sSyslogFacility: %s\n"
2330 "%sSyslogLevel: %s\n",
2331 prefix, strna(fac_str),
2332 prefix, strna(lvl_str));
2333 }
2334
2335 if (c->capabilities) {
2336 _cleanup_cap_free_charp_ char *t;
2337
2338 t = cap_to_text(c->capabilities, NULL);
2339 if (t)
2340 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2341 }
2342
2343 if (c->secure_bits)
2344 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2345 prefix,
2346 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2347 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2348 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2349 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2350 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2351 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2352
2353 if (c->capability_bounding_set_drop) {
2354 unsigned long l;
2355 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2356
2357 for (l = 0; l <= cap_last_cap(); l++)
2358 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2359 fprintf(f, " %s", strna(capability_to_name(l)));
2360
2361 fputs("\n", f);
2362 }
2363
2364 if (c->user)
2365 fprintf(f, "%sUser: %s\n", prefix, c->user);
2366 if (c->group)
2367 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2368
2369 if (strv_length(c->supplementary_groups) > 0) {
2370 fprintf(f, "%sSupplementaryGroups:", prefix);
2371 strv_fprintf(f, c->supplementary_groups);
2372 fputs("\n", f);
2373 }
2374
2375 if (c->pam_name)
2376 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2377
2378 if (strv_length(c->read_write_dirs) > 0) {
2379 fprintf(f, "%sReadWriteDirs:", prefix);
2380 strv_fprintf(f, c->read_write_dirs);
2381 fputs("\n", f);
2382 }
2383
2384 if (strv_length(c->read_only_dirs) > 0) {
2385 fprintf(f, "%sReadOnlyDirs:", prefix);
2386 strv_fprintf(f, c->read_only_dirs);
2387 fputs("\n", f);
2388 }
2389
2390 if (strv_length(c->inaccessible_dirs) > 0) {
2391 fprintf(f, "%sInaccessibleDirs:", prefix);
2392 strv_fprintf(f, c->inaccessible_dirs);
2393 fputs("\n", f);
2394 }
2395
2396 if (c->utmp_id)
2397 fprintf(f,
2398 "%sUtmpIdentifier: %s\n",
2399 prefix, c->utmp_id);
2400
2401 if (c->selinux_context)
2402 fprintf(f,
2403 "%sSELinuxContext: %s%s\n",
2404 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2405
2406 if (c->personality != 0xffffffffUL)
2407 fprintf(f,
2408 "%sPersonality: %s\n",
2409 prefix, strna(personality_to_string(c->personality)));
2410
2411 if (c->syscall_filter) {
2412 #ifdef HAVE_SECCOMP
2413 Iterator j;
2414 void *id;
2415 bool first = true;
2416 #endif
2417
2418 fprintf(f,
2419 "%sSystemCallFilter: ",
2420 prefix);
2421
2422 if (!c->syscall_whitelist)
2423 fputc('~', f);
2424
2425 #ifdef HAVE_SECCOMP
2426 SET_FOREACH(id, c->syscall_filter, j) {
2427 _cleanup_free_ char *name = NULL;
2428
2429 if (first)
2430 first = false;
2431 else
2432 fputc(' ', f);
2433
2434 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2435 fputs(strna(name), f);
2436 }
2437 #endif
2438
2439 fputc('\n', f);
2440 }
2441
2442 if (c->syscall_archs) {
2443 #ifdef HAVE_SECCOMP
2444 Iterator j;
2445 void *id;
2446 #endif
2447
2448 fprintf(f,
2449 "%sSystemCallArchitectures:",
2450 prefix);
2451
2452 #ifdef HAVE_SECCOMP
2453 SET_FOREACH(id, c->syscall_archs, j)
2454 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2455 #endif
2456 fputc('\n', f);
2457 }
2458
2459 if (c->syscall_errno != 0)
2460 fprintf(f,
2461 "%sSystemCallErrorNumber: %s\n",
2462 prefix, strna(errno_to_name(c->syscall_errno)));
2463
2464 if (c->apparmor_profile)
2465 fprintf(f,
2466 "%sAppArmorProfile: %s%s\n",
2467 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2468 }
2469
2470 bool exec_context_maintains_privileges(ExecContext *c) {
2471 assert(c);
2472
2473 /* Returns true if the process forked off would run run under
2474 * an unchanged UID or as root. */
2475
2476 if (!c->user)
2477 return true;
2478
2479 if (streq(c->user, "root") || streq(c->user, "0"))
2480 return true;
2481
2482 return false;
2483 }
2484
2485 void exec_status_start(ExecStatus *s, pid_t pid) {
2486 assert(s);
2487
2488 zero(*s);
2489 s->pid = pid;
2490 dual_timestamp_get(&s->start_timestamp);
2491 }
2492
2493 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2494 assert(s);
2495
2496 if (s->pid && s->pid != pid)
2497 zero(*s);
2498
2499 s->pid = pid;
2500 dual_timestamp_get(&s->exit_timestamp);
2501
2502 s->code = code;
2503 s->status = status;
2504
2505 if (context) {
2506 if (context->utmp_id)
2507 utmp_put_dead_process(context->utmp_id, pid, code, status);
2508
2509 exec_context_tty_reset(context);
2510 }
2511 }
2512
2513 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2514 char buf[FORMAT_TIMESTAMP_MAX];
2515
2516 assert(s);
2517 assert(f);
2518
2519 if (s->pid <= 0)
2520 return;
2521
2522 prefix = strempty(prefix);
2523
2524 fprintf(f,
2525 "%sPID: "PID_FMT"\n",
2526 prefix, s->pid);
2527
2528 if (s->start_timestamp.realtime > 0)
2529 fprintf(f,
2530 "%sStart Timestamp: %s\n",
2531 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2532
2533 if (s->exit_timestamp.realtime > 0)
2534 fprintf(f,
2535 "%sExit Timestamp: %s\n"
2536 "%sExit Code: %s\n"
2537 "%sExit Status: %i\n",
2538 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2539 prefix, sigchld_code_to_string(s->code),
2540 prefix, s->status);
2541 }
2542
2543 char *exec_command_line(char **argv) {
2544 size_t k;
2545 char *n, *p, **a;
2546 bool first = true;
2547
2548 assert(argv);
2549
2550 k = 1;
2551 STRV_FOREACH(a, argv)
2552 k += strlen(*a)+3;
2553
2554 if (!(n = new(char, k)))
2555 return NULL;
2556
2557 p = n;
2558 STRV_FOREACH(a, argv) {
2559
2560 if (!first)
2561 *(p++) = ' ';
2562 else
2563 first = false;
2564
2565 if (strpbrk(*a, WHITESPACE)) {
2566 *(p++) = '\'';
2567 p = stpcpy(p, *a);
2568 *(p++) = '\'';
2569 } else
2570 p = stpcpy(p, *a);
2571
2572 }
2573
2574 *p = 0;
2575
2576 /* FIXME: this doesn't really handle arguments that have
2577 * spaces and ticks in them */
2578
2579 return n;
2580 }
2581
2582 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2583 _cleanup_free_ char *cmd = NULL;
2584 const char *prefix2;
2585
2586 assert(c);
2587 assert(f);
2588
2589 prefix = strempty(prefix);
2590 prefix2 = strjoina(prefix, "\t");
2591
2592 cmd = exec_command_line(c->argv);
2593 fprintf(f,
2594 "%sCommand Line: %s\n",
2595 prefix, cmd ? cmd : strerror(ENOMEM));
2596
2597 exec_status_dump(&c->exec_status, f, prefix2);
2598 }
2599
2600 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2601 assert(f);
2602
2603 prefix = strempty(prefix);
2604
2605 LIST_FOREACH(command, c, c)
2606 exec_command_dump(c, f, prefix);
2607 }
2608
2609 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2610 ExecCommand *end;
2611
2612 assert(l);
2613 assert(e);
2614
2615 if (*l) {
2616 /* It's kind of important, that we keep the order here */
2617 LIST_FIND_TAIL(command, *l, end);
2618 LIST_INSERT_AFTER(command, *l, end, e);
2619 } else
2620 *l = e;
2621 }
2622
2623 int exec_command_set(ExecCommand *c, const char *path, ...) {
2624 va_list ap;
2625 char **l, *p;
2626
2627 assert(c);
2628 assert(path);
2629
2630 va_start(ap, path);
2631 l = strv_new_ap(path, ap);
2632 va_end(ap);
2633
2634 if (!l)
2635 return -ENOMEM;
2636
2637 p = strdup(path);
2638 if (!p) {
2639 strv_free(l);
2640 return -ENOMEM;
2641 }
2642
2643 free(c->path);
2644 c->path = p;
2645
2646 strv_free(c->argv);
2647 c->argv = l;
2648
2649 return 0;
2650 }
2651
2652 int exec_command_append(ExecCommand *c, const char *path, ...) {
2653 _cleanup_strv_free_ char **l = NULL;
2654 va_list ap;
2655 int r;
2656
2657 assert(c);
2658 assert(path);
2659
2660 va_start(ap, path);
2661 l = strv_new_ap(path, ap);
2662 va_end(ap);
2663
2664 if (!l)
2665 return -ENOMEM;
2666
2667 r = strv_extend_strv(&c->argv, l);
2668 if (r < 0)
2669 return r;
2670
2671 return 0;
2672 }
2673
2674
2675 static int exec_runtime_allocate(ExecRuntime **rt) {
2676
2677 if (*rt)
2678 return 0;
2679
2680 *rt = new0(ExecRuntime, 1);
2681 if (!*rt)
2682 return -ENOMEM;
2683
2684 (*rt)->n_ref = 1;
2685 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2686
2687 return 0;
2688 }
2689
2690 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2691 int r;
2692
2693 assert(rt);
2694 assert(c);
2695 assert(id);
2696
2697 if (*rt)
2698 return 1;
2699
2700 if (!c->private_network && !c->private_tmp)
2701 return 0;
2702
2703 r = exec_runtime_allocate(rt);
2704 if (r < 0)
2705 return r;
2706
2707 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2708 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2709 return -errno;
2710 }
2711
2712 if (c->private_tmp && !(*rt)->tmp_dir) {
2713 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2714 if (r < 0)
2715 return r;
2716 }
2717
2718 return 1;
2719 }
2720
2721 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2722 assert(r);
2723 assert(r->n_ref > 0);
2724
2725 r->n_ref++;
2726 return r;
2727 }
2728
2729 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2730
2731 if (!r)
2732 return NULL;
2733
2734 assert(r->n_ref > 0);
2735
2736 r->n_ref--;
2737 if (r->n_ref <= 0) {
2738 free(r->tmp_dir);
2739 free(r->var_tmp_dir);
2740 safe_close_pair(r->netns_storage_socket);
2741 free(r);
2742 }
2743
2744 return NULL;
2745 }
2746
2747 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2748 assert(u);
2749 assert(f);
2750 assert(fds);
2751
2752 if (!rt)
2753 return 0;
2754
2755 if (rt->tmp_dir)
2756 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2757
2758 if (rt->var_tmp_dir)
2759 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2760
2761 if (rt->netns_storage_socket[0] >= 0) {
2762 int copy;
2763
2764 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2765 if (copy < 0)
2766 return copy;
2767
2768 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2769 }
2770
2771 if (rt->netns_storage_socket[1] >= 0) {
2772 int copy;
2773
2774 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2775 if (copy < 0)
2776 return copy;
2777
2778 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2779 }
2780
2781 return 0;
2782 }
2783
2784 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2785 int r;
2786
2787 assert(rt);
2788 assert(key);
2789 assert(value);
2790
2791 if (streq(key, "tmp-dir")) {
2792 char *copy;
2793
2794 r = exec_runtime_allocate(rt);
2795 if (r < 0)
2796 return r;
2797
2798 copy = strdup(value);
2799 if (!copy)
2800 return log_oom();
2801
2802 free((*rt)->tmp_dir);
2803 (*rt)->tmp_dir = copy;
2804
2805 } else if (streq(key, "var-tmp-dir")) {
2806 char *copy;
2807
2808 r = exec_runtime_allocate(rt);
2809 if (r < 0)
2810 return r;
2811
2812 copy = strdup(value);
2813 if (!copy)
2814 return log_oom();
2815
2816 free((*rt)->var_tmp_dir);
2817 (*rt)->var_tmp_dir = copy;
2818
2819 } else if (streq(key, "netns-socket-0")) {
2820 int fd;
2821
2822 r = exec_runtime_allocate(rt);
2823 if (r < 0)
2824 return r;
2825
2826 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2827 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2828 else {
2829 safe_close((*rt)->netns_storage_socket[0]);
2830 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2831 }
2832 } else if (streq(key, "netns-socket-1")) {
2833 int fd;
2834
2835 r = exec_runtime_allocate(rt);
2836 if (r < 0)
2837 return r;
2838
2839 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2840 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2841 else {
2842 safe_close((*rt)->netns_storage_socket[1]);
2843 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2844 }
2845 } else
2846 return 0;
2847
2848 return 1;
2849 }
2850
2851 static void *remove_tmpdir_thread(void *p) {
2852 _cleanup_free_ char *path = p;
2853
2854 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2855 return NULL;
2856 }
2857
2858 void exec_runtime_destroy(ExecRuntime *rt) {
2859 int r;
2860
2861 if (!rt)
2862 return;
2863
2864 /* If there are multiple users of this, let's leave the stuff around */
2865 if (rt->n_ref > 1)
2866 return;
2867
2868 if (rt->tmp_dir) {
2869 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2870
2871 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2872 if (r < 0) {
2873 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2874 free(rt->tmp_dir);
2875 }
2876
2877 rt->tmp_dir = NULL;
2878 }
2879
2880 if (rt->var_tmp_dir) {
2881 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2882
2883 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2884 if (r < 0) {
2885 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2886 free(rt->var_tmp_dir);
2887 }
2888
2889 rt->var_tmp_dir = NULL;
2890 }
2891
2892 safe_close_pair(rt->netns_storage_socket);
2893 }
2894
2895 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2896 [EXEC_INPUT_NULL] = "null",
2897 [EXEC_INPUT_TTY] = "tty",
2898 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2899 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2900 [EXEC_INPUT_SOCKET] = "socket"
2901 };
2902
2903 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2904
2905 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2906 [EXEC_OUTPUT_INHERIT] = "inherit",
2907 [EXEC_OUTPUT_NULL] = "null",
2908 [EXEC_OUTPUT_TTY] = "tty",
2909 [EXEC_OUTPUT_SYSLOG] = "syslog",
2910 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2911 [EXEC_OUTPUT_KMSG] = "kmsg",
2912 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2913 [EXEC_OUTPUT_JOURNAL] = "journal",
2914 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2915 [EXEC_OUTPUT_SOCKET] = "socket"
2916 };
2917
2918 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);