]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
tree-wide: make use of the fact that strv_free() returns NULL
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <utmpx.h>
35 #include <sys/personality.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "sd-messages.h"
54 #include "rm-rf.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
83
84 #ifdef HAVE_APPARMOR
85 #include "apparmor-util.h"
86 #endif
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #include "execute.h"
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
/* Moves the passed file descriptors so that fds[i] ends up at fd number
 * i + 3. The array is updated in place to reflect the new fd numbers.
 * Returns 0 on success, or a negative errno if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int want = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* Ask for the lowest free fd >= the slot we want */
                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot was still occupied (presumably by an fd
                         * further down the array), hence remember the first
                         * such index and do another pass from there. */
                        if (retry_at < 0 && dup_fd != want)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Sets or clears O_NONBLOCK (according to 'nonblock') on each of the passed
 * fds and turns off FD_CLOEXEC on all of them. Stops at the first failure
 * and returns its negative error code; returns 0 on success. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int q;

                q = fd_nonblock(*cur, nonblock);
                if (q < 0)
                        return q;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * passed on to the child we are about to execute. */
                q = fd_cloexec(*cur, false);
                if (q < 0)
                        return q;
        }

        return 0;
}
175
176 _pure_ static const char *tty_path(const ExecContext *context) {
177 assert(context);
178
179 if (context->tty_path)
180 return context->tty_path;
181
182 return "/dev/console";
183 }
184
185 static void exec_context_tty_reset(const ExecContext *context) {
186 assert(context);
187
188 if (context->tty_vhangup)
189 terminal_vhangup(tty_path(context));
190
191 if (context->tty_reset)
192 reset_terminal(tty_path(context));
193
194 if (context->tty_vt_disallocate && context->tty_path)
195 vt_disallocate(context->tty_path);
196 }
197
198 static bool is_terminal_output(ExecOutput o) {
199 return
200 o == EXEC_OUTPUT_TTY ||
201 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
202 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
203 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
204 }
205
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes it
 * available as fd number 'nfd'. Returns nfd on success, a negative errno
 * otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right fd number right away */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
223
224 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
225 union sockaddr_union sa = {
226 .un.sun_family = AF_UNIX,
227 .un.sun_path = "/run/systemd/journal/stdout",
228 };
229 uid_t olduid = UID_INVALID;
230 gid_t oldgid = GID_INVALID;
231 int r;
232
233 if (gid != GID_INVALID) {
234 oldgid = getgid();
235
236 r = setegid(gid);
237 if (r < 0)
238 return -errno;
239 }
240
241 if (uid != UID_INVALID) {
242 olduid = getuid();
243
244 r = seteuid(uid);
245 if (r < 0) {
246 r = -errno;
247 goto restore_gid;
248 }
249 }
250
251 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
252 if (r < 0)
253 r = -errno;
254
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
257
258 if (uid != UID_INVALID)
259 (void) seteuid(olduid);
260
261 restore_gid:
262 if (gid != GID_INVALID)
263 (void) setegid(oldgid);
264
265 return r;
266 }
267
268 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
269 int fd, r;
270
271 assert(context);
272 assert(output < _EXEC_OUTPUT_MAX);
273 assert(ident);
274 assert(nfd >= 0);
275
276 fd = socket(AF_UNIX, SOCK_STREAM, 0);
277 if (fd < 0)
278 return -errno;
279
280 r = connect_journal_socket(fd, uid, gid);
281 if (r < 0)
282 return r;
283
284 if (shutdown(fd, SHUT_RD) < 0) {
285 safe_close(fd);
286 return -errno;
287 }
288
289 fd_inc_sndbuf(fd, SNDBUF_SIZE);
290
291 dprintf(fd,
292 "%s\n"
293 "%s\n"
294 "%i\n"
295 "%i\n"
296 "%i\n"
297 "%i\n"
298 "%i\n",
299 context->syslog_identifier ? context->syslog_identifier : ident,
300 unit_id,
301 context->syslog_priority,
302 !!context->syslog_level_prefix,
303 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
304 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
305 is_terminal_output(output));
306
307 if (fd != nfd) {
308 r = dup2(fd, nfd) < 0 ? -errno : nfd;
309 safe_close(fd);
310 } else
311 r = nfd;
312
313 return r;
314 }
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and
 * makes it available as fd number 'nfd'. Returns nfd on success, a
 * negative error code otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already got the fd number we were asked for */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
333
334 static bool is_terminal_input(ExecInput i) {
335 return
336 i == EXEC_INPUT_TTY ||
337 i == EXEC_INPUT_TTY_FORCE ||
338 i == EXEC_INPUT_TTY_FAIL;
339 }
340
341 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
342
343 if (is_terminal_input(std_input) && !apply_tty_stdin)
344 return EXEC_INPUT_NULL;
345
346 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
347 return EXEC_INPUT_NULL;
348
349 return std_input;
350 }
351
352 static int fixup_output(ExecOutput std_output, int socket_fd) {
353
354 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
355 return EXEC_OUTPUT_INHERIT;
356
357 return std_output;
358 }
359
360 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
361 ExecInput i;
362
363 assert(context);
364
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366
367 switch (i) {
368
369 case EXEC_INPUT_NULL:
370 return open_null_as(O_RDONLY, STDIN_FILENO);
371
372 case EXEC_INPUT_TTY:
373 case EXEC_INPUT_TTY_FORCE:
374 case EXEC_INPUT_TTY_FAIL: {
375 int fd, r;
376
377 fd = acquire_terminal(tty_path(context),
378 i == EXEC_INPUT_TTY_FAIL,
379 i == EXEC_INPUT_TTY_FORCE,
380 false,
381 USEC_INFINITY);
382 if (fd < 0)
383 return fd;
384
385 if (fd != STDIN_FILENO) {
386 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
387 safe_close(fd);
388 } else
389 r = STDIN_FILENO;
390
391 return r;
392 }
393
394 case EXEC_INPUT_SOCKET:
395 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
396
397 default:
398 assert_not_reached("Unknown input type");
399 }
400 }
401
402 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
403 ExecOutput o;
404 ExecInput i;
405 int r;
406
407 assert(unit);
408 assert(context);
409 assert(ident);
410
411 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
412 o = fixup_output(context->std_output, socket_fd);
413
414 if (fileno == STDERR_FILENO) {
415 ExecOutput e;
416 e = fixup_output(context->std_error, socket_fd);
417
418 /* This expects the input and output are already set up */
419
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e == EXEC_OUTPUT_INHERIT &&
423 o == EXEC_OUTPUT_INHERIT &&
424 i == EXEC_INPUT_NULL &&
425 !is_terminal_input(context->std_input) &&
426 getppid () != 1)
427 return fileno;
428
429 /* Duplicate from stdout if possible */
430 if (e == o || e == EXEC_OUTPUT_INHERIT)
431 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
432
433 o = e;
434
435 } else if (o == EXEC_OUTPUT_INHERIT) {
436 /* If input got downgraded, inherit the original value */
437 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
438 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
439
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i != EXEC_INPUT_NULL)
442 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
443
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
445 if (getppid() != 1)
446 return fileno;
447
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY, fileno);
450 }
451
452 switch (o) {
453
454 case EXEC_OUTPUT_NULL:
455 return open_null_as(O_WRONLY, fileno);
456
457 case EXEC_OUTPUT_TTY:
458 if (is_terminal_input(i))
459 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
460
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
463
464 case EXEC_OUTPUT_SYSLOG:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
466 case EXEC_OUTPUT_KMSG:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
468 case EXEC_OUTPUT_JOURNAL:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
470 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
471 if (r < 0) {
472 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
473 r = open_null_as(O_WRONLY, fileno);
474 }
475 return r;
476
477 case EXEC_OUTPUT_SOCKET:
478 assert(socket_fd >= 0);
479 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481 default:
482 assert_not_reached("Unknown error type");
483 }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487 struct stat st;
488
489 assert(fd >= 0);
490
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd, uid, -1);
493 (void) fchmod(fd, TTY_MODE);
494
495 if (fstat(fd, &st) < 0)
496 return -errno;
497
498 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499 return -EPERM;
500
501 return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505 int *_saved_stdout) {
506 int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508 assert(_saved_stdin);
509 assert(_saved_stdout);
510
511 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512 if (saved_stdin < 0)
513 return -errno;
514
515 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516 if (saved_stdout < 0) {
517 r = errno;
518 goto fail;
519 }
520
521 fd = acquire_terminal(
522 "/dev/console",
523 false,
524 false,
525 false,
526 DEFAULT_CONFIRM_USEC);
527 if (fd < 0) {
528 r = fd;
529 goto fail;
530 }
531
532 r = chown_terminal(fd, getuid());
533 if (r < 0)
534 goto fail;
535
536 if (dup2(fd, STDIN_FILENO) < 0) {
537 r = -errno;
538 goto fail;
539 }
540
541 if (dup2(fd, STDOUT_FILENO) < 0) {
542 r = -errno;
543 goto fail;
544 }
545
546 if (fd >= 2)
547 safe_close(fd);
548
549 *_saved_stdin = saved_stdin;
550 *_saved_stdout = saved_stdout;
551
552 return 0;
553
554 fail:
555 safe_close(saved_stdout);
556 safe_close(saved_stdin);
557 safe_close(fd);
558
559 return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563 _cleanup_close_ int fd = -1;
564 va_list ap;
565
566 assert(format);
567
568 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569 if (fd < 0)
570 return fd;
571
572 va_start(ap, format);
573 vdprintf(fd, format, ap);
574 va_end(ap);
575
576 return 0;
577 }
578
/* Releases the terminal, re-installs the saved stdin/stdout fds (if valid)
 * and closes the saved copies. Returns 0 on success, or the negative errno
 * of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604 int saved_stdout = -1, saved_stdin = -1, r;
605 _cleanup_free_ char *line = NULL;
606
607 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608 if (r < 0)
609 return r;
610
611 line = exec_command_line(argv);
612 if (!line)
613 return -ENOMEM;
614
615 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617 restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619 return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623 bool keep_groups = false;
624 int r;
625
626 assert(context);
627
628 /* Lookup and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
630
631 if (context->group || username) {
632
633 if (context->group) {
634 const char *g = context->group;
635
636 r = get_group_creds(&g, &gid);
637 if (r < 0)
638 return r;
639 }
640
641 /* First step, initialize groups from /etc/groups */
642 if (username && gid != 0) {
643 if (initgroups(username, gid) < 0)
644 return -errno;
645
646 keep_groups = true;
647 }
648
649 /* Second step, set our gids */
650 if (setresgid(gid, gid, gid) < 0)
651 return -errno;
652 }
653
654 if (context->supplementary_groups) {
655 int ngroups_max, k;
656 gid_t *gids;
657 char **i;
658
659 /* Final step, initialize any manually set supplementary groups */
660 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
661
662 if (!(gids = new(gid_t, ngroups_max)))
663 return -ENOMEM;
664
665 if (keep_groups) {
666 k = getgroups(ngroups_max, gids);
667 if (k < 0) {
668 free(gids);
669 return -errno;
670 }
671 } else
672 k = 0;
673
674 STRV_FOREACH(i, context->supplementary_groups) {
675 const char *g;
676
677 if (k >= ngroups_max) {
678 free(gids);
679 return -E2BIG;
680 }
681
682 g = *i;
683 r = get_group_creds(&g, gids+k);
684 if (r < 0) {
685 free(gids);
686 return r;
687 }
688
689 k++;
690 }
691
692 if (setgroups(k, gids) < 0) {
693 free(gids);
694 return -errno;
695 }
696
697 free(gids);
698 }
699
700 return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704 assert(context);
705
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
708
709 if (context->capabilities) {
710 _cleanup_cap_free_ cap_t d = NULL;
711 static const cap_value_t bits[] = {
712 CAP_SETUID, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
714 };
715
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
719 if (uid != 0) {
720 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722 if (prctl(PR_GET_SECUREBITS) != sb)
723 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724 return -errno;
725 }
726
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
729
730 d = cap_dup(context->capabilities);
731 if (!d)
732 return -errno;
733
734 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736 return -errno;
737
738 if (cap_set_proc(d) < 0)
739 return -errno;
740 }
741
742 /* Third step: actually set the uids */
743 if (setresuid(uid, uid, uid) < 0)
744 return -errno;
745
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might got
748 corrupted due to the setresuid() so we need clean them up
749 later. This is done outside of this call. */
750
751 return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757 int num_msg,
758 const struct pam_message **msg,
759 struct pam_response **resp,
760 void *appdata_ptr) {
761
762 /* We don't support conversations */
763
764 return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768 const char *name,
769 const char *user,
770 uid_t uid,
771 const char *tty,
772 char ***pam_env,
773 int fds[], unsigned n_fds) {
774
775 static const struct pam_conv conv = {
776 .conv = null_conv,
777 .appdata_ptr = NULL
778 };
779
780 pam_handle_t *handle = NULL;
781 sigset_t old_ss;
782 int pam_code = PAM_SUCCESS;
783 int err;
784 char **e = NULL;
785 bool close_session = false;
786 pid_t pam_pid = 0, parent_pid;
787 int flags = 0;
788
789 assert(name);
790 assert(user);
791 assert(pam_env);
792
793 /* We set up PAM in the parent process, then fork. The child
794 * will then stay around until killed via PR_GET_PDEATHSIG or
795 * systemd via the cgroup logic. It will then remove the PAM
796 * session again. The parent process will exec() the actual
797 * daemon. We do things this way to ensure that the main PID
798 * of the daemon is the one we initially fork()ed. */
799
800 if (log_get_max_level() < LOG_DEBUG)
801 flags |= PAM_SILENT;
802
803 pam_code = pam_start(name, user, &conv, &handle);
804 if (pam_code != PAM_SUCCESS) {
805 handle = NULL;
806 goto fail;
807 }
808
809 if (tty) {
810 pam_code = pam_set_item(handle, PAM_TTY, tty);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813 }
814
815 pam_code = pam_acct_mgmt(handle, flags);
816 if (pam_code != PAM_SUCCESS)
817 goto fail;
818
819 pam_code = pam_open_session(handle, flags);
820 if (pam_code != PAM_SUCCESS)
821 goto fail;
822
823 close_session = true;
824
825 e = pam_getenvlist(handle);
826 if (!e) {
827 pam_code = PAM_BUF_ERR;
828 goto fail;
829 }
830
831 /* Block SIGTERM, so that we know that it won't get lost in
832 * the child */
833
834 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
835
836 parent_pid = getpid();
837
838 pam_pid = fork();
839 if (pam_pid < 0)
840 goto fail;
841
842 if (pam_pid == 0) {
843 int sig;
844 int r = EXIT_PAM;
845
846 /* The child's job is to reset the PAM session on
847 * termination */
848
849 /* This string must fit in 10 chars (i.e. the length
850 * of "/sbin/init"), to look pretty in /bin/ps */
851 rename_process("(sd-pam)");
852
853 /* Make sure we don't keep open the passed fds in this
854 child. We assume that otherwise only those fds are
855 open here that have been opened by PAM. */
856 close_many(fds, n_fds);
857
858 /* Drop privileges - we don't need any to pam_close_session
859 * and this will make PR_SET_PDEATHSIG work in most cases.
860 * If this fails, ignore the error - but expect sd-pam threads
861 * to fail to exit normally */
862 if (setresuid(uid, uid, uid) < 0)
863 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
864
865 (void) ignore_signals(SIGPIPE, -1);
866
867 /* Wait until our parent died. This will only work if
868 * the above setresuid() succeeds, otherwise the kernel
869 * will not allow unprivileged parents kill their privileged
870 * children this way. We rely on the control groups kill logic
871 * to do the rest for us. */
872 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873 goto child_finish;
874
875 /* Check if our parent process might already have
876 * died? */
877 if (getppid() == parent_pid) {
878 sigset_t ss;
879
880 assert_se(sigemptyset(&ss) >= 0);
881 assert_se(sigaddset(&ss, SIGTERM) >= 0);
882
883 for (;;) {
884 if (sigwait(&ss, &sig) < 0) {
885 if (errno == EINTR)
886 continue;
887
888 goto child_finish;
889 }
890
891 assert(sig == SIGTERM);
892 break;
893 }
894 }
895
896 /* If our parent died we'll end the session */
897 if (getppid() != parent_pid) {
898 pam_code = pam_close_session(handle, flags);
899 if (pam_code != PAM_SUCCESS)
900 goto child_finish;
901 }
902
903 r = 0;
904
905 child_finish:
906 pam_end(handle, pam_code | flags);
907 _exit(r);
908 }
909
910 /* If the child was forked off successfully it will do all the
911 * cleanups, so forget about the handle here. */
912 handle = NULL;
913
914 /* Unblock SIGTERM again in the parent */
915 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
916
917 /* We close the log explicitly here, since the PAM modules
918 * might have opened it, but we don't want this fd around. */
919 closelog();
920
921 *pam_env = e;
922 e = NULL;
923
924 return 0;
925
926 fail:
927 if (pam_code != PAM_SUCCESS) {
928 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
929 err = -EPERM; /* PAM errors do not map to errno */
930 } else {
931 err = log_error_errno(errno, "PAM failed: %m");
932 }
933
934 if (handle) {
935 if (close_session)
936 pam_code = pam_close_session(handle, flags);
937
938 pam_end(handle, pam_code | flags);
939 }
940
941 strv_free(e);
942
943 closelog();
944
945 if (pam_pid > 1) {
946 kill(pam_pid, SIGTERM);
947 kill(pam_pid, SIGCONT);
948 }
949
950 return err;
951 }
952 #endif
953
/* Renames the calling process to "(<name>)", where <name> is the basename
 * of 'path', shortened to its last 8 characters if it is longer. If the
 * basename is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL. The result must fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */
        char buf[11];
        const char *name;
        char *w = buf;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail rather than the head, it is usually the
                 * more interesting part since the beginning might just be
                 * "systemd-" */
                name += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, name, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
984
985 #ifdef HAVE_SECCOMP
986
987 static int apply_seccomp(const ExecContext *c) {
988 uint32_t negative_action, action;
989 scmp_filter_ctx *seccomp;
990 Iterator i;
991 void *id;
992 int r;
993
994 assert(c);
995
996 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
997
998 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
999 if (!seccomp)
1000 return -ENOMEM;
1001
1002 if (c->syscall_archs) {
1003
1004 SET_FOREACH(id, c->syscall_archs, i) {
1005 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1006 if (r == -EEXIST)
1007 continue;
1008 if (r < 0)
1009 goto finish;
1010 }
1011
1012 } else {
1013 r = seccomp_add_secondary_archs(seccomp);
1014 if (r < 0)
1015 goto finish;
1016 }
1017
1018 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1019 SET_FOREACH(id, c->syscall_filter, i) {
1020 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1021 if (r < 0)
1022 goto finish;
1023 }
1024
1025 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1026 if (r < 0)
1027 goto finish;
1028
1029 r = seccomp_load(seccomp);
1030
1031 finish:
1032 seccomp_release(seccomp);
1033 return r;
1034 }
1035
1036 static int apply_address_families(const ExecContext *c) {
1037 scmp_filter_ctx *seccomp;
1038 Iterator i;
1039 int r;
1040
1041 assert(c);
1042
1043 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1044 if (!seccomp)
1045 return -ENOMEM;
1046
1047 r = seccomp_add_secondary_archs(seccomp);
1048 if (r < 0)
1049 goto finish;
1050
1051 if (c->address_families_whitelist) {
1052 int af, first = 0, last = 0;
1053 void *afp;
1054
1055 /* If this is a whitelist, we first block the address
1056 * families that are out of range and then everything
1057 * that is not in the set. First, we find the lowest
1058 * and highest address family in the set. */
1059
1060 SET_FOREACH(afp, c->address_families, i) {
1061 af = PTR_TO_INT(afp);
1062
1063 if (af <= 0 || af >= af_max())
1064 continue;
1065
1066 if (first == 0 || af < first)
1067 first = af;
1068
1069 if (last == 0 || af > last)
1070 last = af;
1071 }
1072
1073 assert((first == 0) == (last == 0));
1074
1075 if (first == 0) {
1076
1077 /* No entries in the valid range, block everything */
1078 r = seccomp_rule_add(
1079 seccomp,
1080 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1081 SCMP_SYS(socket),
1082 0);
1083 if (r < 0)
1084 goto finish;
1085
1086 } else {
1087
1088 /* Block everything below the first entry */
1089 r = seccomp_rule_add(
1090 seccomp,
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092 SCMP_SYS(socket),
1093 1,
1094 SCMP_A0(SCMP_CMP_LT, first));
1095 if (r < 0)
1096 goto finish;
1097
1098 /* Block everything above the last entry */
1099 r = seccomp_rule_add(
1100 seccomp,
1101 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1102 SCMP_SYS(socket),
1103 1,
1104 SCMP_A0(SCMP_CMP_GT, last));
1105 if (r < 0)
1106 goto finish;
1107
1108 /* Block everything between the first and last
1109 * entry */
1110 for (af = 1; af < af_max(); af++) {
1111
1112 if (set_contains(c->address_families, INT_TO_PTR(af)))
1113 continue;
1114
1115 r = seccomp_rule_add(
1116 seccomp,
1117 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1118 SCMP_SYS(socket),
1119 1,
1120 SCMP_A0(SCMP_CMP_EQ, af));
1121 if (r < 0)
1122 goto finish;
1123 }
1124 }
1125
1126 } else {
1127 void *af;
1128
1129 /* If this is a blacklist, then generate one rule for
1130 * each address family that are then combined in OR
1131 * checks. */
1132
1133 SET_FOREACH(af, c->address_families, i) {
1134
1135 r = seccomp_rule_add(
1136 seccomp,
1137 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1138 SCMP_SYS(socket),
1139 1,
1140 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1141 if (r < 0)
1142 goto finish;
1143 }
1144 }
1145
1146 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1147 if (r < 0)
1148 goto finish;
1149
1150 r = seccomp_load(seccomp);
1151
1152 finish:
1153 seccomp_release(seccomp);
1154 return r;
1155 }
1156
1157 #endif
1158
1159 static void do_idle_pipe_dance(int idle_pipe[4]) {
1160 assert(idle_pipe);
1161
1162
1163 safe_close(idle_pipe[1]);
1164 safe_close(idle_pipe[2]);
1165
1166 if (idle_pipe[0] >= 0) {
1167 int r;
1168
1169 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1170
1171 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1172 /* Signal systemd that we are bored and want to continue. */
1173 r = write(idle_pipe[3], "x", 1);
1174 if (r > 0)
1175 /* Wait for systemd to react to the signal above. */
1176 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1177 }
1178
1179 safe_close(idle_pipe[0]);
1180
1181 }
1182
1183 safe_close(idle_pipe[3]);
1184 }
1185
1186 static int build_environment(
1187 const ExecContext *c,
1188 unsigned n_fds,
1189 usec_t watchdog_usec,
1190 const char *home,
1191 const char *username,
1192 const char *shell,
1193 char ***ret) {
1194
1195 _cleanup_strv_free_ char **our_env = NULL;
1196 unsigned n_env = 0;
1197 char *x;
1198
1199 assert(c);
1200 assert(ret);
1201
1202 our_env = new0(char*, 10);
1203 if (!our_env)
1204 return -ENOMEM;
1205
1206 if (n_fds > 0) {
1207 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1208 return -ENOMEM;
1209 our_env[n_env++] = x;
1210
1211 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1212 return -ENOMEM;
1213 our_env[n_env++] = x;
1214 }
1215
1216 if (watchdog_usec > 0) {
1217 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1218 return -ENOMEM;
1219 our_env[n_env++] = x;
1220
1221 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1222 return -ENOMEM;
1223 our_env[n_env++] = x;
1224 }
1225
1226 if (home) {
1227 x = strappend("HOME=", home);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231 }
1232
1233 if (username) {
1234 x = strappend("LOGNAME=", username);
1235 if (!x)
1236 return -ENOMEM;
1237 our_env[n_env++] = x;
1238
1239 x = strappend("USER=", username);
1240 if (!x)
1241 return -ENOMEM;
1242 our_env[n_env++] = x;
1243 }
1244
1245 if (shell) {
1246 x = strappend("SHELL=", shell);
1247 if (!x)
1248 return -ENOMEM;
1249 our_env[n_env++] = x;
1250 }
1251
1252 if (is_terminal_input(c->std_input) ||
1253 c->std_output == EXEC_OUTPUT_TTY ||
1254 c->std_error == EXEC_OUTPUT_TTY ||
1255 c->tty_path) {
1256
1257 x = strdup(default_term_for_tty(tty_path(c)));
1258 if (!x)
1259 return -ENOMEM;
1260 our_env[n_env++] = x;
1261 }
1262
1263 our_env[n_env++] = NULL;
1264 assert(n_env <= 10);
1265
1266 *ret = our_env;
1267 our_env = NULL;
1268
1269 return 0;
1270 }
1271
1272 static bool exec_needs_mount_namespace(
1273 const ExecContext *context,
1274 const ExecParameters *params,
1275 ExecRuntime *runtime) {
1276
1277 assert(context);
1278 assert(params);
1279
1280 if (!strv_isempty(context->read_write_dirs) ||
1281 !strv_isempty(context->read_only_dirs) ||
1282 !strv_isempty(context->inaccessible_dirs))
1283 return true;
1284
1285 if (context->mount_flags != 0)
1286 return true;
1287
1288 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1289 return true;
1290
1291 if (params->bus_endpoint_path)
1292 return true;
1293
1294 if (context->private_devices ||
1295 context->protect_system != PROTECT_SYSTEM_NO ||
1296 context->protect_home != PROTECT_HOME_NO)
1297 return true;
1298
1299 return false;
1300 }
1301
1302 static int exec_child(
1303 Unit *unit,
1304 ExecCommand *command,
1305 const ExecContext *context,
1306 const ExecParameters *params,
1307 ExecRuntime *runtime,
1308 char **argv,
1309 int socket_fd,
1310 int *fds, unsigned n_fds,
1311 char **files_env,
1312 int *exit_status) {
1313
1314 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1315 _cleanup_free_ char *mac_selinux_context_net = NULL;
1316 const char *username = NULL, *home = NULL, *shell = NULL;
1317 unsigned n_dont_close = 0;
1318 int dont_close[n_fds + 4];
1319 uid_t uid = UID_INVALID;
1320 gid_t gid = GID_INVALID;
1321 int i, r;
1322 bool needs_mount_namespace;
1323
1324 assert(unit);
1325 assert(command);
1326 assert(context);
1327 assert(params);
1328 assert(exit_status);
1329
1330 rename_process_from_path(command->path);
1331
1332 /* We reset exactly these signals, since they are the
1333 * only ones we set to SIG_IGN in the main daemon. All
1334 * others we leave untouched because we set them to
1335 * SIG_DFL or a valid handler initially, both of which
1336 * will be demoted to SIG_DFL. */
1337 (void) default_signals(SIGNALS_CRASH_HANDLER,
1338 SIGNALS_IGNORE, -1);
1339
1340 if (context->ignore_sigpipe)
1341 (void) ignore_signals(SIGPIPE, -1);
1342
1343 r = reset_signal_mask();
1344 if (r < 0) {
1345 *exit_status = EXIT_SIGNAL_MASK;
1346 return r;
1347 }
1348
1349 if (params->idle_pipe)
1350 do_idle_pipe_dance(params->idle_pipe);
1351
1352 /* Close sockets very early to make sure we don't
1353 * block init reexecution because it cannot bind its
1354 * sockets */
1355
1356 log_forget_fds();
1357
1358 if (socket_fd >= 0)
1359 dont_close[n_dont_close++] = socket_fd;
1360 if (n_fds > 0) {
1361 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1362 n_dont_close += n_fds;
1363 }
1364 if (params->bus_endpoint_fd >= 0)
1365 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1366 if (runtime) {
1367 if (runtime->netns_storage_socket[0] >= 0)
1368 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1369 if (runtime->netns_storage_socket[1] >= 0)
1370 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1371 }
1372
1373 r = close_all_fds(dont_close, n_dont_close);
1374 if (r < 0) {
1375 *exit_status = EXIT_FDS;
1376 return r;
1377 }
1378
1379 if (!context->same_pgrp)
1380 if (setsid() < 0) {
1381 *exit_status = EXIT_SETSID;
1382 return -errno;
1383 }
1384
1385 exec_context_tty_reset(context);
1386
1387 if (params->confirm_spawn) {
1388 char response;
1389
1390 r = ask_for_confirmation(&response, argv);
1391 if (r == -ETIMEDOUT)
1392 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1393 else if (r < 0)
1394 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1395 else if (response == 's') {
1396 write_confirm_message("Skipping execution.\n");
1397 *exit_status = EXIT_CONFIRM;
1398 return -ECANCELED;
1399 } else if (response == 'n') {
1400 write_confirm_message("Failing execution.\n");
1401 *exit_status = 0;
1402 return 0;
1403 }
1404 }
1405
1406 if (context->user) {
1407 username = context->user;
1408 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1409 if (r < 0) {
1410 *exit_status = EXIT_USER;
1411 return r;
1412 }
1413 }
1414
1415 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1416 * must sure to drop O_NONBLOCK */
1417 if (socket_fd >= 0)
1418 fd_nonblock(socket_fd, false);
1419
1420 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1421 if (r < 0) {
1422 *exit_status = EXIT_STDIN;
1423 return r;
1424 }
1425
1426 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1427 if (r < 0) {
1428 *exit_status = EXIT_STDOUT;
1429 return r;
1430 }
1431
1432 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1433 if (r < 0) {
1434 *exit_status = EXIT_STDERR;
1435 return r;
1436 }
1437
1438 if (params->cgroup_path) {
1439 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1440 if (r < 0) {
1441 *exit_status = EXIT_CGROUP;
1442 return r;
1443 }
1444 }
1445
1446 if (context->oom_score_adjust_set) {
1447 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1448
1449 /* When we can't make this change due to EPERM, then
1450 * let's silently skip over it. User namespaces
1451 * prohibit write access to this file, and we
1452 * shouldn't trip up over that. */
1453
1454 sprintf(t, "%i", context->oom_score_adjust);
1455 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1456 if (r == -EPERM || r == -EACCES) {
1457 log_open();
1458 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1459 log_close();
1460 } else if (r < 0) {
1461 *exit_status = EXIT_OOM_ADJUST;
1462 return -errno;
1463 }
1464 }
1465
1466 if (context->nice_set)
1467 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1468 *exit_status = EXIT_NICE;
1469 return -errno;
1470 }
1471
1472 if (context->cpu_sched_set) {
1473 struct sched_param param = {
1474 .sched_priority = context->cpu_sched_priority,
1475 };
1476
1477 r = sched_setscheduler(0,
1478 context->cpu_sched_policy |
1479 (context->cpu_sched_reset_on_fork ?
1480 SCHED_RESET_ON_FORK : 0),
1481 &param);
1482 if (r < 0) {
1483 *exit_status = EXIT_SETSCHEDULER;
1484 return -errno;
1485 }
1486 }
1487
1488 if (context->cpuset)
1489 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1490 *exit_status = EXIT_CPUAFFINITY;
1491 return -errno;
1492 }
1493
1494 if (context->ioprio_set)
1495 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1496 *exit_status = EXIT_IOPRIO;
1497 return -errno;
1498 }
1499
1500 if (context->timer_slack_nsec != NSEC_INFINITY)
1501 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1502 *exit_status = EXIT_TIMERSLACK;
1503 return -errno;
1504 }
1505
1506 if (context->personality != PERSONALITY_INVALID)
1507 if (personality(context->personality) < 0) {
1508 *exit_status = EXIT_PERSONALITY;
1509 return -errno;
1510 }
1511
1512 if (context->utmp_id)
1513 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1514 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1515 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1516 USER_PROCESS,
1517 username ? "root" : context->user);
1518
1519 if (context->user && is_terminal_input(context->std_input)) {
1520 r = chown_terminal(STDIN_FILENO, uid);
1521 if (r < 0) {
1522 *exit_status = EXIT_STDIN;
1523 return r;
1524 }
1525 }
1526
1527 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1528 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1529
1530 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1531 if (r < 0) {
1532 *exit_status = EXIT_BUS_ENDPOINT;
1533 return r;
1534 }
1535 }
1536
1537 /* If delegation is enabled we'll pass ownership of the cgroup
1538 * (but only in systemd's own controller hierarchy!) to the
1539 * user of the new process. */
1540 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1541 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1542 if (r < 0) {
1543 *exit_status = EXIT_CGROUP;
1544 return r;
1545 }
1546
1547
1548 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1549 if (r < 0) {
1550 *exit_status = EXIT_CGROUP;
1551 return r;
1552 }
1553 }
1554
1555 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1556 char **rt;
1557
1558 STRV_FOREACH(rt, context->runtime_directory) {
1559 _cleanup_free_ char *p;
1560
1561 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1562 if (!p) {
1563 *exit_status = EXIT_RUNTIME_DIRECTORY;
1564 return -ENOMEM;
1565 }
1566
1567 r = mkdir_p_label(p, context->runtime_directory_mode);
1568 if (r < 0) {
1569 *exit_status = EXIT_RUNTIME_DIRECTORY;
1570 return r;
1571 }
1572
1573 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1574 if (r < 0) {
1575 *exit_status = EXIT_RUNTIME_DIRECTORY;
1576 return r;
1577 }
1578 }
1579 }
1580
1581 if (params->apply_permissions) {
1582 r = enforce_groups(context, username, gid);
1583 if (r < 0) {
1584 *exit_status = EXIT_GROUP;
1585 return r;
1586 }
1587 }
1588
1589 umask(context->umask);
1590
1591 #ifdef HAVE_PAM
1592 if (params->apply_permissions && context->pam_name && username) {
1593 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1594 if (r < 0) {
1595 *exit_status = EXIT_PAM;
1596 return r;
1597 }
1598 }
1599 #endif
1600
1601 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1602 r = setup_netns(runtime->netns_storage_socket);
1603 if (r < 0) {
1604 *exit_status = EXIT_NETWORK;
1605 return r;
1606 }
1607 }
1608
1609 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1610
1611 if (needs_mount_namespace) {
1612 char *tmp = NULL, *var = NULL;
1613
1614 /* The runtime struct only contains the parent
1615 * of the private /tmp, which is
1616 * non-accessible to world users. Inside of it
1617 * there's a /tmp that is sticky, and that's
1618 * the one we want to use here. */
1619
1620 if (context->private_tmp && runtime) {
1621 if (runtime->tmp_dir)
1622 tmp = strjoina(runtime->tmp_dir, "/tmp");
1623 if (runtime->var_tmp_dir)
1624 var = strjoina(runtime->var_tmp_dir, "/tmp");
1625 }
1626
1627 r = setup_namespace(
1628 params->apply_chroot ? context->root_directory : NULL,
1629 context->read_write_dirs,
1630 context->read_only_dirs,
1631 context->inaccessible_dirs,
1632 tmp,
1633 var,
1634 params->bus_endpoint_path,
1635 context->private_devices,
1636 context->protect_home,
1637 context->protect_system,
1638 context->mount_flags);
1639
1640 /* If we couldn't set up the namespace this is
1641 * probably due to a missing capability. In this case,
1642 * silently proceeed. */
1643 if (r == -EPERM || r == -EACCES) {
1644 log_open();
1645 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1646 log_close();
1647 } else if (r < 0) {
1648 *exit_status = EXIT_NAMESPACE;
1649 return r;
1650 }
1651 }
1652
1653 if (params->apply_chroot) {
1654 if (!needs_mount_namespace && context->root_directory)
1655 if (chroot(context->root_directory) < 0) {
1656 *exit_status = EXIT_CHROOT;
1657 return -errno;
1658 }
1659
1660 if (chdir(context->working_directory ?: "/") < 0 &&
1661 !context->working_directory_missing_ok) {
1662 *exit_status = EXIT_CHDIR;
1663 return -errno;
1664 }
1665 } else {
1666 _cleanup_free_ char *d = NULL;
1667
1668 if (asprintf(&d, "%s/%s",
1669 context->root_directory ?: "",
1670 context->working_directory ?: "") < 0) {
1671 *exit_status = EXIT_MEMORY;
1672 return -ENOMEM;
1673 }
1674
1675 if (chdir(d) < 0 &&
1676 !context->working_directory_missing_ok) {
1677 *exit_status = EXIT_CHDIR;
1678 return -errno;
1679 }
1680 }
1681
1682 #ifdef HAVE_SELINUX
1683 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1684 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1685 if (r < 0) {
1686 *exit_status = EXIT_SELINUX_CONTEXT;
1687 return r;
1688 }
1689 }
1690 #endif
1691
1692 /* We repeat the fd closing here, to make sure that
1693 * nothing is leaked from the PAM modules. Note that
1694 * we are more aggressive this time since socket_fd
1695 * and the netns fds we don't need anymore. The custom
1696 * endpoint fd was needed to upload the policy and can
1697 * now be closed as well. */
1698 r = close_all_fds(fds, n_fds);
1699 if (r >= 0)
1700 r = shift_fds(fds, n_fds);
1701 if (r >= 0)
1702 r = flags_fds(fds, n_fds, context->non_blocking);
1703 if (r < 0) {
1704 *exit_status = EXIT_FDS;
1705 return r;
1706 }
1707
1708 if (params->apply_permissions) {
1709
1710 for (i = 0; i < _RLIMIT_MAX; i++) {
1711 if (!context->rlimit[i])
1712 continue;
1713
1714 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1715 *exit_status = EXIT_LIMITS;
1716 return -errno;
1717 }
1718 }
1719
1720 if (context->capability_bounding_set_drop) {
1721 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1722 if (r < 0) {
1723 *exit_status = EXIT_CAPABILITIES;
1724 return r;
1725 }
1726 }
1727
1728 #ifdef HAVE_SMACK
1729 if (context->smack_process_label) {
1730 r = mac_smack_apply_pid(0, context->smack_process_label);
1731 if (r < 0) {
1732 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1733 return r;
1734 }
1735 }
1736 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1737 else {
1738 _cleanup_free_ char *exec_label = NULL;
1739
1740 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1741 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1742 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1743 return r;
1744 }
1745
1746 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1747 if (r < 0) {
1748 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1749 return r;
1750 }
1751 }
1752 #endif
1753 #endif
1754
1755 if (context->user) {
1756 r = enforce_user(context, uid);
1757 if (r < 0) {
1758 *exit_status = EXIT_USER;
1759 return r;
1760 }
1761 }
1762
1763 /* PR_GET_SECUREBITS is not privileged, while
1764 * PR_SET_SECUREBITS is. So to suppress
1765 * potential EPERMs we'll try not to call
1766 * PR_SET_SECUREBITS unless necessary. */
1767 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1768 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1769 *exit_status = EXIT_SECUREBITS;
1770 return -errno;
1771 }
1772
1773 if (context->capabilities)
1774 if (cap_set_proc(context->capabilities) < 0) {
1775 *exit_status = EXIT_CAPABILITIES;
1776 return -errno;
1777 }
1778
1779 if (context->no_new_privileges)
1780 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1781 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1782 return -errno;
1783 }
1784
1785 #ifdef HAVE_SECCOMP
1786 if (context->address_families_whitelist ||
1787 !set_isempty(context->address_families)) {
1788 r = apply_address_families(context);
1789 if (r < 0) {
1790 *exit_status = EXIT_ADDRESS_FAMILIES;
1791 return r;
1792 }
1793 }
1794
1795 if (context->syscall_whitelist ||
1796 !set_isempty(context->syscall_filter) ||
1797 !set_isempty(context->syscall_archs)) {
1798 r = apply_seccomp(context);
1799 if (r < 0) {
1800 *exit_status = EXIT_SECCOMP;
1801 return r;
1802 }
1803 }
1804 #endif
1805
1806 #ifdef HAVE_SELINUX
1807 if (mac_selinux_use()) {
1808 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1809
1810 if (exec_context) {
1811 r = setexeccon(exec_context);
1812 if (r < 0) {
1813 *exit_status = EXIT_SELINUX_CONTEXT;
1814 return r;
1815 }
1816 }
1817 }
1818 #endif
1819
1820 #ifdef HAVE_APPARMOR
1821 if (context->apparmor_profile && mac_apparmor_use()) {
1822 r = aa_change_onexec(context->apparmor_profile);
1823 if (r < 0 && !context->apparmor_profile_ignore) {
1824 *exit_status = EXIT_APPARMOR_PROFILE;
1825 return -errno;
1826 }
1827 }
1828 #endif
1829 }
1830
1831 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1832 if (r < 0) {
1833 *exit_status = EXIT_MEMORY;
1834 return r;
1835 }
1836
1837 final_env = strv_env_merge(5,
1838 params->environment,
1839 our_env,
1840 context->environment,
1841 files_env,
1842 pam_env,
1843 NULL);
1844 if (!final_env) {
1845 *exit_status = EXIT_MEMORY;
1846 return -ENOMEM;
1847 }
1848
1849 final_argv = replace_env_argv(argv, final_env);
1850 if (!final_argv) {
1851 *exit_status = EXIT_MEMORY;
1852 return -ENOMEM;
1853 }
1854
1855 final_env = strv_env_clean(final_env);
1856
1857 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1858 _cleanup_free_ char *line;
1859
1860 line = exec_command_line(final_argv);
1861 if (line) {
1862 log_open();
1863 log_struct(LOG_DEBUG,
1864 LOG_UNIT_ID(unit),
1865 "EXECUTABLE=%s", command->path,
1866 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1867 NULL);
1868 log_close();
1869 }
1870 }
1871
1872 execve(command->path, final_argv, final_env);
1873 *exit_status = EXIT_EXEC;
1874 return -errno;
1875 }
1876
1877 int exec_spawn(Unit *unit,
1878 ExecCommand *command,
1879 const ExecContext *context,
1880 const ExecParameters *params,
1881 ExecRuntime *runtime,
1882 pid_t *ret) {
1883
1884 _cleanup_strv_free_ char **files_env = NULL;
1885 int *fds = NULL; unsigned n_fds = 0;
1886 _cleanup_free_ char *line = NULL;
1887 int socket_fd, r;
1888 char **argv;
1889 pid_t pid;
1890
1891 assert(unit);
1892 assert(command);
1893 assert(context);
1894 assert(ret);
1895 assert(params);
1896 assert(params->fds || params->n_fds <= 0);
1897
1898 if (context->std_input == EXEC_INPUT_SOCKET ||
1899 context->std_output == EXEC_OUTPUT_SOCKET ||
1900 context->std_error == EXEC_OUTPUT_SOCKET) {
1901
1902 if (params->n_fds != 1) {
1903 log_unit_error(unit, "Got more than one socket.");
1904 return -EINVAL;
1905 }
1906
1907 socket_fd = params->fds[0];
1908 } else {
1909 socket_fd = -1;
1910 fds = params->fds;
1911 n_fds = params->n_fds;
1912 }
1913
1914 r = exec_context_load_environment(unit, context, &files_env);
1915 if (r < 0)
1916 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1917
1918 argv = params->argv ?: command->argv;
1919 line = exec_command_line(argv);
1920 if (!line)
1921 return log_oom();
1922
1923 log_struct(LOG_DEBUG,
1924 LOG_UNIT_ID(unit),
1925 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1926 "EXECUTABLE=%s", command->path,
1927 NULL);
1928 pid = fork();
1929 if (pid < 0)
1930 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1931
1932 if (pid == 0) {
1933 int exit_status;
1934
1935 r = exec_child(unit,
1936 command,
1937 context,
1938 params,
1939 runtime,
1940 argv,
1941 socket_fd,
1942 fds, n_fds,
1943 files_env,
1944 &exit_status);
1945 if (r < 0) {
1946 log_open();
1947 log_struct_errno(LOG_ERR, r,
1948 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1949 LOG_UNIT_ID(unit),
1950 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1951 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1952 command->path),
1953 "EXECUTABLE=%s", command->path,
1954 NULL);
1955 }
1956
1957 _exit(exit_status);
1958 }
1959
1960 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1961
1962 /* We add the new process to the cgroup both in the child (so
1963 * that we can be sure that no user code is ever executed
1964 * outside of the cgroup) and in the parent (so that we can be
1965 * sure that when we kill the cgroup the process will be
1966 * killed too). */
1967 if (params->cgroup_path)
1968 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1969
1970 exec_status_start(&command->exec_status, pid);
1971
1972 *ret = pid;
1973 return 0;
1974 }
1975
1976 void exec_context_init(ExecContext *c) {
1977 assert(c);
1978
1979 c->umask = 0022;
1980 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1981 c->cpu_sched_policy = SCHED_OTHER;
1982 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1983 c->syslog_level_prefix = true;
1984 c->ignore_sigpipe = true;
1985 c->timer_slack_nsec = NSEC_INFINITY;
1986 c->personality = PERSONALITY_INVALID;
1987 c->runtime_directory_mode = 0755;
1988 }
1989
1990 void exec_context_done(ExecContext *c) {
1991 unsigned l;
1992
1993 assert(c);
1994
1995 c->environment = strv_free(c->environment);
1996 c->environment_files = strv_free(c->environment_files);
1997
1998 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
1999 c->rlimit[l] = mfree(c->rlimit[l]);
2000
2001 c->working_directory = mfree(c->working_directory);
2002 c->root_directory = mfree(c->root_directory);
2003 c->tty_path = mfree(c->tty_path);
2004 c->syslog_identifier = mfree(c->syslog_identifier);
2005 c->user = mfree(c->user);
2006 c->group = mfree(c->group);
2007
2008 c->supplementary_groups = strv_free(c->supplementary_groups);
2009
2010 c->pam_name = mfree(c->pam_name);
2011
2012 if (c->capabilities) {
2013 cap_free(c->capabilities);
2014 c->capabilities = NULL;
2015 }
2016
2017 c->read_only_dirs = strv_free(c->read_only_dirs);
2018 c->read_write_dirs = strv_free(c->read_write_dirs);
2019 c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
2020
2021 if (c->cpuset)
2022 CPU_FREE(c->cpuset);
2023
2024 c->utmp_id = mfree(c->utmp_id);
2025 c->selinux_context = mfree(c->selinux_context);
2026 c->apparmor_profile = mfree(c->apparmor_profile);
2027
2028 set_free(c->syscall_filter);
2029 c->syscall_filter = NULL;
2030
2031 set_free(c->syscall_archs);
2032 c->syscall_archs = NULL;
2033
2034 set_free(c->address_families);
2035 c->address_families = NULL;
2036
2037 c->runtime_directory = strv_free(c->runtime_directory);
2038
2039 bus_endpoint_free(c->bus_endpoint);
2040 c->bus_endpoint = NULL;
2041 }
2042
2043 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2044 char **i;
2045
2046 assert(c);
2047
2048 if (!runtime_prefix)
2049 return 0;
2050
2051 STRV_FOREACH(i, c->runtime_directory) {
2052 _cleanup_free_ char *p;
2053
2054 p = strjoin(runtime_prefix, "/", *i, NULL);
2055 if (!p)
2056 return -ENOMEM;
2057
2058 /* We execute this synchronously, since we need to be
2059 * sure this is gone when we start the service
2060 * next. */
2061 (void) rm_rf(p, REMOVE_ROOT);
2062 }
2063
2064 return 0;
2065 }
2066
2067 void exec_command_done(ExecCommand *c) {
2068 assert(c);
2069
2070 c->path = mfree(c->path);
2071
2072 c->argv = strv_free(c->argv);
2073 }
2074
2075 void exec_command_done_array(ExecCommand *c, unsigned n) {
2076 unsigned i;
2077
2078 for (i = 0; i < n; i++)
2079 exec_command_done(c+i);
2080 }
2081
2082 ExecCommand* exec_command_free_list(ExecCommand *c) {
2083 ExecCommand *i;
2084
2085 while ((i = c)) {
2086 LIST_REMOVE(command, c, i);
2087 exec_command_done(i);
2088 free(i);
2089 }
2090
2091 return NULL;
2092 }
2093
2094 void exec_command_free_array(ExecCommand **c, unsigned n) {
2095 unsigned i;
2096
2097 for (i = 0; i < n; i++)
2098 c[i] = exec_command_free_list(c[i]);
2099 }
2100
2101 typedef struct InvalidEnvInfo {
2102 Unit *unit;
2103 const char *path;
2104 } InvalidEnvInfo;
2105
2106 static void invalid_env(const char *p, void *userdata) {
2107 InvalidEnvInfo *info = userdata;
2108
2109 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2110 }
2111
2112 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2113 char **i, **r = NULL;
2114
2115 assert(c);
2116 assert(l);
2117
2118 STRV_FOREACH(i, c->environment_files) {
2119 char *fn;
2120 int k;
2121 bool ignore = false;
2122 char **p;
2123 _cleanup_globfree_ glob_t pglob = {};
2124 int count, n;
2125
2126 fn = *i;
2127
2128 if (fn[0] == '-') {
2129 ignore = true;
2130 fn ++;
2131 }
2132
2133 if (!path_is_absolute(fn)) {
2134 if (ignore)
2135 continue;
2136
2137 strv_free(r);
2138 return -EINVAL;
2139 }
2140
2141 /* Filename supports globbing, take all matching files */
2142 errno = 0;
2143 if (glob(fn, 0, NULL, &pglob) != 0) {
2144 if (ignore)
2145 continue;
2146
2147 strv_free(r);
2148 return errno ? -errno : -EINVAL;
2149 }
2150 count = pglob.gl_pathc;
2151 if (count == 0) {
2152 if (ignore)
2153 continue;
2154
2155 strv_free(r);
2156 return -EINVAL;
2157 }
2158 for (n = 0; n < count; n++) {
2159 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2160 if (k < 0) {
2161 if (ignore)
2162 continue;
2163
2164 strv_free(r);
2165 return k;
2166 }
2167 /* Log invalid environment variables with filename */
2168 if (p) {
2169 InvalidEnvInfo info = {
2170 .unit = unit,
2171 .path = pglob.gl_pathv[n]
2172 };
2173
2174 p = strv_env_clean_with_callback(p, invalid_env, &info);
2175 }
2176
2177 if (r == NULL)
2178 r = p;
2179 else {
2180 char **m;
2181
2182 m = strv_env_merge(2, r, p);
2183 strv_free(r);
2184 strv_free(p);
2185 if (!m)
2186 return -ENOMEM;
2187
2188 r = m;
2189 }
2190 }
2191 }
2192
2193 *l = r;
2194
2195 return 0;
2196 }
2197
2198 static bool tty_may_match_dev_console(const char *tty) {
2199 _cleanup_free_ char *active = NULL;
2200 char *console;
2201
2202 if (startswith(tty, "/dev/"))
2203 tty += 5;
2204
2205 /* trivial identity? */
2206 if (streq(tty, "console"))
2207 return true;
2208
2209 console = resolve_dev_console(&active);
2210 /* if we could not resolve, assume it may */
2211 if (!console)
2212 return true;
2213
2214 /* "tty0" means the active VC, so it may be the same sometimes */
2215 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2216 }
2217
2218 bool exec_context_may_touch_console(ExecContext *ec) {
2219 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2220 is_terminal_input(ec->std_input) ||
2221 is_terminal_output(ec->std_output) ||
2222 is_terminal_output(ec->std_error)) &&
2223 tty_may_match_dev_console(tty_path(ec));
2224 }
2225
2226 static void strv_fprintf(FILE *f, char **l) {
2227 char **g;
2228
2229 assert(f);
2230
2231 STRV_FOREACH(g, l)
2232 fprintf(f, " %s", *g);
2233 }
2234
2235 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2236 char **e;
2237 unsigned i;
2238
2239 assert(c);
2240 assert(f);
2241
2242 prefix = strempty(prefix);
2243
2244 fprintf(f,
2245 "%sUMask: %04o\n"
2246 "%sWorkingDirectory: %s\n"
2247 "%sRootDirectory: %s\n"
2248 "%sNonBlocking: %s\n"
2249 "%sPrivateTmp: %s\n"
2250 "%sPrivateNetwork: %s\n"
2251 "%sPrivateDevices: %s\n"
2252 "%sProtectHome: %s\n"
2253 "%sProtectSystem: %s\n"
2254 "%sIgnoreSIGPIPE: %s\n",
2255 prefix, c->umask,
2256 prefix, c->working_directory ? c->working_directory : "/",
2257 prefix, c->root_directory ? c->root_directory : "/",
2258 prefix, yes_no(c->non_blocking),
2259 prefix, yes_no(c->private_tmp),
2260 prefix, yes_no(c->private_network),
2261 prefix, yes_no(c->private_devices),
2262 prefix, protect_home_to_string(c->protect_home),
2263 prefix, protect_system_to_string(c->protect_system),
2264 prefix, yes_no(c->ignore_sigpipe));
2265
2266 STRV_FOREACH(e, c->environment)
2267 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2268
2269 STRV_FOREACH(e, c->environment_files)
2270 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2271
2272 if (c->nice_set)
2273 fprintf(f,
2274 "%sNice: %i\n",
2275 prefix, c->nice);
2276
2277 if (c->oom_score_adjust_set)
2278 fprintf(f,
2279 "%sOOMScoreAdjust: %i\n",
2280 prefix, c->oom_score_adjust);
2281
2282 for (i = 0; i < RLIM_NLIMITS; i++)
2283 if (c->rlimit[i])
2284 fprintf(f, "%s%s: "RLIM_FMT"\n",
2285 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2286
2287 if (c->ioprio_set) {
2288 _cleanup_free_ char *class_str = NULL;
2289
2290 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2291 fprintf(f,
2292 "%sIOSchedulingClass: %s\n"
2293 "%sIOPriority: %i\n",
2294 prefix, strna(class_str),
2295 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2296 }
2297
2298 if (c->cpu_sched_set) {
2299 _cleanup_free_ char *policy_str = NULL;
2300
2301 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2302 fprintf(f,
2303 "%sCPUSchedulingPolicy: %s\n"
2304 "%sCPUSchedulingPriority: %i\n"
2305 "%sCPUSchedulingResetOnFork: %s\n",
2306 prefix, strna(policy_str),
2307 prefix, c->cpu_sched_priority,
2308 prefix, yes_no(c->cpu_sched_reset_on_fork));
2309 }
2310
2311 if (c->cpuset) {
2312 fprintf(f, "%sCPUAffinity:", prefix);
2313 for (i = 0; i < c->cpuset_ncpus; i++)
2314 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2315 fprintf(f, " %u", i);
2316 fputs("\n", f);
2317 }
2318
2319 if (c->timer_slack_nsec != NSEC_INFINITY)
2320 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2321
2322 fprintf(f,
2323 "%sStandardInput: %s\n"
2324 "%sStandardOutput: %s\n"
2325 "%sStandardError: %s\n",
2326 prefix, exec_input_to_string(c->std_input),
2327 prefix, exec_output_to_string(c->std_output),
2328 prefix, exec_output_to_string(c->std_error));
2329
2330 if (c->tty_path)
2331 fprintf(f,
2332 "%sTTYPath: %s\n"
2333 "%sTTYReset: %s\n"
2334 "%sTTYVHangup: %s\n"
2335 "%sTTYVTDisallocate: %s\n",
2336 prefix, c->tty_path,
2337 prefix, yes_no(c->tty_reset),
2338 prefix, yes_no(c->tty_vhangup),
2339 prefix, yes_no(c->tty_vt_disallocate));
2340
2341 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2342 c->std_output == EXEC_OUTPUT_KMSG ||
2343 c->std_output == EXEC_OUTPUT_JOURNAL ||
2344 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2345 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2346 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2347 c->std_error == EXEC_OUTPUT_SYSLOG ||
2348 c->std_error == EXEC_OUTPUT_KMSG ||
2349 c->std_error == EXEC_OUTPUT_JOURNAL ||
2350 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2351 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2352 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2353
2354 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2355
2356 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2357 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2358
2359 fprintf(f,
2360 "%sSyslogFacility: %s\n"
2361 "%sSyslogLevel: %s\n",
2362 prefix, strna(fac_str),
2363 prefix, strna(lvl_str));
2364 }
2365
2366 if (c->capabilities) {
2367 _cleanup_cap_free_charp_ char *t;
2368
2369 t = cap_to_text(c->capabilities, NULL);
2370 if (t)
2371 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2372 }
2373
2374 if (c->secure_bits)
2375 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2376 prefix,
2377 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2378 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2379 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2380 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2381 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2382 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2383
2384 if (c->capability_bounding_set_drop) {
2385 unsigned long l;
2386 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2387
2388 for (l = 0; l <= cap_last_cap(); l++)
2389 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2390 fprintf(f, " %s", strna(capability_to_name(l)));
2391
2392 fputs("\n", f);
2393 }
2394
2395 if (c->user)
2396 fprintf(f, "%sUser: %s\n", prefix, c->user);
2397 if (c->group)
2398 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2399
2400 if (strv_length(c->supplementary_groups) > 0) {
2401 fprintf(f, "%sSupplementaryGroups:", prefix);
2402 strv_fprintf(f, c->supplementary_groups);
2403 fputs("\n", f);
2404 }
2405
2406 if (c->pam_name)
2407 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2408
2409 if (strv_length(c->read_write_dirs) > 0) {
2410 fprintf(f, "%sReadWriteDirs:", prefix);
2411 strv_fprintf(f, c->read_write_dirs);
2412 fputs("\n", f);
2413 }
2414
2415 if (strv_length(c->read_only_dirs) > 0) {
2416 fprintf(f, "%sReadOnlyDirs:", prefix);
2417 strv_fprintf(f, c->read_only_dirs);
2418 fputs("\n", f);
2419 }
2420
2421 if (strv_length(c->inaccessible_dirs) > 0) {
2422 fprintf(f, "%sInaccessibleDirs:", prefix);
2423 strv_fprintf(f, c->inaccessible_dirs);
2424 fputs("\n", f);
2425 }
2426
2427 if (c->utmp_id)
2428 fprintf(f,
2429 "%sUtmpIdentifier: %s\n",
2430 prefix, c->utmp_id);
2431
2432 if (c->selinux_context)
2433 fprintf(f,
2434 "%sSELinuxContext: %s%s\n",
2435 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2436
2437 if (c->personality != PERSONALITY_INVALID)
2438 fprintf(f,
2439 "%sPersonality: %s\n",
2440 prefix, strna(personality_to_string(c->personality)));
2441
2442 if (c->syscall_filter) {
2443 #ifdef HAVE_SECCOMP
2444 Iterator j;
2445 void *id;
2446 bool first = true;
2447 #endif
2448
2449 fprintf(f,
2450 "%sSystemCallFilter: ",
2451 prefix);
2452
2453 if (!c->syscall_whitelist)
2454 fputc('~', f);
2455
2456 #ifdef HAVE_SECCOMP
2457 SET_FOREACH(id, c->syscall_filter, j) {
2458 _cleanup_free_ char *name = NULL;
2459
2460 if (first)
2461 first = false;
2462 else
2463 fputc(' ', f);
2464
2465 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2466 fputs(strna(name), f);
2467 }
2468 #endif
2469
2470 fputc('\n', f);
2471 }
2472
2473 if (c->syscall_archs) {
2474 #ifdef HAVE_SECCOMP
2475 Iterator j;
2476 void *id;
2477 #endif
2478
2479 fprintf(f,
2480 "%sSystemCallArchitectures:",
2481 prefix);
2482
2483 #ifdef HAVE_SECCOMP
2484 SET_FOREACH(id, c->syscall_archs, j)
2485 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2486 #endif
2487 fputc('\n', f);
2488 }
2489
2490 if (c->syscall_errno != 0)
2491 fprintf(f,
2492 "%sSystemCallErrorNumber: %s\n",
2493 prefix, strna(errno_to_name(c->syscall_errno)));
2494
2495 if (c->apparmor_profile)
2496 fprintf(f,
2497 "%sAppArmorProfile: %s%s\n",
2498 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2499 }
2500
2501 bool exec_context_maintains_privileges(ExecContext *c) {
2502 assert(c);
2503
2504 /* Returns true if the process forked off would run run under
2505 * an unchanged UID or as root. */
2506
2507 if (!c->user)
2508 return true;
2509
2510 if (streq(c->user, "root") || streq(c->user, "0"))
2511 return true;
2512
2513 return false;
2514 }
2515
2516 void exec_status_start(ExecStatus *s, pid_t pid) {
2517 assert(s);
2518
2519 zero(*s);
2520 s->pid = pid;
2521 dual_timestamp_get(&s->start_timestamp);
2522 }
2523
2524 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2525 assert(s);
2526
2527 if (s->pid && s->pid != pid)
2528 zero(*s);
2529
2530 s->pid = pid;
2531 dual_timestamp_get(&s->exit_timestamp);
2532
2533 s->code = code;
2534 s->status = status;
2535
2536 if (context) {
2537 if (context->utmp_id)
2538 utmp_put_dead_process(context->utmp_id, pid, code, status);
2539
2540 exec_context_tty_reset(context);
2541 }
2542 }
2543
2544 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2545 char buf[FORMAT_TIMESTAMP_MAX];
2546
2547 assert(s);
2548 assert(f);
2549
2550 if (s->pid <= 0)
2551 return;
2552
2553 prefix = strempty(prefix);
2554
2555 fprintf(f,
2556 "%sPID: "PID_FMT"\n",
2557 prefix, s->pid);
2558
2559 if (s->start_timestamp.realtime > 0)
2560 fprintf(f,
2561 "%sStart Timestamp: %s\n",
2562 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2563
2564 if (s->exit_timestamp.realtime > 0)
2565 fprintf(f,
2566 "%sExit Timestamp: %s\n"
2567 "%sExit Code: %s\n"
2568 "%sExit Status: %i\n",
2569 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2570 prefix, sigchld_code_to_string(s->code),
2571 prefix, s->status);
2572 }
2573
2574 char *exec_command_line(char **argv) {
2575 size_t k;
2576 char *n, *p, **a;
2577 bool first = true;
2578
2579 assert(argv);
2580
2581 k = 1;
2582 STRV_FOREACH(a, argv)
2583 k += strlen(*a)+3;
2584
2585 if (!(n = new(char, k)))
2586 return NULL;
2587
2588 p = n;
2589 STRV_FOREACH(a, argv) {
2590
2591 if (!first)
2592 *(p++) = ' ';
2593 else
2594 first = false;
2595
2596 if (strpbrk(*a, WHITESPACE)) {
2597 *(p++) = '\'';
2598 p = stpcpy(p, *a);
2599 *(p++) = '\'';
2600 } else
2601 p = stpcpy(p, *a);
2602
2603 }
2604
2605 *p = 0;
2606
2607 /* FIXME: this doesn't really handle arguments that have
2608 * spaces and ticks in them */
2609
2610 return n;
2611 }
2612
2613 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2614 _cleanup_free_ char *cmd = NULL;
2615 const char *prefix2;
2616
2617 assert(c);
2618 assert(f);
2619
2620 prefix = strempty(prefix);
2621 prefix2 = strjoina(prefix, "\t");
2622
2623 cmd = exec_command_line(c->argv);
2624 fprintf(f,
2625 "%sCommand Line: %s\n",
2626 prefix, cmd ? cmd : strerror(ENOMEM));
2627
2628 exec_status_dump(&c->exec_status, f, prefix2);
2629 }
2630
2631 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2632 assert(f);
2633
2634 prefix = strempty(prefix);
2635
2636 LIST_FOREACH(command, c, c)
2637 exec_command_dump(c, f, prefix);
2638 }
2639
2640 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2641 ExecCommand *end;
2642
2643 assert(l);
2644 assert(e);
2645
2646 if (*l) {
2647 /* It's kind of important, that we keep the order here */
2648 LIST_FIND_TAIL(command, *l, end);
2649 LIST_INSERT_AFTER(command, *l, end, e);
2650 } else
2651 *l = e;
2652 }
2653
2654 int exec_command_set(ExecCommand *c, const char *path, ...) {
2655 va_list ap;
2656 char **l, *p;
2657
2658 assert(c);
2659 assert(path);
2660
2661 va_start(ap, path);
2662 l = strv_new_ap(path, ap);
2663 va_end(ap);
2664
2665 if (!l)
2666 return -ENOMEM;
2667
2668 p = strdup(path);
2669 if (!p) {
2670 strv_free(l);
2671 return -ENOMEM;
2672 }
2673
2674 free(c->path);
2675 c->path = p;
2676
2677 strv_free(c->argv);
2678 c->argv = l;
2679
2680 return 0;
2681 }
2682
2683 int exec_command_append(ExecCommand *c, const char *path, ...) {
2684 _cleanup_strv_free_ char **l = NULL;
2685 va_list ap;
2686 int r;
2687
2688 assert(c);
2689 assert(path);
2690
2691 va_start(ap, path);
2692 l = strv_new_ap(path, ap);
2693 va_end(ap);
2694
2695 if (!l)
2696 return -ENOMEM;
2697
2698 r = strv_extend_strv(&c->argv, l);
2699 if (r < 0)
2700 return r;
2701
2702 return 0;
2703 }
2704
2705
2706 static int exec_runtime_allocate(ExecRuntime **rt) {
2707
2708 if (*rt)
2709 return 0;
2710
2711 *rt = new0(ExecRuntime, 1);
2712 if (!*rt)
2713 return -ENOMEM;
2714
2715 (*rt)->n_ref = 1;
2716 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2717
2718 return 0;
2719 }
2720
2721 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2722 int r;
2723
2724 assert(rt);
2725 assert(c);
2726 assert(id);
2727
2728 if (*rt)
2729 return 1;
2730
2731 if (!c->private_network && !c->private_tmp)
2732 return 0;
2733
2734 r = exec_runtime_allocate(rt);
2735 if (r < 0)
2736 return r;
2737
2738 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2739 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2740 return -errno;
2741 }
2742
2743 if (c->private_tmp && !(*rt)->tmp_dir) {
2744 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2745 if (r < 0)
2746 return r;
2747 }
2748
2749 return 1;
2750 }
2751
2752 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2753 assert(r);
2754 assert(r->n_ref > 0);
2755
2756 r->n_ref++;
2757 return r;
2758 }
2759
2760 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2761
2762 if (!r)
2763 return NULL;
2764
2765 assert(r->n_ref > 0);
2766
2767 r->n_ref--;
2768 if (r->n_ref > 0)
2769 return NULL;
2770
2771 free(r->tmp_dir);
2772 free(r->var_tmp_dir);
2773 safe_close_pair(r->netns_storage_socket);
2774 free(r);
2775
2776 return NULL;
2777 }
2778
2779 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2780 assert(u);
2781 assert(f);
2782 assert(fds);
2783
2784 if (!rt)
2785 return 0;
2786
2787 if (rt->tmp_dir)
2788 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2789
2790 if (rt->var_tmp_dir)
2791 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2792
2793 if (rt->netns_storage_socket[0] >= 0) {
2794 int copy;
2795
2796 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2797 if (copy < 0)
2798 return copy;
2799
2800 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2801 }
2802
2803 if (rt->netns_storage_socket[1] >= 0) {
2804 int copy;
2805
2806 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2807 if (copy < 0)
2808 return copy;
2809
2810 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2811 }
2812
2813 return 0;
2814 }
2815
2816 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2817 int r;
2818
2819 assert(rt);
2820 assert(key);
2821 assert(value);
2822
2823 if (streq(key, "tmp-dir")) {
2824 char *copy;
2825
2826 r = exec_runtime_allocate(rt);
2827 if (r < 0)
2828 return log_oom();
2829
2830 copy = strdup(value);
2831 if (!copy)
2832 return log_oom();
2833
2834 free((*rt)->tmp_dir);
2835 (*rt)->tmp_dir = copy;
2836
2837 } else if (streq(key, "var-tmp-dir")) {
2838 char *copy;
2839
2840 r = exec_runtime_allocate(rt);
2841 if (r < 0)
2842 return log_oom();
2843
2844 copy = strdup(value);
2845 if (!copy)
2846 return log_oom();
2847
2848 free((*rt)->var_tmp_dir);
2849 (*rt)->var_tmp_dir = copy;
2850
2851 } else if (streq(key, "netns-socket-0")) {
2852 int fd;
2853
2854 r = exec_runtime_allocate(rt);
2855 if (r < 0)
2856 return log_oom();
2857
2858 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2859 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2860 else {
2861 safe_close((*rt)->netns_storage_socket[0]);
2862 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2863 }
2864 } else if (streq(key, "netns-socket-1")) {
2865 int fd;
2866
2867 r = exec_runtime_allocate(rt);
2868 if (r < 0)
2869 return log_oom();
2870
2871 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2872 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2873 else {
2874 safe_close((*rt)->netns_storage_socket[1]);
2875 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2876 }
2877 } else
2878 return 0;
2879
2880 return 1;
2881 }
2882
2883 static void *remove_tmpdir_thread(void *p) {
2884 _cleanup_free_ char *path = p;
2885
2886 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2887 return NULL;
2888 }
2889
2890 void exec_runtime_destroy(ExecRuntime *rt) {
2891 int r;
2892
2893 if (!rt)
2894 return;
2895
2896 /* If there are multiple users of this, let's leave the stuff around */
2897 if (rt->n_ref > 1)
2898 return;
2899
2900 if (rt->tmp_dir) {
2901 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2902
2903 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2904 if (r < 0) {
2905 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2906 free(rt->tmp_dir);
2907 }
2908
2909 rt->tmp_dir = NULL;
2910 }
2911
2912 if (rt->var_tmp_dir) {
2913 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2914
2915 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2916 if (r < 0) {
2917 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2918 free(rt->var_tmp_dir);
2919 }
2920
2921 rt->var_tmp_dir = NULL;
2922 }
2923
2924 safe_close_pair(rt->netns_storage_socket);
2925 }
2926
2927 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2928 [EXEC_INPUT_NULL] = "null",
2929 [EXEC_INPUT_TTY] = "tty",
2930 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2931 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2932 [EXEC_INPUT_SOCKET] = "socket"
2933 };
2934
2935 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2936
2937 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2938 [EXEC_OUTPUT_INHERIT] = "inherit",
2939 [EXEC_OUTPUT_NULL] = "null",
2940 [EXEC_OUTPUT_TTY] = "tty",
2941 [EXEC_OUTPUT_SYSLOG] = "syslog",
2942 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2943 [EXEC_OUTPUT_KMSG] = "kmsg",
2944 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2945 [EXEC_OUTPUT_JOURNAL] = "journal",
2946 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2947 [EXEC_OUTPUT_SOCKET] = "socket"
2948 };
2949
2950 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2951
2952 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2953 [EXEC_UTMP_INIT] = "init",
2954 [EXEC_UTMP_LOGIN] = "login",
2955 [EXEC_UTMP_USER] = "user",
2956 };
2957
2958 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);