]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
tree-wide: use coccinelle to patch a lot of code to use mfree()
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <utmpx.h>
35 #include <sys/personality.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "sd-messages.h"
54 #include "rm-rf.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
83
84 #ifdef HAVE_APPARMOR
85 #include "apparmor-util.h"
86 #endif
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #include "execute.h"
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
/* Relocates the passed descriptors so that fds[k] ends up being descriptor
 * number k+3, i.e. packed directly behind stdin/stdout/stderr. The array is
 * rewritten in place with the new descriptor numbers.
 *
 * Returns 0 on success or a negative errno value if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Note: the caller's array is modified! */

        assert(fds);

        for (;;) {
                int retry_at = -1, k;

                for (k = first; k < (int) n_fds; k++) {
                        int target = k + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[k] == target)
                                continue;

                        dup_fd = fcntl(fds[k], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[k]);
                        fds[k] = dup_fd;

                        /* F_DUPFD hands out the lowest free number >= target.
                         * If the slot was still occupied, remember the first
                         * such index so another pass can start from there. */
                        if (retry_at < 0 && dup_fd != target)
                                retry_at = k;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
145
/* Sets or clears O_NONBLOCK (depending on 'nonblock') on each of the passed
 * descriptors and always clears FD_CLOEXEC on them.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (p = fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be handed over to the child process. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
172
173 _pure_ static const char *tty_path(const ExecContext *context) {
174 assert(context);
175
176 if (context->tty_path)
177 return context->tty_path;
178
179 return "/dev/console";
180 }
181
182 static void exec_context_tty_reset(const ExecContext *context) {
183 assert(context);
184
185 if (context->tty_vhangup)
186 terminal_vhangup(tty_path(context));
187
188 if (context->tty_reset)
189 reset_terminal(tty_path(context));
190
191 if (context->tty_vt_disallocate && context->tty_path)
192 vt_disallocate(context->tty_path);
193 }
194
195 static bool is_terminal_output(ExecOutput o) {
196 return
197 o == EXEC_OUTPUT_TTY ||
198 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
201 }
202
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as descriptor number 'nfd'.
 *
 * Returns 'nfd' on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: open() already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
220
221 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
222 union sockaddr_union sa = {
223 .un.sun_family = AF_UNIX,
224 .un.sun_path = "/run/systemd/journal/stdout",
225 };
226 uid_t olduid = UID_INVALID;
227 gid_t oldgid = GID_INVALID;
228 int r;
229
230 if (gid != GID_INVALID) {
231 oldgid = getgid();
232
233 r = setegid(gid);
234 if (r < 0)
235 return -errno;
236 }
237
238 if (uid != UID_INVALID) {
239 olduid = getuid();
240
241 r = seteuid(uid);
242 if (r < 0) {
243 r = -errno;
244 goto restore_gid;
245 }
246 }
247
248 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
249 if (r < 0)
250 r = -errno;
251
252 /* If we fail to restore the uid or gid, things will likely
253 fail later on. This should only happen if an LSM interferes. */
254
255 if (uid != UID_INVALID)
256 (void) seteuid(olduid);
257
258 restore_gid:
259 if (gid != GID_INVALID)
260 (void) setegid(oldgid);
261
262 return r;
263 }
264
265 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
266 int fd, r;
267
268 assert(context);
269 assert(output < _EXEC_OUTPUT_MAX);
270 assert(ident);
271 assert(nfd >= 0);
272
273 fd = socket(AF_UNIX, SOCK_STREAM, 0);
274 if (fd < 0)
275 return -errno;
276
277 r = connect_journal_socket(fd, uid, gid);
278 if (r < 0)
279 return r;
280
281 if (shutdown(fd, SHUT_RD) < 0) {
282 safe_close(fd);
283 return -errno;
284 }
285
286 fd_inc_sndbuf(fd, SNDBUF_SIZE);
287
288 dprintf(fd,
289 "%s\n"
290 "%s\n"
291 "%i\n"
292 "%i\n"
293 "%i\n"
294 "%i\n"
295 "%i\n",
296 context->syslog_identifier ? context->syslog_identifier : ident,
297 unit_id,
298 context->syslog_priority,
299 !!context->syslog_level_prefix,
300 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
301 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
302 is_terminal_output(output));
303
304 if (fd != nfd) {
305 r = dup2(fd, nfd) < 0 ? -errno : nfd;
306 safe_close(fd);
307 } else
308 r = nfd;
309
310 return r;
311 }
/* Opens the terminal at 'path' with the given mode flags (plus O_NOCTTY)
 * and makes it available as descriptor number 'nfd'.
 *
 * Returns 'nfd' on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already got the right number? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
329
330 static bool is_terminal_input(ExecInput i) {
331 return
332 i == EXEC_INPUT_TTY ||
333 i == EXEC_INPUT_TTY_FORCE ||
334 i == EXEC_INPUT_TTY_FAIL;
335 }
336
337 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
338
339 if (is_terminal_input(std_input) && !apply_tty_stdin)
340 return EXEC_INPUT_NULL;
341
342 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
343 return EXEC_INPUT_NULL;
344
345 return std_input;
346 }
347
348 static int fixup_output(ExecOutput std_output, int socket_fd) {
349
350 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
351 return EXEC_OUTPUT_INHERIT;
352
353 return std_output;
354 }
355
356 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
357 ExecInput i;
358
359 assert(context);
360
361 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
362
363 switch (i) {
364
365 case EXEC_INPUT_NULL:
366 return open_null_as(O_RDONLY, STDIN_FILENO);
367
368 case EXEC_INPUT_TTY:
369 case EXEC_INPUT_TTY_FORCE:
370 case EXEC_INPUT_TTY_FAIL: {
371 int fd, r;
372
373 fd = acquire_terminal(tty_path(context),
374 i == EXEC_INPUT_TTY_FAIL,
375 i == EXEC_INPUT_TTY_FORCE,
376 false,
377 USEC_INFINITY);
378 if (fd < 0)
379 return fd;
380
381 if (fd != STDIN_FILENO) {
382 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
383 safe_close(fd);
384 } else
385 r = STDIN_FILENO;
386
387 return r;
388 }
389
390 case EXEC_INPUT_SOCKET:
391 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
392
393 default:
394 assert_not_reached("Unknown input type");
395 }
396 }
397
398 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
399 ExecOutput o;
400 ExecInput i;
401 int r;
402
403 assert(unit);
404 assert(context);
405 assert(ident);
406
407 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408 o = fixup_output(context->std_output, socket_fd);
409
410 if (fileno == STDERR_FILENO) {
411 ExecOutput e;
412 e = fixup_output(context->std_error, socket_fd);
413
414 /* This expects the input and output are already set up */
415
416 /* Don't change the stderr file descriptor if we inherit all
417 * the way and are not on a tty */
418 if (e == EXEC_OUTPUT_INHERIT &&
419 o == EXEC_OUTPUT_INHERIT &&
420 i == EXEC_INPUT_NULL &&
421 !is_terminal_input(context->std_input) &&
422 getppid () != 1)
423 return fileno;
424
425 /* Duplicate from stdout if possible */
426 if (e == o || e == EXEC_OUTPUT_INHERIT)
427 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
428
429 o = e;
430
431 } else if (o == EXEC_OUTPUT_INHERIT) {
432 /* If input got downgraded, inherit the original value */
433 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
435
436 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437 if (i != EXEC_INPUT_NULL)
438 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
439
440 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441 if (getppid() != 1)
442 return fileno;
443
444 /* We need to open /dev/null here anew, to get the right access mode. */
445 return open_null_as(O_WRONLY, fileno);
446 }
447
448 switch (o) {
449
450 case EXEC_OUTPUT_NULL:
451 return open_null_as(O_WRONLY, fileno);
452
453 case EXEC_OUTPUT_TTY:
454 if (is_terminal_input(i))
455 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
456
457 /* We don't reset the terminal if this is just about output */
458 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
459
460 case EXEC_OUTPUT_SYSLOG:
461 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462 case EXEC_OUTPUT_KMSG:
463 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464 case EXEC_OUTPUT_JOURNAL:
465 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
467 if (r < 0) {
468 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
469 r = open_null_as(O_WRONLY, fileno);
470 }
471 return r;
472
473 case EXEC_OUTPUT_SOCKET:
474 assert(socket_fd >= 0);
475 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
476
477 default:
478 assert_not_reached("Unknown error type");
479 }
480 }
481
482 static int chown_terminal(int fd, uid_t uid) {
483 struct stat st;
484
485 assert(fd >= 0);
486
487 /* This might fail. What matters are the results. */
488 (void) fchown(fd, uid, -1);
489 (void) fchmod(fd, TTY_MODE);
490
491 if (fstat(fd, &st) < 0)
492 return -errno;
493
494 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
495 return -EPERM;
496
497 return 0;
498 }
499
500 static int setup_confirm_stdio(int *_saved_stdin,
501 int *_saved_stdout) {
502 int fd = -1, saved_stdin, saved_stdout = -1, r;
503
504 assert(_saved_stdin);
505 assert(_saved_stdout);
506
507 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
508 if (saved_stdin < 0)
509 return -errno;
510
511 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
512 if (saved_stdout < 0) {
513 r = errno;
514 goto fail;
515 }
516
517 fd = acquire_terminal(
518 "/dev/console",
519 false,
520 false,
521 false,
522 DEFAULT_CONFIRM_USEC);
523 if (fd < 0) {
524 r = fd;
525 goto fail;
526 }
527
528 r = chown_terminal(fd, getuid());
529 if (r < 0)
530 goto fail;
531
532 if (dup2(fd, STDIN_FILENO) < 0) {
533 r = -errno;
534 goto fail;
535 }
536
537 if (dup2(fd, STDOUT_FILENO) < 0) {
538 r = -errno;
539 goto fail;
540 }
541
542 if (fd >= 2)
543 safe_close(fd);
544
545 *_saved_stdin = saved_stdin;
546 *_saved_stdout = saved_stdout;
547
548 return 0;
549
550 fail:
551 safe_close(saved_stdout);
552 safe_close(saved_stdin);
553 safe_close(fd);
554
555 return r;
556 }
557
558 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
559 _cleanup_close_ int fd = -1;
560 va_list ap;
561
562 assert(format);
563
564 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
565 if (fd < 0)
566 return fd;
567
568 va_start(ap, format);
569 vdprintf(fd, format, ap);
570 va_end(ap);
571
572 return 0;
573 }
574
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
598
599 static int ask_for_confirmation(char *response, char **argv) {
600 int saved_stdout = -1, saved_stdin = -1, r;
601 _cleanup_free_ char *line = NULL;
602
603 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
604 if (r < 0)
605 return r;
606
607 line = exec_command_line(argv);
608 if (!line)
609 return -ENOMEM;
610
611 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
612
613 restore_confirm_stdio(&saved_stdin, &saved_stdout);
614
615 return r;
616 }
617
618 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
619 bool keep_groups = false;
620 int r;
621
622 assert(context);
623
624 /* Lookup and set GID and supplementary group list. Here too
625 * we avoid NSS lookups for gid=0. */
626
627 if (context->group || username) {
628
629 if (context->group) {
630 const char *g = context->group;
631
632 if ((r = get_group_creds(&g, &gid)) < 0)
633 return r;
634 }
635
636 /* First step, initialize groups from /etc/groups */
637 if (username && gid != 0) {
638 if (initgroups(username, gid) < 0)
639 return -errno;
640
641 keep_groups = true;
642 }
643
644 /* Second step, set our gids */
645 if (setresgid(gid, gid, gid) < 0)
646 return -errno;
647 }
648
649 if (context->supplementary_groups) {
650 int ngroups_max, k;
651 gid_t *gids;
652 char **i;
653
654 /* Final step, initialize any manually set supplementary groups */
655 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
656
657 if (!(gids = new(gid_t, ngroups_max)))
658 return -ENOMEM;
659
660 if (keep_groups) {
661 if ((k = getgroups(ngroups_max, gids)) < 0) {
662 free(gids);
663 return -errno;
664 }
665 } else
666 k = 0;
667
668 STRV_FOREACH(i, context->supplementary_groups) {
669 const char *g;
670
671 if (k >= ngroups_max) {
672 free(gids);
673 return -E2BIG;
674 }
675
676 g = *i;
677 r = get_group_creds(&g, gids+k);
678 if (r < 0) {
679 free(gids);
680 return r;
681 }
682
683 k++;
684 }
685
686 if (setgroups(k, gids) < 0) {
687 free(gids);
688 return -errno;
689 }
690
691 free(gids);
692 }
693
694 return 0;
695 }
696
697 static int enforce_user(const ExecContext *context, uid_t uid) {
698 assert(context);
699
700 /* Sets (but doesn't lookup) the uid and make sure we keep the
701 * capabilities while doing so. */
702
703 if (context->capabilities) {
704 _cleanup_cap_free_ cap_t d = NULL;
705 static const cap_value_t bits[] = {
706 CAP_SETUID, /* Necessary so that we can run setresuid() below */
707 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
708 };
709
710 /* First step: If we need to keep capabilities but
711 * drop privileges we need to make sure we keep our
712 * caps, while we drop privileges. */
713 if (uid != 0) {
714 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
715
716 if (prctl(PR_GET_SECUREBITS) != sb)
717 if (prctl(PR_SET_SECUREBITS, sb) < 0)
718 return -errno;
719 }
720
721 /* Second step: set the capabilities. This will reduce
722 * the capabilities to the minimum we need. */
723
724 d = cap_dup(context->capabilities);
725 if (!d)
726 return -errno;
727
728 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
729 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
730 return -errno;
731
732 if (cap_set_proc(d) < 0)
733 return -errno;
734 }
735
736 /* Third step: actually set the uids */
737 if (setresuid(uid, uid, uid) < 0)
738 return -errno;
739
740 /* At this point we should have all necessary capabilities but
741 are otherwise a normal user. However, the caps might got
742 corrupted due to the setresuid() so we need clean them up
743 later. This is done outside of this call. */
744
745 return 0;
746 }
747
748 #ifdef HAVE_PAM
749
750 static int null_conv(
751 int num_msg,
752 const struct pam_message **msg,
753 struct pam_response **resp,
754 void *appdata_ptr) {
755
756 /* We don't support conversations */
757
758 return PAM_CONV_ERR;
759 }
760
761 static int setup_pam(
762 const char *name,
763 const char *user,
764 uid_t uid,
765 const char *tty,
766 char ***pam_env,
767 int fds[], unsigned n_fds) {
768
769 static const struct pam_conv conv = {
770 .conv = null_conv,
771 .appdata_ptr = NULL
772 };
773
774 pam_handle_t *handle = NULL;
775 sigset_t old_ss;
776 int pam_code = PAM_SUCCESS;
777 int err;
778 char **e = NULL;
779 bool close_session = false;
780 pid_t pam_pid = 0, parent_pid;
781 int flags = 0;
782
783 assert(name);
784 assert(user);
785 assert(pam_env);
786
787 /* We set up PAM in the parent process, then fork. The child
788 * will then stay around until killed via PR_GET_PDEATHSIG or
789 * systemd via the cgroup logic. It will then remove the PAM
790 * session again. The parent process will exec() the actual
791 * daemon. We do things this way to ensure that the main PID
792 * of the daemon is the one we initially fork()ed. */
793
794 if (log_get_max_level() < LOG_DEBUG)
795 flags |= PAM_SILENT;
796
797 pam_code = pam_start(name, user, &conv, &handle);
798 if (pam_code != PAM_SUCCESS) {
799 handle = NULL;
800 goto fail;
801 }
802
803 if (tty) {
804 pam_code = pam_set_item(handle, PAM_TTY, tty);
805 if (pam_code != PAM_SUCCESS)
806 goto fail;
807 }
808
809 pam_code = pam_acct_mgmt(handle, flags);
810 if (pam_code != PAM_SUCCESS)
811 goto fail;
812
813 pam_code = pam_open_session(handle, flags);
814 if (pam_code != PAM_SUCCESS)
815 goto fail;
816
817 close_session = true;
818
819 e = pam_getenvlist(handle);
820 if (!e) {
821 pam_code = PAM_BUF_ERR;
822 goto fail;
823 }
824
825 /* Block SIGTERM, so that we know that it won't get lost in
826 * the child */
827
828 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
829
830 parent_pid = getpid();
831
832 pam_pid = fork();
833 if (pam_pid < 0)
834 goto fail;
835
836 if (pam_pid == 0) {
837 int sig;
838 int r = EXIT_PAM;
839
840 /* The child's job is to reset the PAM session on
841 * termination */
842
843 /* This string must fit in 10 chars (i.e. the length
844 * of "/sbin/init"), to look pretty in /bin/ps */
845 rename_process("(sd-pam)");
846
847 /* Make sure we don't keep open the passed fds in this
848 child. We assume that otherwise only those fds are
849 open here that have been opened by PAM. */
850 close_many(fds, n_fds);
851
852 /* Drop privileges - we don't need any to pam_close_session
853 * and this will make PR_SET_PDEATHSIG work in most cases.
854 * If this fails, ignore the error - but expect sd-pam threads
855 * to fail to exit normally */
856 if (setresuid(uid, uid, uid) < 0)
857 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
858
859 (void) ignore_signals(SIGPIPE, -1);
860
861 /* Wait until our parent died. This will only work if
862 * the above setresuid() succeeds, otherwise the kernel
863 * will not allow unprivileged parents kill their privileged
864 * children this way. We rely on the control groups kill logic
865 * to do the rest for us. */
866 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
867 goto child_finish;
868
869 /* Check if our parent process might already have
870 * died? */
871 if (getppid() == parent_pid) {
872 sigset_t ss;
873
874 assert_se(sigemptyset(&ss) >= 0);
875 assert_se(sigaddset(&ss, SIGTERM) >= 0);
876
877 for (;;) {
878 if (sigwait(&ss, &sig) < 0) {
879 if (errno == EINTR)
880 continue;
881
882 goto child_finish;
883 }
884
885 assert(sig == SIGTERM);
886 break;
887 }
888 }
889
890 /* If our parent died we'll end the session */
891 if (getppid() != parent_pid) {
892 pam_code = pam_close_session(handle, flags);
893 if (pam_code != PAM_SUCCESS)
894 goto child_finish;
895 }
896
897 r = 0;
898
899 child_finish:
900 pam_end(handle, pam_code | flags);
901 _exit(r);
902 }
903
904 /* If the child was forked off successfully it will do all the
905 * cleanups, so forget about the handle here. */
906 handle = NULL;
907
908 /* Unblock SIGTERM again in the parent */
909 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
910
911 /* We close the log explicitly here, since the PAM modules
912 * might have opened it, but we don't want this fd around. */
913 closelog();
914
915 *pam_env = e;
916 e = NULL;
917
918 return 0;
919
920 fail:
921 if (pam_code != PAM_SUCCESS) {
922 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
923 err = -EPERM; /* PAM errors do not map to errno */
924 } else {
925 log_error_errno(errno, "PAM failed: %m");
926 err = -errno;
927 }
928
929 if (handle) {
930 if (close_session)
931 pam_code = pam_close_session(handle, flags);
932
933 pam_end(handle, pam_code | flags);
934 }
935
936 strv_free(e);
937
938 closelog();
939
940 if (pam_pid > 1) {
941 kill(pam_pid, SIGTERM);
942 kill(pam_pid, SIGCONT);
943 }
944
945 return err;
946 }
947 #endif
948
/* Renames the calling process after the last component of 'path', wrapped
 * in parentheses. Only the final 8 characters of the name are used so the
 * result is at most 10 characters long; an empty name yields "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        /* The result is kept within 10 chars (i.e. the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it tends to be the more telling part,
                 * as the beginning may just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
979
980 #ifdef HAVE_SECCOMP
981
982 static int apply_seccomp(const ExecContext *c) {
983 uint32_t negative_action, action;
984 scmp_filter_ctx *seccomp;
985 Iterator i;
986 void *id;
987 int r;
988
989 assert(c);
990
991 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
992
993 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
994 if (!seccomp)
995 return -ENOMEM;
996
997 if (c->syscall_archs) {
998
999 SET_FOREACH(id, c->syscall_archs, i) {
1000 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1001 if (r == -EEXIST)
1002 continue;
1003 if (r < 0)
1004 goto finish;
1005 }
1006
1007 } else {
1008 r = seccomp_add_secondary_archs(seccomp);
1009 if (r < 0)
1010 goto finish;
1011 }
1012
1013 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1014 SET_FOREACH(id, c->syscall_filter, i) {
1015 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1016 if (r < 0)
1017 goto finish;
1018 }
1019
1020 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1021 if (r < 0)
1022 goto finish;
1023
1024 r = seccomp_load(seccomp);
1025
1026 finish:
1027 seccomp_release(seccomp);
1028 return r;
1029 }
1030
1031 static int apply_address_families(const ExecContext *c) {
1032 scmp_filter_ctx *seccomp;
1033 Iterator i;
1034 int r;
1035
1036 assert(c);
1037
1038 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1039 if (!seccomp)
1040 return -ENOMEM;
1041
1042 r = seccomp_add_secondary_archs(seccomp);
1043 if (r < 0)
1044 goto finish;
1045
1046 if (c->address_families_whitelist) {
1047 int af, first = 0, last = 0;
1048 void *afp;
1049
1050 /* If this is a whitelist, we first block the address
1051 * families that are out of range and then everything
1052 * that is not in the set. First, we find the lowest
1053 * and highest address family in the set. */
1054
1055 SET_FOREACH(afp, c->address_families, i) {
1056 af = PTR_TO_INT(afp);
1057
1058 if (af <= 0 || af >= af_max())
1059 continue;
1060
1061 if (first == 0 || af < first)
1062 first = af;
1063
1064 if (last == 0 || af > last)
1065 last = af;
1066 }
1067
1068 assert((first == 0) == (last == 0));
1069
1070 if (first == 0) {
1071
1072 /* No entries in the valid range, block everything */
1073 r = seccomp_rule_add(
1074 seccomp,
1075 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076 SCMP_SYS(socket),
1077 0);
1078 if (r < 0)
1079 goto finish;
1080
1081 } else {
1082
1083 /* Block everything below the first entry */
1084 r = seccomp_rule_add(
1085 seccomp,
1086 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1087 SCMP_SYS(socket),
1088 1,
1089 SCMP_A0(SCMP_CMP_LT, first));
1090 if (r < 0)
1091 goto finish;
1092
1093 /* Block everything above the last entry */
1094 r = seccomp_rule_add(
1095 seccomp,
1096 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097 SCMP_SYS(socket),
1098 1,
1099 SCMP_A0(SCMP_CMP_GT, last));
1100 if (r < 0)
1101 goto finish;
1102
1103 /* Block everything between the first and last
1104 * entry */
1105 for (af = 1; af < af_max(); af++) {
1106
1107 if (set_contains(c->address_families, INT_TO_PTR(af)))
1108 continue;
1109
1110 r = seccomp_rule_add(
1111 seccomp,
1112 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1113 SCMP_SYS(socket),
1114 1,
1115 SCMP_A0(SCMP_CMP_EQ, af));
1116 if (r < 0)
1117 goto finish;
1118 }
1119 }
1120
1121 } else {
1122 void *af;
1123
1124 /* If this is a blacklist, then generate one rule for
1125 * each address family that are then combined in OR
1126 * checks. */
1127
1128 SET_FOREACH(af, c->address_families, i) {
1129
1130 r = seccomp_rule_add(
1131 seccomp,
1132 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1133 SCMP_SYS(socket),
1134 1,
1135 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1136 if (r < 0)
1137 goto finish;
1138 }
1139 }
1140
1141 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1142 if (r < 0)
1143 goto finish;
1144
1145 r = seccomp_load(seccomp);
1146
1147 finish:
1148 seccomp_release(seccomp);
1149 return r;
1150 }
1151
1152 #endif
1153
1154 static void do_idle_pipe_dance(int idle_pipe[4]) {
1155 assert(idle_pipe);
1156
1157
1158 safe_close(idle_pipe[1]);
1159 safe_close(idle_pipe[2]);
1160
1161 if (idle_pipe[0] >= 0) {
1162 int r;
1163
1164 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1165
1166 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1167 /* Signal systemd that we are bored and want to continue. */
1168 r = write(idle_pipe[3], "x", 1);
1169 if (r > 0)
1170 /* Wait for systemd to react to the signal above. */
1171 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1172 }
1173
1174 safe_close(idle_pipe[0]);
1175
1176 }
1177
1178 safe_close(idle_pipe[3]);
1179 }
1180
1181 static int build_environment(
1182 const ExecContext *c,
1183 unsigned n_fds,
1184 usec_t watchdog_usec,
1185 const char *home,
1186 const char *username,
1187 const char *shell,
1188 char ***ret) {
1189
1190 _cleanup_strv_free_ char **our_env = NULL;
1191 unsigned n_env = 0;
1192 char *x;
1193
1194 assert(c);
1195 assert(ret);
1196
1197 our_env = new0(char*, 10);
1198 if (!our_env)
1199 return -ENOMEM;
1200
1201 if (n_fds > 0) {
1202 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1203 return -ENOMEM;
1204 our_env[n_env++] = x;
1205
1206 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1207 return -ENOMEM;
1208 our_env[n_env++] = x;
1209 }
1210
1211 if (watchdog_usec > 0) {
1212 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1213 return -ENOMEM;
1214 our_env[n_env++] = x;
1215
1216 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1217 return -ENOMEM;
1218 our_env[n_env++] = x;
1219 }
1220
1221 if (home) {
1222 x = strappend("HOME=", home);
1223 if (!x)
1224 return -ENOMEM;
1225 our_env[n_env++] = x;
1226 }
1227
1228 if (username) {
1229 x = strappend("LOGNAME=", username);
1230 if (!x)
1231 return -ENOMEM;
1232 our_env[n_env++] = x;
1233
1234 x = strappend("USER=", username);
1235 if (!x)
1236 return -ENOMEM;
1237 our_env[n_env++] = x;
1238 }
1239
1240 if (shell) {
1241 x = strappend("SHELL=", shell);
1242 if (!x)
1243 return -ENOMEM;
1244 our_env[n_env++] = x;
1245 }
1246
1247 if (is_terminal_input(c->std_input) ||
1248 c->std_output == EXEC_OUTPUT_TTY ||
1249 c->std_error == EXEC_OUTPUT_TTY ||
1250 c->tty_path) {
1251
1252 x = strdup(default_term_for_tty(tty_path(c)));
1253 if (!x)
1254 return -ENOMEM;
1255 our_env[n_env++] = x;
1256 }
1257
1258 our_env[n_env++] = NULL;
1259 assert(n_env <= 10);
1260
1261 *ret = our_env;
1262 our_env = NULL;
1263
1264 return 0;
1265 }
1266
1267 static bool exec_needs_mount_namespace(
1268 const ExecContext *context,
1269 const ExecParameters *params,
1270 ExecRuntime *runtime) {
1271
1272 assert(context);
1273 assert(params);
1274
1275 if (!strv_isempty(context->read_write_dirs) ||
1276 !strv_isempty(context->read_only_dirs) ||
1277 !strv_isempty(context->inaccessible_dirs))
1278 return true;
1279
1280 if (context->mount_flags != 0)
1281 return true;
1282
1283 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1284 return true;
1285
1286 if (params->bus_endpoint_path)
1287 return true;
1288
1289 if (context->private_devices ||
1290 context->protect_system != PROTECT_SYSTEM_NO ||
1291 context->protect_home != PROTECT_HOME_NO)
1292 return true;
1293
1294 return false;
1295 }
1296
1297 static int exec_child(
1298 Unit *unit,
1299 ExecCommand *command,
1300 const ExecContext *context,
1301 const ExecParameters *params,
1302 ExecRuntime *runtime,
1303 char **argv,
1304 int socket_fd,
1305 int *fds, unsigned n_fds,
1306 char **files_env,
1307 int *exit_status) {
1308
1309 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1310 _cleanup_free_ char *mac_selinux_context_net = NULL;
1311 const char *username = NULL, *home = NULL, *shell = NULL;
1312 unsigned n_dont_close = 0;
1313 int dont_close[n_fds + 4];
1314 uid_t uid = UID_INVALID;
1315 gid_t gid = GID_INVALID;
1316 int i, r;
1317 bool needs_mount_namespace;
1318
1319 assert(unit);
1320 assert(command);
1321 assert(context);
1322 assert(params);
1323 assert(exit_status);
1324
1325 rename_process_from_path(command->path);
1326
1327 /* We reset exactly these signals, since they are the
1328 * only ones we set to SIG_IGN in the main daemon. All
1329 * others we leave untouched because we set them to
1330 * SIG_DFL or a valid handler initially, both of which
1331 * will be demoted to SIG_DFL. */
1332 (void) default_signals(SIGNALS_CRASH_HANDLER,
1333 SIGNALS_IGNORE, -1);
1334
1335 if (context->ignore_sigpipe)
1336 (void) ignore_signals(SIGPIPE, -1);
1337
1338 r = reset_signal_mask();
1339 if (r < 0) {
1340 *exit_status = EXIT_SIGNAL_MASK;
1341 return r;
1342 }
1343
1344 if (params->idle_pipe)
1345 do_idle_pipe_dance(params->idle_pipe);
1346
1347 /* Close sockets very early to make sure we don't
1348 * block init reexecution because it cannot bind its
1349 * sockets */
1350
1351 log_forget_fds();
1352
1353 if (socket_fd >= 0)
1354 dont_close[n_dont_close++] = socket_fd;
1355 if (n_fds > 0) {
1356 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1357 n_dont_close += n_fds;
1358 }
1359 if (params->bus_endpoint_fd >= 0)
1360 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1361 if (runtime) {
1362 if (runtime->netns_storage_socket[0] >= 0)
1363 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1364 if (runtime->netns_storage_socket[1] >= 0)
1365 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1366 }
1367
1368 r = close_all_fds(dont_close, n_dont_close);
1369 if (r < 0) {
1370 *exit_status = EXIT_FDS;
1371 return r;
1372 }
1373
1374 if (!context->same_pgrp)
1375 if (setsid() < 0) {
1376 *exit_status = EXIT_SETSID;
1377 return -errno;
1378 }
1379
1380 exec_context_tty_reset(context);
1381
1382 if (params->confirm_spawn) {
1383 char response;
1384
1385 r = ask_for_confirmation(&response, argv);
1386 if (r == -ETIMEDOUT)
1387 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1388 else if (r < 0)
1389 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1390 else if (response == 's') {
1391 write_confirm_message("Skipping execution.\n");
1392 *exit_status = EXIT_CONFIRM;
1393 return -ECANCELED;
1394 } else if (response == 'n') {
1395 write_confirm_message("Failing execution.\n");
1396 *exit_status = 0;
1397 return 0;
1398 }
1399 }
1400
1401 if (context->user) {
1402 username = context->user;
1403 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1404 if (r < 0) {
1405 *exit_status = EXIT_USER;
1406 return r;
1407 }
1408 }
1409
1410 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1411 * must sure to drop O_NONBLOCK */
1412 if (socket_fd >= 0)
1413 fd_nonblock(socket_fd, false);
1414
1415 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1416 if (r < 0) {
1417 *exit_status = EXIT_STDIN;
1418 return r;
1419 }
1420
1421 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1422 if (r < 0) {
1423 *exit_status = EXIT_STDOUT;
1424 return r;
1425 }
1426
1427 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1428 if (r < 0) {
1429 *exit_status = EXIT_STDERR;
1430 return r;
1431 }
1432
1433 if (params->cgroup_path) {
1434 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1435 if (r < 0) {
1436 *exit_status = EXIT_CGROUP;
1437 return r;
1438 }
1439 }
1440
1441 if (context->oom_score_adjust_set) {
1442 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1443
1444 /* When we can't make this change due to EPERM, then
1445 * let's silently skip over it. User namespaces
1446 * prohibit write access to this file, and we
1447 * shouldn't trip up over that. */
1448
1449 sprintf(t, "%i", context->oom_score_adjust);
1450 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1451 if (r == -EPERM || r == -EACCES) {
1452 log_open();
1453 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1454 log_close();
1455 } else if (r < 0) {
1456 *exit_status = EXIT_OOM_ADJUST;
1457 return -errno;
1458 }
1459 }
1460
1461 if (context->nice_set)
1462 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1463 *exit_status = EXIT_NICE;
1464 return -errno;
1465 }
1466
1467 if (context->cpu_sched_set) {
1468 struct sched_param param = {
1469 .sched_priority = context->cpu_sched_priority,
1470 };
1471
1472 r = sched_setscheduler(0,
1473 context->cpu_sched_policy |
1474 (context->cpu_sched_reset_on_fork ?
1475 SCHED_RESET_ON_FORK : 0),
1476 &param);
1477 if (r < 0) {
1478 *exit_status = EXIT_SETSCHEDULER;
1479 return -errno;
1480 }
1481 }
1482
1483 if (context->cpuset)
1484 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1485 *exit_status = EXIT_CPUAFFINITY;
1486 return -errno;
1487 }
1488
1489 if (context->ioprio_set)
1490 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1491 *exit_status = EXIT_IOPRIO;
1492 return -errno;
1493 }
1494
1495 if (context->timer_slack_nsec != NSEC_INFINITY)
1496 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1497 *exit_status = EXIT_TIMERSLACK;
1498 return -errno;
1499 }
1500
1501 if (context->personality != PERSONALITY_INVALID)
1502 if (personality(context->personality) < 0) {
1503 *exit_status = EXIT_PERSONALITY;
1504 return -errno;
1505 }
1506
1507 if (context->utmp_id)
1508 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1509 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1510 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1511 USER_PROCESS,
1512 username ? "root" : context->user);
1513
1514 if (context->user && is_terminal_input(context->std_input)) {
1515 r = chown_terminal(STDIN_FILENO, uid);
1516 if (r < 0) {
1517 *exit_status = EXIT_STDIN;
1518 return r;
1519 }
1520 }
1521
1522 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1523 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1524
1525 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1526 if (r < 0) {
1527 *exit_status = EXIT_BUS_ENDPOINT;
1528 return r;
1529 }
1530 }
1531
1532 /* If delegation is enabled we'll pass ownership of the cgroup
1533 * (but only in systemd's own controller hierarchy!) to the
1534 * user of the new process. */
1535 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1536 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1537 if (r < 0) {
1538 *exit_status = EXIT_CGROUP;
1539 return r;
1540 }
1541
1542
1543 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1544 if (r < 0) {
1545 *exit_status = EXIT_CGROUP;
1546 return r;
1547 }
1548 }
1549
1550 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1551 char **rt;
1552
1553 STRV_FOREACH(rt, context->runtime_directory) {
1554 _cleanup_free_ char *p;
1555
1556 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1557 if (!p) {
1558 *exit_status = EXIT_RUNTIME_DIRECTORY;
1559 return -ENOMEM;
1560 }
1561
1562 r = mkdir_p_label(p, context->runtime_directory_mode);
1563 if (r < 0) {
1564 *exit_status = EXIT_RUNTIME_DIRECTORY;
1565 return r;
1566 }
1567
1568 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1569 if (r < 0) {
1570 *exit_status = EXIT_RUNTIME_DIRECTORY;
1571 return r;
1572 }
1573 }
1574 }
1575
1576 if (params->apply_permissions) {
1577 r = enforce_groups(context, username, gid);
1578 if (r < 0) {
1579 *exit_status = EXIT_GROUP;
1580 return r;
1581 }
1582 }
1583
1584 umask(context->umask);
1585
1586 #ifdef HAVE_PAM
1587 if (params->apply_permissions && context->pam_name && username) {
1588 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1589 if (r < 0) {
1590 *exit_status = EXIT_PAM;
1591 return r;
1592 }
1593 }
1594 #endif
1595
1596 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1597 r = setup_netns(runtime->netns_storage_socket);
1598 if (r < 0) {
1599 *exit_status = EXIT_NETWORK;
1600 return r;
1601 }
1602 }
1603
1604 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1605
1606 if (needs_mount_namespace) {
1607 char *tmp = NULL, *var = NULL;
1608
1609 /* The runtime struct only contains the parent
1610 * of the private /tmp, which is
1611 * non-accessible to world users. Inside of it
1612 * there's a /tmp that is sticky, and that's
1613 * the one we want to use here. */
1614
1615 if (context->private_tmp && runtime) {
1616 if (runtime->tmp_dir)
1617 tmp = strjoina(runtime->tmp_dir, "/tmp");
1618 if (runtime->var_tmp_dir)
1619 var = strjoina(runtime->var_tmp_dir, "/tmp");
1620 }
1621
1622 r = setup_namespace(
1623 params->apply_chroot ? context->root_directory : NULL,
1624 context->read_write_dirs,
1625 context->read_only_dirs,
1626 context->inaccessible_dirs,
1627 tmp,
1628 var,
1629 params->bus_endpoint_path,
1630 context->private_devices,
1631 context->protect_home,
1632 context->protect_system,
1633 context->mount_flags);
1634
1635 /* If we couldn't set up the namespace this is
1636 * probably due to a missing capability. In this case,
1637 * silently proceeed. */
1638 if (r == -EPERM || r == -EACCES) {
1639 log_open();
1640 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1641 log_close();
1642 } else if (r < 0) {
1643 *exit_status = EXIT_NAMESPACE;
1644 return r;
1645 }
1646 }
1647
1648 if (params->apply_chroot) {
1649 if (!needs_mount_namespace && context->root_directory)
1650 if (chroot(context->root_directory) < 0) {
1651 *exit_status = EXIT_CHROOT;
1652 return -errno;
1653 }
1654
1655 if (chdir(context->working_directory ?: "/") < 0 &&
1656 !context->working_directory_missing_ok) {
1657 *exit_status = EXIT_CHDIR;
1658 return -errno;
1659 }
1660 } else {
1661 _cleanup_free_ char *d = NULL;
1662
1663 if (asprintf(&d, "%s/%s",
1664 context->root_directory ?: "",
1665 context->working_directory ?: "") < 0) {
1666 *exit_status = EXIT_MEMORY;
1667 return -ENOMEM;
1668 }
1669
1670 if (chdir(d) < 0 &&
1671 !context->working_directory_missing_ok) {
1672 *exit_status = EXIT_CHDIR;
1673 return -errno;
1674 }
1675 }
1676
1677 #ifdef HAVE_SELINUX
1678 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1679 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1680 if (r < 0) {
1681 *exit_status = EXIT_SELINUX_CONTEXT;
1682 return r;
1683 }
1684 }
1685 #endif
1686
1687 /* We repeat the fd closing here, to make sure that
1688 * nothing is leaked from the PAM modules. Note that
1689 * we are more aggressive this time since socket_fd
1690 * and the netns fds we don't need anymore. The custom
1691 * endpoint fd was needed to upload the policy and can
1692 * now be closed as well. */
1693 r = close_all_fds(fds, n_fds);
1694 if (r >= 0)
1695 r = shift_fds(fds, n_fds);
1696 if (r >= 0)
1697 r = flags_fds(fds, n_fds, context->non_blocking);
1698 if (r < 0) {
1699 *exit_status = EXIT_FDS;
1700 return r;
1701 }
1702
1703 if (params->apply_permissions) {
1704
1705 for (i = 0; i < _RLIMIT_MAX; i++) {
1706 if (!context->rlimit[i])
1707 continue;
1708
1709 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1710 *exit_status = EXIT_LIMITS;
1711 return -errno;
1712 }
1713 }
1714
1715 if (context->capability_bounding_set_drop) {
1716 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1717 if (r < 0) {
1718 *exit_status = EXIT_CAPABILITIES;
1719 return r;
1720 }
1721 }
1722
1723 #ifdef HAVE_SMACK
1724 if (context->smack_process_label) {
1725 r = mac_smack_apply_pid(0, context->smack_process_label);
1726 if (r < 0) {
1727 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1728 return r;
1729 }
1730 }
1731 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1732 else {
1733 _cleanup_free_ char *exec_label = NULL;
1734
1735 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1736 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1737 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1738 return r;
1739 }
1740
1741 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1742 if (r < 0) {
1743 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1744 return r;
1745 }
1746 }
1747 #endif
1748 #endif
1749
1750 if (context->user) {
1751 r = enforce_user(context, uid);
1752 if (r < 0) {
1753 *exit_status = EXIT_USER;
1754 return r;
1755 }
1756 }
1757
1758 /* PR_GET_SECUREBITS is not privileged, while
1759 * PR_SET_SECUREBITS is. So to suppress
1760 * potential EPERMs we'll try not to call
1761 * PR_SET_SECUREBITS unless necessary. */
1762 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1763 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1764 *exit_status = EXIT_SECUREBITS;
1765 return -errno;
1766 }
1767
1768 if (context->capabilities)
1769 if (cap_set_proc(context->capabilities) < 0) {
1770 *exit_status = EXIT_CAPABILITIES;
1771 return -errno;
1772 }
1773
1774 if (context->no_new_privileges)
1775 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1776 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1777 return -errno;
1778 }
1779
1780 #ifdef HAVE_SECCOMP
1781 if (context->address_families_whitelist ||
1782 !set_isempty(context->address_families)) {
1783 r = apply_address_families(context);
1784 if (r < 0) {
1785 *exit_status = EXIT_ADDRESS_FAMILIES;
1786 return r;
1787 }
1788 }
1789
1790 if (context->syscall_whitelist ||
1791 !set_isempty(context->syscall_filter) ||
1792 !set_isempty(context->syscall_archs)) {
1793 r = apply_seccomp(context);
1794 if (r < 0) {
1795 *exit_status = EXIT_SECCOMP;
1796 return r;
1797 }
1798 }
1799 #endif
1800
1801 #ifdef HAVE_SELINUX
1802 if (mac_selinux_use()) {
1803 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1804
1805 if (exec_context) {
1806 r = setexeccon(exec_context);
1807 if (r < 0) {
1808 *exit_status = EXIT_SELINUX_CONTEXT;
1809 return r;
1810 }
1811 }
1812 }
1813 #endif
1814
1815 #ifdef HAVE_APPARMOR
1816 if (context->apparmor_profile && mac_apparmor_use()) {
1817 r = aa_change_onexec(context->apparmor_profile);
1818 if (r < 0 && !context->apparmor_profile_ignore) {
1819 *exit_status = EXIT_APPARMOR_PROFILE;
1820 return -errno;
1821 }
1822 }
1823 #endif
1824 }
1825
1826 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1827 if (r < 0) {
1828 *exit_status = EXIT_MEMORY;
1829 return r;
1830 }
1831
1832 final_env = strv_env_merge(5,
1833 params->environment,
1834 our_env,
1835 context->environment,
1836 files_env,
1837 pam_env,
1838 NULL);
1839 if (!final_env) {
1840 *exit_status = EXIT_MEMORY;
1841 return -ENOMEM;
1842 }
1843
1844 final_argv = replace_env_argv(argv, final_env);
1845 if (!final_argv) {
1846 *exit_status = EXIT_MEMORY;
1847 return -ENOMEM;
1848 }
1849
1850 final_env = strv_env_clean(final_env);
1851
1852 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1853 _cleanup_free_ char *line;
1854
1855 line = exec_command_line(final_argv);
1856 if (line) {
1857 log_open();
1858 log_struct(LOG_DEBUG,
1859 LOG_UNIT_ID(unit),
1860 "EXECUTABLE=%s", command->path,
1861 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1862 NULL);
1863 log_close();
1864 }
1865 }
1866
1867 execve(command->path, final_argv, final_env);
1868 *exit_status = EXIT_EXEC;
1869 return -errno;
1870 }
1871
1872 int exec_spawn(Unit *unit,
1873 ExecCommand *command,
1874 const ExecContext *context,
1875 const ExecParameters *params,
1876 ExecRuntime *runtime,
1877 pid_t *ret) {
1878
1879 _cleanup_strv_free_ char **files_env = NULL;
1880 int *fds = NULL; unsigned n_fds = 0;
1881 _cleanup_free_ char *line = NULL;
1882 int socket_fd, r;
1883 char **argv;
1884 pid_t pid;
1885
1886 assert(unit);
1887 assert(command);
1888 assert(context);
1889 assert(ret);
1890 assert(params);
1891 assert(params->fds || params->n_fds <= 0);
1892
1893 if (context->std_input == EXEC_INPUT_SOCKET ||
1894 context->std_output == EXEC_OUTPUT_SOCKET ||
1895 context->std_error == EXEC_OUTPUT_SOCKET) {
1896
1897 if (params->n_fds != 1) {
1898 log_unit_error(unit, "Got more than one socket.");
1899 return -EINVAL;
1900 }
1901
1902 socket_fd = params->fds[0];
1903 } else {
1904 socket_fd = -1;
1905 fds = params->fds;
1906 n_fds = params->n_fds;
1907 }
1908
1909 r = exec_context_load_environment(unit, context, &files_env);
1910 if (r < 0)
1911 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1912
1913 argv = params->argv ?: command->argv;
1914 line = exec_command_line(argv);
1915 if (!line)
1916 return log_oom();
1917
1918 log_struct(LOG_DEBUG,
1919 LOG_UNIT_ID(unit),
1920 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1921 "EXECUTABLE=%s", command->path,
1922 NULL);
1923 pid = fork();
1924 if (pid < 0)
1925 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1926
1927 if (pid == 0) {
1928 int exit_status;
1929
1930 r = exec_child(unit,
1931 command,
1932 context,
1933 params,
1934 runtime,
1935 argv,
1936 socket_fd,
1937 fds, n_fds,
1938 files_env,
1939 &exit_status);
1940 if (r < 0) {
1941 log_open();
1942 log_struct_errno(LOG_ERR, r,
1943 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1944 LOG_UNIT_ID(unit),
1945 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1946 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1947 command->path),
1948 "EXECUTABLE=%s", command->path,
1949 NULL);
1950 }
1951
1952 _exit(exit_status);
1953 }
1954
1955 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1956
1957 /* We add the new process to the cgroup both in the child (so
1958 * that we can be sure that no user code is ever executed
1959 * outside of the cgroup) and in the parent (so that we can be
1960 * sure that when we kill the cgroup the process will be
1961 * killed too). */
1962 if (params->cgroup_path)
1963 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1964
1965 exec_status_start(&command->exec_status, pid);
1966
1967 *ret = pid;
1968 return 0;
1969 }
1970
1971 void exec_context_init(ExecContext *c) {
1972 assert(c);
1973
1974 c->umask = 0022;
1975 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1976 c->cpu_sched_policy = SCHED_OTHER;
1977 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1978 c->syslog_level_prefix = true;
1979 c->ignore_sigpipe = true;
1980 c->timer_slack_nsec = NSEC_INFINITY;
1981 c->personality = PERSONALITY_INVALID;
1982 c->runtime_directory_mode = 0755;
1983 }
1984
1985 void exec_context_done(ExecContext *c) {
1986 unsigned l;
1987
1988 assert(c);
1989
1990 strv_free(c->environment);
1991 c->environment = NULL;
1992
1993 strv_free(c->environment_files);
1994 c->environment_files = NULL;
1995
1996 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1997 c->rlimit[l] = mfree(c->rlimit[l]);
1998 }
1999
2000 c->working_directory = mfree(c->working_directory);
2001 c->root_directory = mfree(c->root_directory);
2002 c->tty_path = mfree(c->tty_path);
2003 c->syslog_identifier = mfree(c->syslog_identifier);
2004 c->user = mfree(c->user);
2005 c->group = mfree(c->group);
2006
2007 strv_free(c->supplementary_groups);
2008 c->supplementary_groups = NULL;
2009
2010 c->pam_name = mfree(c->pam_name);
2011
2012 if (c->capabilities) {
2013 cap_free(c->capabilities);
2014 c->capabilities = NULL;
2015 }
2016
2017 strv_free(c->read_only_dirs);
2018 c->read_only_dirs = NULL;
2019
2020 strv_free(c->read_write_dirs);
2021 c->read_write_dirs = NULL;
2022
2023 strv_free(c->inaccessible_dirs);
2024 c->inaccessible_dirs = NULL;
2025
2026 if (c->cpuset)
2027 CPU_FREE(c->cpuset);
2028
2029 c->utmp_id = mfree(c->utmp_id);
2030 c->selinux_context = mfree(c->selinux_context);
2031 c->apparmor_profile = mfree(c->apparmor_profile);
2032
2033 set_free(c->syscall_filter);
2034 c->syscall_filter = NULL;
2035
2036 set_free(c->syscall_archs);
2037 c->syscall_archs = NULL;
2038
2039 set_free(c->address_families);
2040 c->address_families = NULL;
2041
2042 strv_free(c->runtime_directory);
2043 c->runtime_directory = NULL;
2044
2045 bus_endpoint_free(c->bus_endpoint);
2046 c->bus_endpoint = NULL;
2047 }
2048
2049 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2050 char **i;
2051
2052 assert(c);
2053
2054 if (!runtime_prefix)
2055 return 0;
2056
2057 STRV_FOREACH(i, c->runtime_directory) {
2058 _cleanup_free_ char *p;
2059
2060 p = strjoin(runtime_prefix, "/", *i, NULL);
2061 if (!p)
2062 return -ENOMEM;
2063
2064 /* We execute this synchronously, since we need to be
2065 * sure this is gone when we start the service
2066 * next. */
2067 (void) rm_rf(p, REMOVE_ROOT);
2068 }
2069
2070 return 0;
2071 }
2072
2073 void exec_command_done(ExecCommand *c) {
2074 assert(c);
2075
2076 c->path = mfree(c->path);
2077
2078 strv_free(c->argv);
2079 c->argv = NULL;
2080 }
2081
2082 void exec_command_done_array(ExecCommand *c, unsigned n) {
2083 unsigned i;
2084
2085 for (i = 0; i < n; i++)
2086 exec_command_done(c+i);
2087 }
2088
2089 ExecCommand* exec_command_free_list(ExecCommand *c) {
2090 ExecCommand *i;
2091
2092 while ((i = c)) {
2093 LIST_REMOVE(command, c, i);
2094 exec_command_done(i);
2095 free(i);
2096 }
2097
2098 return NULL;
2099 }
2100
2101 void exec_command_free_array(ExecCommand **c, unsigned n) {
2102 unsigned i;
2103
2104 for (i = 0; i < n; i++)
2105 c[i] = exec_command_free_list(c[i]);
2106 }
2107
2108 typedef struct InvalidEnvInfo {
2109 Unit *unit;
2110 const char *path;
2111 } InvalidEnvInfo;
2112
2113 static void invalid_env(const char *p, void *userdata) {
2114 InvalidEnvInfo *info = userdata;
2115
2116 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2117 }
2118
2119 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2120 char **i, **r = NULL;
2121
2122 assert(c);
2123 assert(l);
2124
2125 STRV_FOREACH(i, c->environment_files) {
2126 char *fn;
2127 int k;
2128 bool ignore = false;
2129 char **p;
2130 _cleanup_globfree_ glob_t pglob = {};
2131 int count, n;
2132
2133 fn = *i;
2134
2135 if (fn[0] == '-') {
2136 ignore = true;
2137 fn ++;
2138 }
2139
2140 if (!path_is_absolute(fn)) {
2141 if (ignore)
2142 continue;
2143
2144 strv_free(r);
2145 return -EINVAL;
2146 }
2147
2148 /* Filename supports globbing, take all matching files */
2149 errno = 0;
2150 if (glob(fn, 0, NULL, &pglob) != 0) {
2151 if (ignore)
2152 continue;
2153
2154 strv_free(r);
2155 return errno ? -errno : -EINVAL;
2156 }
2157 count = pglob.gl_pathc;
2158 if (count == 0) {
2159 if (ignore)
2160 continue;
2161
2162 strv_free(r);
2163 return -EINVAL;
2164 }
2165 for (n = 0; n < count; n++) {
2166 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2167 if (k < 0) {
2168 if (ignore)
2169 continue;
2170
2171 strv_free(r);
2172 return k;
2173 }
2174 /* Log invalid environment variables with filename */
2175 if (p) {
2176 InvalidEnvInfo info = {
2177 .unit = unit,
2178 .path = pglob.gl_pathv[n]
2179 };
2180
2181 p = strv_env_clean_with_callback(p, invalid_env, &info);
2182 }
2183
2184 if (r == NULL)
2185 r = p;
2186 else {
2187 char **m;
2188
2189 m = strv_env_merge(2, r, p);
2190 strv_free(r);
2191 strv_free(p);
2192 if (!m)
2193 return -ENOMEM;
2194
2195 r = m;
2196 }
2197 }
2198 }
2199
2200 *l = r;
2201
2202 return 0;
2203 }
2204
2205 static bool tty_may_match_dev_console(const char *tty) {
2206 _cleanup_free_ char *active = NULL;
2207 char *console;
2208
2209 if (startswith(tty, "/dev/"))
2210 tty += 5;
2211
2212 /* trivial identity? */
2213 if (streq(tty, "console"))
2214 return true;
2215
2216 console = resolve_dev_console(&active);
2217 /* if we could not resolve, assume it may */
2218 if (!console)
2219 return true;
2220
2221 /* "tty0" means the active VC, so it may be the same sometimes */
2222 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2223 }
2224
2225 bool exec_context_may_touch_console(ExecContext *ec) {
2226 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2227 is_terminal_input(ec->std_input) ||
2228 is_terminal_output(ec->std_output) ||
2229 is_terminal_output(ec->std_error)) &&
2230 tty_may_match_dev_console(tty_path(ec));
2231 }
2232
2233 static void strv_fprintf(FILE *f, char **l) {
2234 char **g;
2235
2236 assert(f);
2237
2238 STRV_FOREACH(g, l)
2239 fprintf(f, " %s", *g);
2240 }
2241
2242 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2243 char **e;
2244 unsigned i;
2245
2246 assert(c);
2247 assert(f);
2248
2249 prefix = strempty(prefix);
2250
2251 fprintf(f,
2252 "%sUMask: %04o\n"
2253 "%sWorkingDirectory: %s\n"
2254 "%sRootDirectory: %s\n"
2255 "%sNonBlocking: %s\n"
2256 "%sPrivateTmp: %s\n"
2257 "%sPrivateNetwork: %s\n"
2258 "%sPrivateDevices: %s\n"
2259 "%sProtectHome: %s\n"
2260 "%sProtectSystem: %s\n"
2261 "%sIgnoreSIGPIPE: %s\n",
2262 prefix, c->umask,
2263 prefix, c->working_directory ? c->working_directory : "/",
2264 prefix, c->root_directory ? c->root_directory : "/",
2265 prefix, yes_no(c->non_blocking),
2266 prefix, yes_no(c->private_tmp),
2267 prefix, yes_no(c->private_network),
2268 prefix, yes_no(c->private_devices),
2269 prefix, protect_home_to_string(c->protect_home),
2270 prefix, protect_system_to_string(c->protect_system),
2271 prefix, yes_no(c->ignore_sigpipe));
2272
2273 STRV_FOREACH(e, c->environment)
2274 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2275
2276 STRV_FOREACH(e, c->environment_files)
2277 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2278
2279 if (c->nice_set)
2280 fprintf(f,
2281 "%sNice: %i\n",
2282 prefix, c->nice);
2283
2284 if (c->oom_score_adjust_set)
2285 fprintf(f,
2286 "%sOOMScoreAdjust: %i\n",
2287 prefix, c->oom_score_adjust);
2288
2289 for (i = 0; i < RLIM_NLIMITS; i++)
2290 if (c->rlimit[i])
2291 fprintf(f, "%s%s: "RLIM_FMT"\n",
2292 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2293
2294 if (c->ioprio_set) {
2295 _cleanup_free_ char *class_str = NULL;
2296
2297 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2298 fprintf(f,
2299 "%sIOSchedulingClass: %s\n"
2300 "%sIOPriority: %i\n",
2301 prefix, strna(class_str),
2302 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2303 }
2304
2305 if (c->cpu_sched_set) {
2306 _cleanup_free_ char *policy_str = NULL;
2307
2308 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2309 fprintf(f,
2310 "%sCPUSchedulingPolicy: %s\n"
2311 "%sCPUSchedulingPriority: %i\n"
2312 "%sCPUSchedulingResetOnFork: %s\n",
2313 prefix, strna(policy_str),
2314 prefix, c->cpu_sched_priority,
2315 prefix, yes_no(c->cpu_sched_reset_on_fork));
2316 }
2317
2318 if (c->cpuset) {
2319 fprintf(f, "%sCPUAffinity:", prefix);
2320 for (i = 0; i < c->cpuset_ncpus; i++)
2321 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2322 fprintf(f, " %u", i);
2323 fputs("\n", f);
2324 }
2325
2326 if (c->timer_slack_nsec != NSEC_INFINITY)
2327 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2328
2329 fprintf(f,
2330 "%sStandardInput: %s\n"
2331 "%sStandardOutput: %s\n"
2332 "%sStandardError: %s\n",
2333 prefix, exec_input_to_string(c->std_input),
2334 prefix, exec_output_to_string(c->std_output),
2335 prefix, exec_output_to_string(c->std_error));
2336
2337 if (c->tty_path)
2338 fprintf(f,
2339 "%sTTYPath: %s\n"
2340 "%sTTYReset: %s\n"
2341 "%sTTYVHangup: %s\n"
2342 "%sTTYVTDisallocate: %s\n",
2343 prefix, c->tty_path,
2344 prefix, yes_no(c->tty_reset),
2345 prefix, yes_no(c->tty_vhangup),
2346 prefix, yes_no(c->tty_vt_disallocate));
2347
2348 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2349 c->std_output == EXEC_OUTPUT_KMSG ||
2350 c->std_output == EXEC_OUTPUT_JOURNAL ||
2351 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2352 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2353 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2354 c->std_error == EXEC_OUTPUT_SYSLOG ||
2355 c->std_error == EXEC_OUTPUT_KMSG ||
2356 c->std_error == EXEC_OUTPUT_JOURNAL ||
2357 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2358 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2359 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2360
2361 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2362
2363 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2364 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2365
2366 fprintf(f,
2367 "%sSyslogFacility: %s\n"
2368 "%sSyslogLevel: %s\n",
2369 prefix, strna(fac_str),
2370 prefix, strna(lvl_str));
2371 }
2372
2373 if (c->capabilities) {
2374 _cleanup_cap_free_charp_ char *t;
2375
2376 t = cap_to_text(c->capabilities, NULL);
2377 if (t)
2378 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2379 }
2380
2381 if (c->secure_bits)
2382 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2383 prefix,
2384 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2385 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2386 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2387 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2388 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2389 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2390
2391 if (c->capability_bounding_set_drop) {
2392 unsigned long l;
2393 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2394
2395 for (l = 0; l <= cap_last_cap(); l++)
2396 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2397 fprintf(f, " %s", strna(capability_to_name(l)));
2398
2399 fputs("\n", f);
2400 }
2401
2402 if (c->user)
2403 fprintf(f, "%sUser: %s\n", prefix, c->user);
2404 if (c->group)
2405 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2406
2407 if (strv_length(c->supplementary_groups) > 0) {
2408 fprintf(f, "%sSupplementaryGroups:", prefix);
2409 strv_fprintf(f, c->supplementary_groups);
2410 fputs("\n", f);
2411 }
2412
2413 if (c->pam_name)
2414 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2415
2416 if (strv_length(c->read_write_dirs) > 0) {
2417 fprintf(f, "%sReadWriteDirs:", prefix);
2418 strv_fprintf(f, c->read_write_dirs);
2419 fputs("\n", f);
2420 }
2421
2422 if (strv_length(c->read_only_dirs) > 0) {
2423 fprintf(f, "%sReadOnlyDirs:", prefix);
2424 strv_fprintf(f, c->read_only_dirs);
2425 fputs("\n", f);
2426 }
2427
2428 if (strv_length(c->inaccessible_dirs) > 0) {
2429 fprintf(f, "%sInaccessibleDirs:", prefix);
2430 strv_fprintf(f, c->inaccessible_dirs);
2431 fputs("\n", f);
2432 }
2433
2434 if (c->utmp_id)
2435 fprintf(f,
2436 "%sUtmpIdentifier: %s\n",
2437 prefix, c->utmp_id);
2438
2439 if (c->selinux_context)
2440 fprintf(f,
2441 "%sSELinuxContext: %s%s\n",
2442 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2443
2444 if (c->personality != PERSONALITY_INVALID)
2445 fprintf(f,
2446 "%sPersonality: %s\n",
2447 prefix, strna(personality_to_string(c->personality)));
2448
2449 if (c->syscall_filter) {
2450 #ifdef HAVE_SECCOMP
2451 Iterator j;
2452 void *id;
2453 bool first = true;
2454 #endif
2455
2456 fprintf(f,
2457 "%sSystemCallFilter: ",
2458 prefix);
2459
2460 if (!c->syscall_whitelist)
2461 fputc('~', f);
2462
2463 #ifdef HAVE_SECCOMP
2464 SET_FOREACH(id, c->syscall_filter, j) {
2465 _cleanup_free_ char *name = NULL;
2466
2467 if (first)
2468 first = false;
2469 else
2470 fputc(' ', f);
2471
2472 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2473 fputs(strna(name), f);
2474 }
2475 #endif
2476
2477 fputc('\n', f);
2478 }
2479
2480 if (c->syscall_archs) {
2481 #ifdef HAVE_SECCOMP
2482 Iterator j;
2483 void *id;
2484 #endif
2485
2486 fprintf(f,
2487 "%sSystemCallArchitectures:",
2488 prefix);
2489
2490 #ifdef HAVE_SECCOMP
2491 SET_FOREACH(id, c->syscall_archs, j)
2492 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2493 #endif
2494 fputc('\n', f);
2495 }
2496
2497 if (c->syscall_errno != 0)
2498 fprintf(f,
2499 "%sSystemCallErrorNumber: %s\n",
2500 prefix, strna(errno_to_name(c->syscall_errno)));
2501
2502 if (c->apparmor_profile)
2503 fprintf(f,
2504 "%sAppArmorProfile: %s%s\n",
2505 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2506 }
2507
2508 bool exec_context_maintains_privileges(ExecContext *c) {
2509 assert(c);
2510
2511 /* Returns true if the process forked off would run run under
2512 * an unchanged UID or as root. */
2513
2514 if (!c->user)
2515 return true;
2516
2517 if (streq(c->user, "root") || streq(c->user, "0"))
2518 return true;
2519
2520 return false;
2521 }
2522
2523 void exec_status_start(ExecStatus *s, pid_t pid) {
2524 assert(s);
2525
2526 zero(*s);
2527 s->pid = pid;
2528 dual_timestamp_get(&s->start_timestamp);
2529 }
2530
2531 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2532 assert(s);
2533
2534 if (s->pid && s->pid != pid)
2535 zero(*s);
2536
2537 s->pid = pid;
2538 dual_timestamp_get(&s->exit_timestamp);
2539
2540 s->code = code;
2541 s->status = status;
2542
2543 if (context) {
2544 if (context->utmp_id)
2545 utmp_put_dead_process(context->utmp_id, pid, code, status);
2546
2547 exec_context_tty_reset(context);
2548 }
2549 }
2550
2551 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2552 char buf[FORMAT_TIMESTAMP_MAX];
2553
2554 assert(s);
2555 assert(f);
2556
2557 if (s->pid <= 0)
2558 return;
2559
2560 prefix = strempty(prefix);
2561
2562 fprintf(f,
2563 "%sPID: "PID_FMT"\n",
2564 prefix, s->pid);
2565
2566 if (s->start_timestamp.realtime > 0)
2567 fprintf(f,
2568 "%sStart Timestamp: %s\n",
2569 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2570
2571 if (s->exit_timestamp.realtime > 0)
2572 fprintf(f,
2573 "%sExit Timestamp: %s\n"
2574 "%sExit Code: %s\n"
2575 "%sExit Status: %i\n",
2576 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2577 prefix, sigchld_code_to_string(s->code),
2578 prefix, s->status);
2579 }
2580
2581 char *exec_command_line(char **argv) {
2582 size_t k;
2583 char *n, *p, **a;
2584 bool first = true;
2585
2586 assert(argv);
2587
2588 k = 1;
2589 STRV_FOREACH(a, argv)
2590 k += strlen(*a)+3;
2591
2592 if (!(n = new(char, k)))
2593 return NULL;
2594
2595 p = n;
2596 STRV_FOREACH(a, argv) {
2597
2598 if (!first)
2599 *(p++) = ' ';
2600 else
2601 first = false;
2602
2603 if (strpbrk(*a, WHITESPACE)) {
2604 *(p++) = '\'';
2605 p = stpcpy(p, *a);
2606 *(p++) = '\'';
2607 } else
2608 p = stpcpy(p, *a);
2609
2610 }
2611
2612 *p = 0;
2613
2614 /* FIXME: this doesn't really handle arguments that have
2615 * spaces and ticks in them */
2616
2617 return n;
2618 }
2619
2620 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2621 _cleanup_free_ char *cmd = NULL;
2622 const char *prefix2;
2623
2624 assert(c);
2625 assert(f);
2626
2627 prefix = strempty(prefix);
2628 prefix2 = strjoina(prefix, "\t");
2629
2630 cmd = exec_command_line(c->argv);
2631 fprintf(f,
2632 "%sCommand Line: %s\n",
2633 prefix, cmd ? cmd : strerror(ENOMEM));
2634
2635 exec_status_dump(&c->exec_status, f, prefix2);
2636 }
2637
2638 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2639 assert(f);
2640
2641 prefix = strempty(prefix);
2642
2643 LIST_FOREACH(command, c, c)
2644 exec_command_dump(c, f, prefix);
2645 }
2646
2647 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2648 ExecCommand *end;
2649
2650 assert(l);
2651 assert(e);
2652
2653 if (*l) {
2654 /* It's kind of important, that we keep the order here */
2655 LIST_FIND_TAIL(command, *l, end);
2656 LIST_INSERT_AFTER(command, *l, end, e);
2657 } else
2658 *l = e;
2659 }
2660
2661 int exec_command_set(ExecCommand *c, const char *path, ...) {
2662 va_list ap;
2663 char **l, *p;
2664
2665 assert(c);
2666 assert(path);
2667
2668 va_start(ap, path);
2669 l = strv_new_ap(path, ap);
2670 va_end(ap);
2671
2672 if (!l)
2673 return -ENOMEM;
2674
2675 p = strdup(path);
2676 if (!p) {
2677 strv_free(l);
2678 return -ENOMEM;
2679 }
2680
2681 free(c->path);
2682 c->path = p;
2683
2684 strv_free(c->argv);
2685 c->argv = l;
2686
2687 return 0;
2688 }
2689
2690 int exec_command_append(ExecCommand *c, const char *path, ...) {
2691 _cleanup_strv_free_ char **l = NULL;
2692 va_list ap;
2693 int r;
2694
2695 assert(c);
2696 assert(path);
2697
2698 va_start(ap, path);
2699 l = strv_new_ap(path, ap);
2700 va_end(ap);
2701
2702 if (!l)
2703 return -ENOMEM;
2704
2705 r = strv_extend_strv(&c->argv, l);
2706 if (r < 0)
2707 return r;
2708
2709 return 0;
2710 }
2711
2712
2713 static int exec_runtime_allocate(ExecRuntime **rt) {
2714
2715 if (*rt)
2716 return 0;
2717
2718 *rt = new0(ExecRuntime, 1);
2719 if (!*rt)
2720 return -ENOMEM;
2721
2722 (*rt)->n_ref = 1;
2723 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2724
2725 return 0;
2726 }
2727
2728 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2729 int r;
2730
2731 assert(rt);
2732 assert(c);
2733 assert(id);
2734
2735 if (*rt)
2736 return 1;
2737
2738 if (!c->private_network && !c->private_tmp)
2739 return 0;
2740
2741 r = exec_runtime_allocate(rt);
2742 if (r < 0)
2743 return r;
2744
2745 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2746 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2747 return -errno;
2748 }
2749
2750 if (c->private_tmp && !(*rt)->tmp_dir) {
2751 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2752 if (r < 0)
2753 return r;
2754 }
2755
2756 return 1;
2757 }
2758
2759 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2760 assert(r);
2761 assert(r->n_ref > 0);
2762
2763 r->n_ref++;
2764 return r;
2765 }
2766
2767 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2768
2769 if (!r)
2770 return NULL;
2771
2772 assert(r->n_ref > 0);
2773
2774 r->n_ref--;
2775 if (r->n_ref > 0)
2776 return NULL;
2777
2778 free(r->tmp_dir);
2779 free(r->var_tmp_dir);
2780 safe_close_pair(r->netns_storage_socket);
2781 free(r);
2782
2783 return NULL;
2784 }
2785
2786 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2787 assert(u);
2788 assert(f);
2789 assert(fds);
2790
2791 if (!rt)
2792 return 0;
2793
2794 if (rt->tmp_dir)
2795 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2796
2797 if (rt->var_tmp_dir)
2798 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2799
2800 if (rt->netns_storage_socket[0] >= 0) {
2801 int copy;
2802
2803 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2804 if (copy < 0)
2805 return copy;
2806
2807 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2808 }
2809
2810 if (rt->netns_storage_socket[1] >= 0) {
2811 int copy;
2812
2813 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2814 if (copy < 0)
2815 return copy;
2816
2817 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2818 }
2819
2820 return 0;
2821 }
2822
2823 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2824 int r;
2825
2826 assert(rt);
2827 assert(key);
2828 assert(value);
2829
2830 if (streq(key, "tmp-dir")) {
2831 char *copy;
2832
2833 r = exec_runtime_allocate(rt);
2834 if (r < 0)
2835 return log_oom();
2836
2837 copy = strdup(value);
2838 if (!copy)
2839 return log_oom();
2840
2841 free((*rt)->tmp_dir);
2842 (*rt)->tmp_dir = copy;
2843
2844 } else if (streq(key, "var-tmp-dir")) {
2845 char *copy;
2846
2847 r = exec_runtime_allocate(rt);
2848 if (r < 0)
2849 return log_oom();
2850
2851 copy = strdup(value);
2852 if (!copy)
2853 return log_oom();
2854
2855 free((*rt)->var_tmp_dir);
2856 (*rt)->var_tmp_dir = copy;
2857
2858 } else if (streq(key, "netns-socket-0")) {
2859 int fd;
2860
2861 r = exec_runtime_allocate(rt);
2862 if (r < 0)
2863 return log_oom();
2864
2865 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2866 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2867 else {
2868 safe_close((*rt)->netns_storage_socket[0]);
2869 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2870 }
2871 } else if (streq(key, "netns-socket-1")) {
2872 int fd;
2873
2874 r = exec_runtime_allocate(rt);
2875 if (r < 0)
2876 return log_oom();
2877
2878 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2879 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2880 else {
2881 safe_close((*rt)->netns_storage_socket[1]);
2882 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2883 }
2884 } else
2885 return 0;
2886
2887 return 1;
2888 }
2889
2890 static void *remove_tmpdir_thread(void *p) {
2891 _cleanup_free_ char *path = p;
2892
2893 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2894 return NULL;
2895 }
2896
2897 void exec_runtime_destroy(ExecRuntime *rt) {
2898 int r;
2899
2900 if (!rt)
2901 return;
2902
2903 /* If there are multiple users of this, let's leave the stuff around */
2904 if (rt->n_ref > 1)
2905 return;
2906
2907 if (rt->tmp_dir) {
2908 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2909
2910 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2911 if (r < 0) {
2912 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2913 free(rt->tmp_dir);
2914 }
2915
2916 rt->tmp_dir = NULL;
2917 }
2918
2919 if (rt->var_tmp_dir) {
2920 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2921
2922 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2923 if (r < 0) {
2924 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2925 free(rt->var_tmp_dir);
2926 }
2927
2928 rt->var_tmp_dir = NULL;
2929 }
2930
2931 safe_close_pair(rt->netns_storage_socket);
2932 }
2933
2934 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2935 [EXEC_INPUT_NULL] = "null",
2936 [EXEC_INPUT_TTY] = "tty",
2937 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2938 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2939 [EXEC_INPUT_SOCKET] = "socket"
2940 };
2941
2942 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2943
2944 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2945 [EXEC_OUTPUT_INHERIT] = "inherit",
2946 [EXEC_OUTPUT_NULL] = "null",
2947 [EXEC_OUTPUT_TTY] = "tty",
2948 [EXEC_OUTPUT_SYSLOG] = "syslog",
2949 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2950 [EXEC_OUTPUT_KMSG] = "kmsg",
2951 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2952 [EXEC_OUTPUT_JOURNAL] = "journal",
2953 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2954 [EXEC_OUTPUT_SOCKET] = "socket"
2955 };
2956
2957 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2958
2959 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2960 [EXEC_UTMP_INIT] = "init",
2961 [EXEC_UTMP_LOGIN] = "login",
2962 [EXEC_UTMP_USER] = "user",
2963 };
2964
2965 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);