]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #1249 from lnykryn/sysv-symlinks
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <utmpx.h>
35 #include <sys/personality.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "sd-messages.h"
54 #include "rm-rf.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79 #include "formats-util.h"
80 #include "process-util.h"
81 #include "terminal-util.h"
82 #include "signal-util.h"
83
84 #ifdef HAVE_APPARMOR
85 #include "apparmor-util.h"
86 #endif
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #include "execute.h"
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        /* Moves the passed fds so that they end up at the consecutive
         * slots 3, 4, 5, ... Beware: the array is rewritten in place.
         * Returns 0 on success, a negative errno if duplication fails. */

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot was still occupied, so we got a higher fd.
                         * Remember the first such index and take another
                         * pass starting from there. */
                        if (retry_at < 0 && dup_fd != target)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        /* Sets or clears O_NONBLOCK (as requested by 'nonblock') and always
         * clears FD_CLOEXEC on each fd. Returns 0 on success, or the first
         * negative error reported by the helpers. */

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* These fds are meant to be passed to our children, hence
                 * close-on-exec is turned off unconditionally. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
175
176 _pure_ static const char *tty_path(const ExecContext *context) {
177 assert(context);
178
179 if (context->tty_path)
180 return context->tty_path;
181
182 return "/dev/console";
183 }
184
185 static void exec_context_tty_reset(const ExecContext *context) {
186 assert(context);
187
188 if (context->tty_vhangup)
189 terminal_vhangup(tty_path(context));
190
191 if (context->tty_reset)
192 reset_terminal(tty_path(context));
193
194 if (context->tty_vt_disallocate && context->tty_path)
195 vt_disallocate(context->tty_path);
196 }
197
198 static bool is_terminal_output(ExecOutput o) {
199 return
200 o == EXEC_OUTPUT_TTY ||
201 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
202 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
203 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
204 }
205
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        /* Opens /dev/null with the given flags and installs it as fd 'nfd'.
         * Returns nfd on success, a negative errno on failure. */

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We might have been handed the desired fd number right away */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
223
224 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
225 union sockaddr_union sa = {
226 .un.sun_family = AF_UNIX,
227 .un.sun_path = "/run/systemd/journal/stdout",
228 };
229 uid_t olduid = UID_INVALID;
230 gid_t oldgid = GID_INVALID;
231 int r;
232
233 if (gid != GID_INVALID) {
234 oldgid = getgid();
235
236 r = setegid(gid);
237 if (r < 0)
238 return -errno;
239 }
240
241 if (uid != UID_INVALID) {
242 olduid = getuid();
243
244 r = seteuid(uid);
245 if (r < 0) {
246 r = -errno;
247 goto restore_gid;
248 }
249 }
250
251 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
252 if (r < 0)
253 r = -errno;
254
255 /* If we fail to restore the uid or gid, things will likely
256 fail later on. This should only happen if an LSM interferes. */
257
258 if (uid != UID_INVALID)
259 (void) seteuid(olduid);
260
261 restore_gid:
262 if (gid != GID_INVALID)
263 (void) setegid(oldgid);
264
265 return r;
266 }
267
268 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
269 int fd, r;
270
271 assert(context);
272 assert(output < _EXEC_OUTPUT_MAX);
273 assert(ident);
274 assert(nfd >= 0);
275
276 fd = socket(AF_UNIX, SOCK_STREAM, 0);
277 if (fd < 0)
278 return -errno;
279
280 r = connect_journal_socket(fd, uid, gid);
281 if (r < 0)
282 return r;
283
284 if (shutdown(fd, SHUT_RD) < 0) {
285 safe_close(fd);
286 return -errno;
287 }
288
289 fd_inc_sndbuf(fd, SNDBUF_SIZE);
290
291 dprintf(fd,
292 "%s\n"
293 "%s\n"
294 "%i\n"
295 "%i\n"
296 "%i\n"
297 "%i\n"
298 "%i\n",
299 context->syslog_identifier ? context->syslog_identifier : ident,
300 unit_id,
301 context->syslog_priority,
302 !!context->syslog_level_prefix,
303 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
304 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
305 is_terminal_output(output));
306
307 if (fd != nfd) {
308 r = dup2(fd, nfd) < 0 ? -errno : nfd;
309 safe_close(fd);
310 } else
311 r = nfd;
312
313 return r;
314 }
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        /* Opens the terminal at 'path' (never as controlling TTY) and
         * installs it as fd 'nfd'. Returns nfd on success, a negative
         * error otherwise. */

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return r;
}
333
334 static bool is_terminal_input(ExecInput i) {
335 return
336 i == EXEC_INPUT_TTY ||
337 i == EXEC_INPUT_TTY_FORCE ||
338 i == EXEC_INPUT_TTY_FAIL;
339 }
340
341 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
342
343 if (is_terminal_input(std_input) && !apply_tty_stdin)
344 return EXEC_INPUT_NULL;
345
346 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
347 return EXEC_INPUT_NULL;
348
349 return std_input;
350 }
351
352 static int fixup_output(ExecOutput std_output, int socket_fd) {
353
354 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
355 return EXEC_OUTPUT_INHERIT;
356
357 return std_output;
358 }
359
360 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
361 ExecInput i;
362
363 assert(context);
364
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366
367 switch (i) {
368
369 case EXEC_INPUT_NULL:
370 return open_null_as(O_RDONLY, STDIN_FILENO);
371
372 case EXEC_INPUT_TTY:
373 case EXEC_INPUT_TTY_FORCE:
374 case EXEC_INPUT_TTY_FAIL: {
375 int fd, r;
376
377 fd = acquire_terminal(tty_path(context),
378 i == EXEC_INPUT_TTY_FAIL,
379 i == EXEC_INPUT_TTY_FORCE,
380 false,
381 USEC_INFINITY);
382 if (fd < 0)
383 return fd;
384
385 if (fd != STDIN_FILENO) {
386 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
387 safe_close(fd);
388 } else
389 r = STDIN_FILENO;
390
391 return r;
392 }
393
394 case EXEC_INPUT_SOCKET:
395 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
396
397 default:
398 assert_not_reached("Unknown input type");
399 }
400 }
401
402 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
403 ExecOutput o;
404 ExecInput i;
405 int r;
406
407 assert(unit);
408 assert(context);
409 assert(ident);
410
411 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
412 o = fixup_output(context->std_output, socket_fd);
413
414 if (fileno == STDERR_FILENO) {
415 ExecOutput e;
416 e = fixup_output(context->std_error, socket_fd);
417
418 /* This expects the input and output are already set up */
419
420 /* Don't change the stderr file descriptor if we inherit all
421 * the way and are not on a tty */
422 if (e == EXEC_OUTPUT_INHERIT &&
423 o == EXEC_OUTPUT_INHERIT &&
424 i == EXEC_INPUT_NULL &&
425 !is_terminal_input(context->std_input) &&
426 getppid () != 1)
427 return fileno;
428
429 /* Duplicate from stdout if possible */
430 if (e == o || e == EXEC_OUTPUT_INHERIT)
431 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
432
433 o = e;
434
435 } else if (o == EXEC_OUTPUT_INHERIT) {
436 /* If input got downgraded, inherit the original value */
437 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
438 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
439
440 /* If the input is connected to anything that's not a /dev/null, inherit that... */
441 if (i != EXEC_INPUT_NULL)
442 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
443
444 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
445 if (getppid() != 1)
446 return fileno;
447
448 /* We need to open /dev/null here anew, to get the right access mode. */
449 return open_null_as(O_WRONLY, fileno);
450 }
451
452 switch (o) {
453
454 case EXEC_OUTPUT_NULL:
455 return open_null_as(O_WRONLY, fileno);
456
457 case EXEC_OUTPUT_TTY:
458 if (is_terminal_input(i))
459 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
460
461 /* We don't reset the terminal if this is just about output */
462 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
463
464 case EXEC_OUTPUT_SYSLOG:
465 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
466 case EXEC_OUTPUT_KMSG:
467 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
468 case EXEC_OUTPUT_JOURNAL:
469 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
470 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
471 if (r < 0) {
472 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
473 r = open_null_as(O_WRONLY, fileno);
474 }
475 return r;
476
477 case EXEC_OUTPUT_SOCKET:
478 assert(socket_fd >= 0);
479 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481 default:
482 assert_not_reached("Unknown error type");
483 }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487 struct stat st;
488
489 assert(fd >= 0);
490
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd, uid, -1);
493 (void) fchmod(fd, TTY_MODE);
494
495 if (fstat(fd, &st) < 0)
496 return -errno;
497
498 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499 return -EPERM;
500
501 return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505 int *_saved_stdout) {
506 int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508 assert(_saved_stdin);
509 assert(_saved_stdout);
510
511 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512 if (saved_stdin < 0)
513 return -errno;
514
515 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516 if (saved_stdout < 0) {
517 r = errno;
518 goto fail;
519 }
520
521 fd = acquire_terminal(
522 "/dev/console",
523 false,
524 false,
525 false,
526 DEFAULT_CONFIRM_USEC);
527 if (fd < 0) {
528 r = fd;
529 goto fail;
530 }
531
532 r = chown_terminal(fd, getuid());
533 if (r < 0)
534 goto fail;
535
536 if (dup2(fd, STDIN_FILENO) < 0) {
537 r = -errno;
538 goto fail;
539 }
540
541 if (dup2(fd, STDOUT_FILENO) < 0) {
542 r = -errno;
543 goto fail;
544 }
545
546 if (fd >= 2)
547 safe_close(fd);
548
549 *_saved_stdin = saved_stdin;
550 *_saved_stdout = saved_stdout;
551
552 return 0;
553
554 fail:
555 safe_close(saved_stdout);
556 safe_close(saved_stdin);
557 safe_close(fd);
558
559 return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563 _cleanup_close_ int fd = -1;
564 va_list ap;
565
566 assert(format);
567
568 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569 if (fd < 0)
570 return fd;
571
572 va_start(ap, format);
573 vdprintf(fd, format, ap);
574 va_end(ap);
575
576 return 0;
577 }
578
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        /* Counterpart of setup_confirm_stdio(): releases the terminal, puts
         * the saved fds back in place as stdin/stdout and closes the saved
         * copies. Returns 0, or the negative errno of the last dup2() that
         * failed. */

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604 int saved_stdout = -1, saved_stdin = -1, r;
605 _cleanup_free_ char *line = NULL;
606
607 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608 if (r < 0)
609 return r;
610
611 line = exec_command_line(argv);
612 if (!line)
613 return -ENOMEM;
614
615 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617 restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619 return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623 bool keep_groups = false;
624 int r;
625
626 assert(context);
627
628 /* Lookup and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
630
631 if (context->group || username) {
632 /* First step, initialize groups from /etc/groups */
633 if (username && gid != 0) {
634 if (initgroups(username, gid) < 0)
635 return -errno;
636
637 keep_groups = true;
638 }
639
640 /* Second step, set our gids */
641 if (setresgid(gid, gid, gid) < 0)
642 return -errno;
643 }
644
645 if (context->supplementary_groups) {
646 int ngroups_max, k;
647 gid_t *gids;
648 char **i;
649
650 /* Final step, initialize any manually set supplementary groups */
651 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
652
653 if (!(gids = new(gid_t, ngroups_max)))
654 return -ENOMEM;
655
656 if (keep_groups) {
657 k = getgroups(ngroups_max, gids);
658 if (k < 0) {
659 free(gids);
660 return -errno;
661 }
662 } else
663 k = 0;
664
665 STRV_FOREACH(i, context->supplementary_groups) {
666 const char *g;
667
668 if (k >= ngroups_max) {
669 free(gids);
670 return -E2BIG;
671 }
672
673 g = *i;
674 r = get_group_creds(&g, gids+k);
675 if (r < 0) {
676 free(gids);
677 return r;
678 }
679
680 k++;
681 }
682
683 if (setgroups(k, gids) < 0) {
684 free(gids);
685 return -errno;
686 }
687
688 free(gids);
689 }
690
691 return 0;
692 }
693
694 static int enforce_user(const ExecContext *context, uid_t uid) {
695 assert(context);
696
697 /* Sets (but doesn't lookup) the uid and make sure we keep the
698 * capabilities while doing so. */
699
700 if (context->capabilities) {
701 _cleanup_cap_free_ cap_t d = NULL;
702 static const cap_value_t bits[] = {
703 CAP_SETUID, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
705 };
706
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
710 if (uid != 0) {
711 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
712
713 if (prctl(PR_GET_SECUREBITS) != sb)
714 if (prctl(PR_SET_SECUREBITS, sb) < 0)
715 return -errno;
716 }
717
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
720
721 d = cap_dup(context->capabilities);
722 if (!d)
723 return -errno;
724
725 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
726 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
727 return -errno;
728
729 if (cap_set_proc(d) < 0)
730 return -errno;
731 }
732
733 /* Third step: actually set the uids */
734 if (setresuid(uid, uid, uid) < 0)
735 return -errno;
736
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might got
739 corrupted due to the setresuid() so we need clean them up
740 later. This is done outside of this call. */
741
742 return 0;
743 }
744
745 #ifdef HAVE_PAM
746
747 static int null_conv(
748 int num_msg,
749 const struct pam_message **msg,
750 struct pam_response **resp,
751 void *appdata_ptr) {
752
753 /* We don't support conversations */
754
755 return PAM_CONV_ERR;
756 }
757
758 static int setup_pam(
759 const char *name,
760 const char *user,
761 uid_t uid,
762 const char *tty,
763 char ***pam_env,
764 int fds[], unsigned n_fds) {
765
766 static const struct pam_conv conv = {
767 .conv = null_conv,
768 .appdata_ptr = NULL
769 };
770
771 pam_handle_t *handle = NULL;
772 sigset_t old_ss;
773 int pam_code = PAM_SUCCESS;
774 int err;
775 char **e = NULL;
776 bool close_session = false;
777 pid_t pam_pid = 0, parent_pid;
778 int flags = 0;
779
780 assert(name);
781 assert(user);
782 assert(pam_env);
783
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_GET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
790
791 if (log_get_max_level() < LOG_DEBUG)
792 flags |= PAM_SILENT;
793
794 pam_code = pam_start(name, user, &conv, &handle);
795 if (pam_code != PAM_SUCCESS) {
796 handle = NULL;
797 goto fail;
798 }
799
800 if (tty) {
801 pam_code = pam_set_item(handle, PAM_TTY, tty);
802 if (pam_code != PAM_SUCCESS)
803 goto fail;
804 }
805
806 pam_code = pam_acct_mgmt(handle, flags);
807 if (pam_code != PAM_SUCCESS)
808 goto fail;
809
810 pam_code = pam_open_session(handle, flags);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813
814 close_session = true;
815
816 e = pam_getenvlist(handle);
817 if (!e) {
818 pam_code = PAM_BUF_ERR;
819 goto fail;
820 }
821
822 /* Block SIGTERM, so that we know that it won't get lost in
823 * the child */
824
825 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
826
827 parent_pid = getpid();
828
829 pam_pid = fork();
830 if (pam_pid < 0)
831 goto fail;
832
833 if (pam_pid == 0) {
834 int sig;
835 int r = EXIT_PAM;
836
837 /* The child's job is to reset the PAM session on
838 * termination */
839
840 /* This string must fit in 10 chars (i.e. the length
841 * of "/sbin/init"), to look pretty in /bin/ps */
842 rename_process("(sd-pam)");
843
844 /* Make sure we don't keep open the passed fds in this
845 child. We assume that otherwise only those fds are
846 open here that have been opened by PAM. */
847 close_many(fds, n_fds);
848
849 /* Drop privileges - we don't need any to pam_close_session
850 * and this will make PR_SET_PDEATHSIG work in most cases.
851 * If this fails, ignore the error - but expect sd-pam threads
852 * to fail to exit normally */
853 if (setresuid(uid, uid, uid) < 0)
854 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
855
856 (void) ignore_signals(SIGPIPE, -1);
857
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
864 goto child_finish;
865
866 /* Check if our parent process might already have
867 * died? */
868 if (getppid() == parent_pid) {
869 sigset_t ss;
870
871 assert_se(sigemptyset(&ss) >= 0);
872 assert_se(sigaddset(&ss, SIGTERM) >= 0);
873
874 for (;;) {
875 if (sigwait(&ss, &sig) < 0) {
876 if (errno == EINTR)
877 continue;
878
879 goto child_finish;
880 }
881
882 assert(sig == SIGTERM);
883 break;
884 }
885 }
886
887 /* If our parent died we'll end the session */
888 if (getppid() != parent_pid) {
889 pam_code = pam_close_session(handle, flags);
890 if (pam_code != PAM_SUCCESS)
891 goto child_finish;
892 }
893
894 r = 0;
895
896 child_finish:
897 pam_end(handle, pam_code | flags);
898 _exit(r);
899 }
900
901 /* If the child was forked off successfully it will do all the
902 * cleanups, so forget about the handle here. */
903 handle = NULL;
904
905 /* Unblock SIGTERM again in the parent */
906 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
907
908 /* We close the log explicitly here, since the PAM modules
909 * might have opened it, but we don't want this fd around. */
910 closelog();
911
912 *pam_env = e;
913 e = NULL;
914
915 return 0;
916
917 fail:
918 if (pam_code != PAM_SUCCESS) {
919 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
920 err = -EPERM; /* PAM errors do not map to errno */
921 } else {
922 err = log_error_errno(errno, "PAM failed: %m");
923 }
924
925 if (handle) {
926 if (close_session)
927 pam_code = pam_close_session(handle, flags);
928
929 pam_end(handle, pam_code | flags);
930 }
931
932 strv_free(e);
933
934 closelog();
935
936 if (pam_pid > 1) {
937 kill(pam_pid, SIGTERM);
938 kill(pam_pid, SIGCONT);
939 }
940
941 return err;
942 }
943 #endif
944
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        /* Renames the process to "(<name>)", where <name> is derived from
         * the last component of 'path'. The result has to fit into 10
         * characters (i.e. the length of "/sbin/init") in order to look
         * pretty in /bin/ps. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it tends to be the more telling part, as
                 * the beginning might merely be "systemd-" */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
975
976 #ifdef HAVE_SECCOMP
977
978 static int apply_seccomp(const ExecContext *c) {
979 uint32_t negative_action, action;
980 scmp_filter_ctx *seccomp;
981 Iterator i;
982 void *id;
983 int r;
984
985 assert(c);
986
987 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
988
989 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
990 if (!seccomp)
991 return -ENOMEM;
992
993 if (c->syscall_archs) {
994
995 SET_FOREACH(id, c->syscall_archs, i) {
996 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
997 if (r == -EEXIST)
998 continue;
999 if (r < 0)
1000 goto finish;
1001 }
1002
1003 } else {
1004 r = seccomp_add_secondary_archs(seccomp);
1005 if (r < 0)
1006 goto finish;
1007 }
1008
1009 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1010 SET_FOREACH(id, c->syscall_filter, i) {
1011 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1012 if (r < 0)
1013 goto finish;
1014 }
1015
1016 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1017 if (r < 0)
1018 goto finish;
1019
1020 r = seccomp_load(seccomp);
1021
1022 finish:
1023 seccomp_release(seccomp);
1024 return r;
1025 }
1026
1027 static int apply_address_families(const ExecContext *c) {
1028 scmp_filter_ctx *seccomp;
1029 Iterator i;
1030 int r;
1031
1032 assert(c);
1033
1034 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1035 if (!seccomp)
1036 return -ENOMEM;
1037
1038 r = seccomp_add_secondary_archs(seccomp);
1039 if (r < 0)
1040 goto finish;
1041
1042 if (c->address_families_whitelist) {
1043 int af, first = 0, last = 0;
1044 void *afp;
1045
1046 /* If this is a whitelist, we first block the address
1047 * families that are out of range and then everything
1048 * that is not in the set. First, we find the lowest
1049 * and highest address family in the set. */
1050
1051 SET_FOREACH(afp, c->address_families, i) {
1052 af = PTR_TO_INT(afp);
1053
1054 if (af <= 0 || af >= af_max())
1055 continue;
1056
1057 if (first == 0 || af < first)
1058 first = af;
1059
1060 if (last == 0 || af > last)
1061 last = af;
1062 }
1063
1064 assert((first == 0) == (last == 0));
1065
1066 if (first == 0) {
1067
1068 /* No entries in the valid range, block everything */
1069 r = seccomp_rule_add(
1070 seccomp,
1071 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1072 SCMP_SYS(socket),
1073 0);
1074 if (r < 0)
1075 goto finish;
1076
1077 } else {
1078
1079 /* Block everything below the first entry */
1080 r = seccomp_rule_add(
1081 seccomp,
1082 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1083 SCMP_SYS(socket),
1084 1,
1085 SCMP_A0(SCMP_CMP_LT, first));
1086 if (r < 0)
1087 goto finish;
1088
1089 /* Block everything above the last entry */
1090 r = seccomp_rule_add(
1091 seccomp,
1092 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1093 SCMP_SYS(socket),
1094 1,
1095 SCMP_A0(SCMP_CMP_GT, last));
1096 if (r < 0)
1097 goto finish;
1098
1099 /* Block everything between the first and last
1100 * entry */
1101 for (af = 1; af < af_max(); af++) {
1102
1103 if (set_contains(c->address_families, INT_TO_PTR(af)))
1104 continue;
1105
1106 r = seccomp_rule_add(
1107 seccomp,
1108 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1109 SCMP_SYS(socket),
1110 1,
1111 SCMP_A0(SCMP_CMP_EQ, af));
1112 if (r < 0)
1113 goto finish;
1114 }
1115 }
1116
1117 } else {
1118 void *af;
1119
1120 /* If this is a blacklist, then generate one rule for
1121 * each address family that are then combined in OR
1122 * checks. */
1123
1124 SET_FOREACH(af, c->address_families, i) {
1125
1126 r = seccomp_rule_add(
1127 seccomp,
1128 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1129 SCMP_SYS(socket),
1130 1,
1131 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1132 if (r < 0)
1133 goto finish;
1134 }
1135 }
1136
1137 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1138 if (r < 0)
1139 goto finish;
1140
1141 r = seccomp_load(seccomp);
1142
1143 finish:
1144 seccomp_release(seccomp);
1145 return r;
1146 }
1147
1148 #endif
1149
1150 static void do_idle_pipe_dance(int idle_pipe[4]) {
1151 assert(idle_pipe);
1152
1153
1154 idle_pipe[1] = safe_close(idle_pipe[1]);
1155 idle_pipe[2] = safe_close(idle_pipe[2]);
1156
1157 if (idle_pipe[0] >= 0) {
1158 int r;
1159
1160 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1161
1162 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1163 ssize_t n;
1164
1165 /* Signal systemd that we are bored and want to continue. */
1166 n = write(idle_pipe[3], "x", 1);
1167 if (n > 0)
1168 /* Wait for systemd to react to the signal above. */
1169 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1170 }
1171
1172 idle_pipe[0] = safe_close(idle_pipe[0]);
1173
1174 }
1175
1176 idle_pipe[3] = safe_close(idle_pipe[3]);
1177 }
1178
1179 static int build_environment(
1180 const ExecContext *c,
1181 unsigned n_fds,
1182 usec_t watchdog_usec,
1183 const char *home,
1184 const char *username,
1185 const char *shell,
1186 char ***ret) {
1187
1188 _cleanup_strv_free_ char **our_env = NULL;
1189 unsigned n_env = 0;
1190 char *x;
1191
1192 assert(c);
1193 assert(ret);
1194
1195 our_env = new0(char*, 10);
1196 if (!our_env)
1197 return -ENOMEM;
1198
1199 if (n_fds > 0) {
1200 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1201 return -ENOMEM;
1202 our_env[n_env++] = x;
1203
1204 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1205 return -ENOMEM;
1206 our_env[n_env++] = x;
1207 }
1208
1209 if (watchdog_usec > 0) {
1210 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1211 return -ENOMEM;
1212 our_env[n_env++] = x;
1213
1214 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1215 return -ENOMEM;
1216 our_env[n_env++] = x;
1217 }
1218
1219 if (home) {
1220 x = strappend("HOME=", home);
1221 if (!x)
1222 return -ENOMEM;
1223 our_env[n_env++] = x;
1224 }
1225
1226 if (username) {
1227 x = strappend("LOGNAME=", username);
1228 if (!x)
1229 return -ENOMEM;
1230 our_env[n_env++] = x;
1231
1232 x = strappend("USER=", username);
1233 if (!x)
1234 return -ENOMEM;
1235 our_env[n_env++] = x;
1236 }
1237
1238 if (shell) {
1239 x = strappend("SHELL=", shell);
1240 if (!x)
1241 return -ENOMEM;
1242 our_env[n_env++] = x;
1243 }
1244
1245 if (is_terminal_input(c->std_input) ||
1246 c->std_output == EXEC_OUTPUT_TTY ||
1247 c->std_error == EXEC_OUTPUT_TTY ||
1248 c->tty_path) {
1249
1250 x = strdup(default_term_for_tty(tty_path(c)));
1251 if (!x)
1252 return -ENOMEM;
1253 our_env[n_env++] = x;
1254 }
1255
1256 our_env[n_env++] = NULL;
1257 assert(n_env <= 10);
1258
1259 *ret = our_env;
1260 our_env = NULL;
1261
1262 return 0;
1263 }
1264
1265 static bool exec_needs_mount_namespace(
1266 const ExecContext *context,
1267 const ExecParameters *params,
1268 ExecRuntime *runtime) {
1269
1270 assert(context);
1271 assert(params);
1272
1273 if (!strv_isempty(context->read_write_dirs) ||
1274 !strv_isempty(context->read_only_dirs) ||
1275 !strv_isempty(context->inaccessible_dirs))
1276 return true;
1277
1278 if (context->mount_flags != 0)
1279 return true;
1280
1281 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1282 return true;
1283
1284 if (params->bus_endpoint_path)
1285 return true;
1286
1287 if (context->private_devices ||
1288 context->protect_system != PROTECT_SYSTEM_NO ||
1289 context->protect_home != PROTECT_HOME_NO)
1290 return true;
1291
1292 return false;
1293 }
1294
1295 static int exec_child(
1296 Unit *unit,
1297 ExecCommand *command,
1298 const ExecContext *context,
1299 const ExecParameters *params,
1300 ExecRuntime *runtime,
1301 char **argv,
1302 int socket_fd,
1303 int *fds, unsigned n_fds,
1304 char **files_env,
1305 int *exit_status) {
1306
1307 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1308 _cleanup_free_ char *mac_selinux_context_net = NULL;
1309 const char *username = NULL, *home = NULL, *shell = NULL;
1310 unsigned n_dont_close = 0;
1311 int dont_close[n_fds + 4];
1312 uid_t uid = UID_INVALID;
1313 gid_t gid = GID_INVALID;
1314 int i, r;
1315 bool needs_mount_namespace;
1316
1317 assert(unit);
1318 assert(command);
1319 assert(context);
1320 assert(params);
1321 assert(exit_status);
1322
1323 rename_process_from_path(command->path);
1324
1325 /* We reset exactly these signals, since they are the
1326 * only ones we set to SIG_IGN in the main daemon. All
1327 * others we leave untouched because we set them to
1328 * SIG_DFL or a valid handler initially, both of which
1329 * will be demoted to SIG_DFL. */
1330 (void) default_signals(SIGNALS_CRASH_HANDLER,
1331 SIGNALS_IGNORE, -1);
1332
1333 if (context->ignore_sigpipe)
1334 (void) ignore_signals(SIGPIPE, -1);
1335
1336 r = reset_signal_mask();
1337 if (r < 0) {
1338 *exit_status = EXIT_SIGNAL_MASK;
1339 return r;
1340 }
1341
1342 if (params->idle_pipe)
1343 do_idle_pipe_dance(params->idle_pipe);
1344
1345 /* Close sockets very early to make sure we don't
1346 * block init reexecution because it cannot bind its
1347 * sockets */
1348
1349 log_forget_fds();
1350
1351 if (socket_fd >= 0)
1352 dont_close[n_dont_close++] = socket_fd;
1353 if (n_fds > 0) {
1354 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1355 n_dont_close += n_fds;
1356 }
1357 if (params->bus_endpoint_fd >= 0)
1358 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1359 if (runtime) {
1360 if (runtime->netns_storage_socket[0] >= 0)
1361 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1362 if (runtime->netns_storage_socket[1] >= 0)
1363 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1364 }
1365
1366 r = close_all_fds(dont_close, n_dont_close);
1367 if (r < 0) {
1368 *exit_status = EXIT_FDS;
1369 return r;
1370 }
1371
1372 if (!context->same_pgrp)
1373 if (setsid() < 0) {
1374 *exit_status = EXIT_SETSID;
1375 return -errno;
1376 }
1377
1378 exec_context_tty_reset(context);
1379
1380 if (params->confirm_spawn) {
1381 char response;
1382
1383 r = ask_for_confirmation(&response, argv);
1384 if (r == -ETIMEDOUT)
1385 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1386 else if (r < 0)
1387 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1388 else if (response == 's') {
1389 write_confirm_message("Skipping execution.\n");
1390 *exit_status = EXIT_CONFIRM;
1391 return -ECANCELED;
1392 } else if (response == 'n') {
1393 write_confirm_message("Failing execution.\n");
1394 *exit_status = 0;
1395 return 0;
1396 }
1397 }
1398
1399 if (context->user) {
1400 username = context->user;
1401 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1402 if (r < 0) {
1403 *exit_status = EXIT_USER;
1404 return r;
1405 }
1406 }
1407
1408 if (context->group) {
1409 const char *g = context->group;
1410
1411 r = get_group_creds(&g, &gid);
1412 if (r < 0) {
1413 *exit_status = EXIT_GROUP;
1414 return r;
1415 }
1416 }
1417
1418
1419 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1420 * must sure to drop O_NONBLOCK */
1421 if (socket_fd >= 0)
1422 fd_nonblock(socket_fd, false);
1423
1424 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1425 if (r < 0) {
1426 *exit_status = EXIT_STDIN;
1427 return r;
1428 }
1429
1430 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1431 if (r < 0) {
1432 *exit_status = EXIT_STDOUT;
1433 return r;
1434 }
1435
1436 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1437 if (r < 0) {
1438 *exit_status = EXIT_STDERR;
1439 return r;
1440 }
1441
1442 if (params->cgroup_path) {
1443 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1444 if (r < 0) {
1445 *exit_status = EXIT_CGROUP;
1446 return r;
1447 }
1448 }
1449
1450 if (context->oom_score_adjust_set) {
1451 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1452
1453 /* When we can't make this change due to EPERM, then
1454 * let's silently skip over it. User namespaces
1455 * prohibit write access to this file, and we
1456 * shouldn't trip up over that. */
1457
1458 sprintf(t, "%i", context->oom_score_adjust);
1459 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1460 if (r == -EPERM || r == -EACCES) {
1461 log_open();
1462 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1463 log_close();
1464 } else if (r < 0) {
1465 *exit_status = EXIT_OOM_ADJUST;
1466 return -errno;
1467 }
1468 }
1469
1470 if (context->nice_set)
1471 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1472 *exit_status = EXIT_NICE;
1473 return -errno;
1474 }
1475
1476 if (context->cpu_sched_set) {
1477 struct sched_param param = {
1478 .sched_priority = context->cpu_sched_priority,
1479 };
1480
1481 r = sched_setscheduler(0,
1482 context->cpu_sched_policy |
1483 (context->cpu_sched_reset_on_fork ?
1484 SCHED_RESET_ON_FORK : 0),
1485 &param);
1486 if (r < 0) {
1487 *exit_status = EXIT_SETSCHEDULER;
1488 return -errno;
1489 }
1490 }
1491
1492 if (context->cpuset)
1493 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1494 *exit_status = EXIT_CPUAFFINITY;
1495 return -errno;
1496 }
1497
1498 if (context->ioprio_set)
1499 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1500 *exit_status = EXIT_IOPRIO;
1501 return -errno;
1502 }
1503
1504 if (context->timer_slack_nsec != NSEC_INFINITY)
1505 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1506 *exit_status = EXIT_TIMERSLACK;
1507 return -errno;
1508 }
1509
1510 if (context->personality != PERSONALITY_INVALID)
1511 if (personality(context->personality) < 0) {
1512 *exit_status = EXIT_PERSONALITY;
1513 return -errno;
1514 }
1515
1516 if (context->utmp_id)
1517 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1518 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1519 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1520 USER_PROCESS,
1521 username ? "root" : context->user);
1522
1523 if (context->user && is_terminal_input(context->std_input)) {
1524 r = chown_terminal(STDIN_FILENO, uid);
1525 if (r < 0) {
1526 *exit_status = EXIT_STDIN;
1527 return r;
1528 }
1529 }
1530
1531 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1532 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1533
1534 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1535 if (r < 0) {
1536 *exit_status = EXIT_BUS_ENDPOINT;
1537 return r;
1538 }
1539 }
1540
1541 /* If delegation is enabled we'll pass ownership of the cgroup
1542 * (but only in systemd's own controller hierarchy!) to the
1543 * user of the new process. */
1544 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1545 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1546 if (r < 0) {
1547 *exit_status = EXIT_CGROUP;
1548 return r;
1549 }
1550
1551
1552 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1553 if (r < 0) {
1554 *exit_status = EXIT_CGROUP;
1555 return r;
1556 }
1557 }
1558
1559 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1560 char **rt;
1561
1562 STRV_FOREACH(rt, context->runtime_directory) {
1563 _cleanup_free_ char *p;
1564
1565 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1566 if (!p) {
1567 *exit_status = EXIT_RUNTIME_DIRECTORY;
1568 return -ENOMEM;
1569 }
1570
1571 r = mkdir_p_label(p, context->runtime_directory_mode);
1572 if (r < 0) {
1573 *exit_status = EXIT_RUNTIME_DIRECTORY;
1574 return r;
1575 }
1576
1577 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1578 if (r < 0) {
1579 *exit_status = EXIT_RUNTIME_DIRECTORY;
1580 return r;
1581 }
1582 }
1583 }
1584
1585 if (params->apply_permissions) {
1586 r = enforce_groups(context, username, gid);
1587 if (r < 0) {
1588 *exit_status = EXIT_GROUP;
1589 return r;
1590 }
1591 }
1592
1593 umask(context->umask);
1594
1595 #ifdef HAVE_PAM
1596 if (params->apply_permissions && context->pam_name && username) {
1597 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1598 if (r < 0) {
1599 *exit_status = EXIT_PAM;
1600 return r;
1601 }
1602 }
1603 #endif
1604
1605 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1606 r = setup_netns(runtime->netns_storage_socket);
1607 if (r < 0) {
1608 *exit_status = EXIT_NETWORK;
1609 return r;
1610 }
1611 }
1612
1613 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1614
1615 if (needs_mount_namespace) {
1616 char *tmp = NULL, *var = NULL;
1617
1618 /* The runtime struct only contains the parent
1619 * of the private /tmp, which is
1620 * non-accessible to world users. Inside of it
1621 * there's a /tmp that is sticky, and that's
1622 * the one we want to use here. */
1623
1624 if (context->private_tmp && runtime) {
1625 if (runtime->tmp_dir)
1626 tmp = strjoina(runtime->tmp_dir, "/tmp");
1627 if (runtime->var_tmp_dir)
1628 var = strjoina(runtime->var_tmp_dir, "/tmp");
1629 }
1630
1631 r = setup_namespace(
1632 params->apply_chroot ? context->root_directory : NULL,
1633 context->read_write_dirs,
1634 context->read_only_dirs,
1635 context->inaccessible_dirs,
1636 tmp,
1637 var,
1638 params->bus_endpoint_path,
1639 context->private_devices,
1640 context->protect_home,
1641 context->protect_system,
1642 context->mount_flags);
1643
1644 /* If we couldn't set up the namespace this is
1645 * probably due to a missing capability. In this case,
1646 * silently proceeed. */
1647 if (r == -EPERM || r == -EACCES) {
1648 log_open();
1649 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1650 log_close();
1651 } else if (r < 0) {
1652 *exit_status = EXIT_NAMESPACE;
1653 return r;
1654 }
1655 }
1656
1657 if (params->apply_chroot) {
1658 if (!needs_mount_namespace && context->root_directory)
1659 if (chroot(context->root_directory) < 0) {
1660 *exit_status = EXIT_CHROOT;
1661 return -errno;
1662 }
1663
1664 if (chdir(context->working_directory ?: "/") < 0 &&
1665 !context->working_directory_missing_ok) {
1666 *exit_status = EXIT_CHDIR;
1667 return -errno;
1668 }
1669 } else {
1670 _cleanup_free_ char *d = NULL;
1671
1672 if (asprintf(&d, "%s/%s",
1673 context->root_directory ?: "",
1674 context->working_directory ?: "") < 0) {
1675 *exit_status = EXIT_MEMORY;
1676 return -ENOMEM;
1677 }
1678
1679 if (chdir(d) < 0 &&
1680 !context->working_directory_missing_ok) {
1681 *exit_status = EXIT_CHDIR;
1682 return -errno;
1683 }
1684 }
1685
1686 #ifdef HAVE_SELINUX
1687 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1688 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1689 if (r < 0) {
1690 *exit_status = EXIT_SELINUX_CONTEXT;
1691 return r;
1692 }
1693 }
1694 #endif
1695
1696 /* We repeat the fd closing here, to make sure that
1697 * nothing is leaked from the PAM modules. Note that
1698 * we are more aggressive this time since socket_fd
1699 * and the netns fds we don't need anymore. The custom
1700 * endpoint fd was needed to upload the policy and can
1701 * now be closed as well. */
1702 r = close_all_fds(fds, n_fds);
1703 if (r >= 0)
1704 r = shift_fds(fds, n_fds);
1705 if (r >= 0)
1706 r = flags_fds(fds, n_fds, context->non_blocking);
1707 if (r < 0) {
1708 *exit_status = EXIT_FDS;
1709 return r;
1710 }
1711
1712 if (params->apply_permissions) {
1713
1714 for (i = 0; i < _RLIMIT_MAX; i++) {
1715 if (!context->rlimit[i])
1716 continue;
1717
1718 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1719 *exit_status = EXIT_LIMITS;
1720 return -errno;
1721 }
1722 }
1723
1724 if (context->capability_bounding_set_drop) {
1725 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1726 if (r < 0) {
1727 *exit_status = EXIT_CAPABILITIES;
1728 return r;
1729 }
1730 }
1731
1732 #ifdef HAVE_SMACK
1733 if (context->smack_process_label) {
1734 r = mac_smack_apply_pid(0, context->smack_process_label);
1735 if (r < 0) {
1736 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1737 return r;
1738 }
1739 }
1740 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1741 else {
1742 _cleanup_free_ char *exec_label = NULL;
1743
1744 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1745 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1746 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1747 return r;
1748 }
1749
1750 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1751 if (r < 0) {
1752 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1753 return r;
1754 }
1755 }
1756 #endif
1757 #endif
1758
1759 if (context->user) {
1760 r = enforce_user(context, uid);
1761 if (r < 0) {
1762 *exit_status = EXIT_USER;
1763 return r;
1764 }
1765 }
1766
1767 /* PR_GET_SECUREBITS is not privileged, while
1768 * PR_SET_SECUREBITS is. So to suppress
1769 * potential EPERMs we'll try not to call
1770 * PR_SET_SECUREBITS unless necessary. */
1771 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1772 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1773 *exit_status = EXIT_SECUREBITS;
1774 return -errno;
1775 }
1776
1777 if (context->capabilities)
1778 if (cap_set_proc(context->capabilities) < 0) {
1779 *exit_status = EXIT_CAPABILITIES;
1780 return -errno;
1781 }
1782
1783 if (context->no_new_privileges)
1784 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1785 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1786 return -errno;
1787 }
1788
1789 #ifdef HAVE_SECCOMP
1790 if (context->address_families_whitelist ||
1791 !set_isempty(context->address_families)) {
1792 r = apply_address_families(context);
1793 if (r < 0) {
1794 *exit_status = EXIT_ADDRESS_FAMILIES;
1795 return r;
1796 }
1797 }
1798
1799 if (context->syscall_whitelist ||
1800 !set_isempty(context->syscall_filter) ||
1801 !set_isempty(context->syscall_archs)) {
1802 r = apply_seccomp(context);
1803 if (r < 0) {
1804 *exit_status = EXIT_SECCOMP;
1805 return r;
1806 }
1807 }
1808 #endif
1809
1810 #ifdef HAVE_SELINUX
1811 if (mac_selinux_use()) {
1812 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1813
1814 if (exec_context) {
1815 r = setexeccon(exec_context);
1816 if (r < 0) {
1817 *exit_status = EXIT_SELINUX_CONTEXT;
1818 return r;
1819 }
1820 }
1821 }
1822 #endif
1823
1824 #ifdef HAVE_APPARMOR
1825 if (context->apparmor_profile && mac_apparmor_use()) {
1826 r = aa_change_onexec(context->apparmor_profile);
1827 if (r < 0 && !context->apparmor_profile_ignore) {
1828 *exit_status = EXIT_APPARMOR_PROFILE;
1829 return -errno;
1830 }
1831 }
1832 #endif
1833 }
1834
1835 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1836 if (r < 0) {
1837 *exit_status = EXIT_MEMORY;
1838 return r;
1839 }
1840
1841 final_env = strv_env_merge(5,
1842 params->environment,
1843 our_env,
1844 context->environment,
1845 files_env,
1846 pam_env,
1847 NULL);
1848 if (!final_env) {
1849 *exit_status = EXIT_MEMORY;
1850 return -ENOMEM;
1851 }
1852
1853 final_argv = replace_env_argv(argv, final_env);
1854 if (!final_argv) {
1855 *exit_status = EXIT_MEMORY;
1856 return -ENOMEM;
1857 }
1858
1859 final_env = strv_env_clean(final_env);
1860
1861 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1862 _cleanup_free_ char *line;
1863
1864 line = exec_command_line(final_argv);
1865 if (line) {
1866 log_open();
1867 log_struct(LOG_DEBUG,
1868 LOG_UNIT_ID(unit),
1869 "EXECUTABLE=%s", command->path,
1870 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1871 NULL);
1872 log_close();
1873 }
1874 }
1875
1876 execve(command->path, final_argv, final_env);
1877 *exit_status = EXIT_EXEC;
1878 return -errno;
1879 }
1880
1881 int exec_spawn(Unit *unit,
1882 ExecCommand *command,
1883 const ExecContext *context,
1884 const ExecParameters *params,
1885 ExecRuntime *runtime,
1886 pid_t *ret) {
1887
1888 _cleanup_strv_free_ char **files_env = NULL;
1889 int *fds = NULL; unsigned n_fds = 0;
1890 _cleanup_free_ char *line = NULL;
1891 int socket_fd, r;
1892 char **argv;
1893 pid_t pid;
1894
1895 assert(unit);
1896 assert(command);
1897 assert(context);
1898 assert(ret);
1899 assert(params);
1900 assert(params->fds || params->n_fds <= 0);
1901
1902 if (context->std_input == EXEC_INPUT_SOCKET ||
1903 context->std_output == EXEC_OUTPUT_SOCKET ||
1904 context->std_error == EXEC_OUTPUT_SOCKET) {
1905
1906 if (params->n_fds != 1) {
1907 log_unit_error(unit, "Got more than one socket.");
1908 return -EINVAL;
1909 }
1910
1911 socket_fd = params->fds[0];
1912 } else {
1913 socket_fd = -1;
1914 fds = params->fds;
1915 n_fds = params->n_fds;
1916 }
1917
1918 r = exec_context_load_environment(unit, context, &files_env);
1919 if (r < 0)
1920 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1921
1922 argv = params->argv ?: command->argv;
1923 line = exec_command_line(argv);
1924 if (!line)
1925 return log_oom();
1926
1927 log_struct(LOG_DEBUG,
1928 LOG_UNIT_ID(unit),
1929 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1930 "EXECUTABLE=%s", command->path,
1931 NULL);
1932 pid = fork();
1933 if (pid < 0)
1934 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1935
1936 if (pid == 0) {
1937 int exit_status;
1938
1939 r = exec_child(unit,
1940 command,
1941 context,
1942 params,
1943 runtime,
1944 argv,
1945 socket_fd,
1946 fds, n_fds,
1947 files_env,
1948 &exit_status);
1949 if (r < 0) {
1950 log_open();
1951 log_struct_errno(LOG_ERR, r,
1952 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1953 LOG_UNIT_ID(unit),
1954 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1955 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1956 command->path),
1957 "EXECUTABLE=%s", command->path,
1958 NULL);
1959 }
1960
1961 _exit(exit_status);
1962 }
1963
1964 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1965
1966 /* We add the new process to the cgroup both in the child (so
1967 * that we can be sure that no user code is ever executed
1968 * outside of the cgroup) and in the parent (so that we can be
1969 * sure that when we kill the cgroup the process will be
1970 * killed too). */
1971 if (params->cgroup_path)
1972 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1973
1974 exec_status_start(&command->exec_status, pid);
1975
1976 *ret = pid;
1977 return 0;
1978 }
1979
1980 void exec_context_init(ExecContext *c) {
1981 assert(c);
1982
1983 c->umask = 0022;
1984 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1985 c->cpu_sched_policy = SCHED_OTHER;
1986 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1987 c->syslog_level_prefix = true;
1988 c->ignore_sigpipe = true;
1989 c->timer_slack_nsec = NSEC_INFINITY;
1990 c->personality = PERSONALITY_INVALID;
1991 c->runtime_directory_mode = 0755;
1992 }
1993
1994 void exec_context_done(ExecContext *c) {
1995 unsigned l;
1996
1997 assert(c);
1998
1999 c->environment = strv_free(c->environment);
2000 c->environment_files = strv_free(c->environment_files);
2001
2002 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
2003 c->rlimit[l] = mfree(c->rlimit[l]);
2004
2005 c->working_directory = mfree(c->working_directory);
2006 c->root_directory = mfree(c->root_directory);
2007 c->tty_path = mfree(c->tty_path);
2008 c->syslog_identifier = mfree(c->syslog_identifier);
2009 c->user = mfree(c->user);
2010 c->group = mfree(c->group);
2011
2012 c->supplementary_groups = strv_free(c->supplementary_groups);
2013
2014 c->pam_name = mfree(c->pam_name);
2015
2016 if (c->capabilities) {
2017 cap_free(c->capabilities);
2018 c->capabilities = NULL;
2019 }
2020
2021 c->read_only_dirs = strv_free(c->read_only_dirs);
2022 c->read_write_dirs = strv_free(c->read_write_dirs);
2023 c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
2024
2025 if (c->cpuset)
2026 CPU_FREE(c->cpuset);
2027
2028 c->utmp_id = mfree(c->utmp_id);
2029 c->selinux_context = mfree(c->selinux_context);
2030 c->apparmor_profile = mfree(c->apparmor_profile);
2031
2032 c->syscall_filter = set_free(c->syscall_filter);
2033 c->syscall_archs = set_free(c->syscall_archs);
2034 c->address_families = set_free(c->address_families);
2035
2036 c->runtime_directory = strv_free(c->runtime_directory);
2037
2038 bus_endpoint_free(c->bus_endpoint);
2039 c->bus_endpoint = NULL;
2040 }
2041
2042 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2043 char **i;
2044
2045 assert(c);
2046
2047 if (!runtime_prefix)
2048 return 0;
2049
2050 STRV_FOREACH(i, c->runtime_directory) {
2051 _cleanup_free_ char *p;
2052
2053 p = strjoin(runtime_prefix, "/", *i, NULL);
2054 if (!p)
2055 return -ENOMEM;
2056
2057 /* We execute this synchronously, since we need to be
2058 * sure this is gone when we start the service
2059 * next. */
2060 (void) rm_rf(p, REMOVE_ROOT);
2061 }
2062
2063 return 0;
2064 }
2065
2066 void exec_command_done(ExecCommand *c) {
2067 assert(c);
2068
2069 c->path = mfree(c->path);
2070
2071 c->argv = strv_free(c->argv);
2072 }
2073
2074 void exec_command_done_array(ExecCommand *c, unsigned n) {
2075 unsigned i;
2076
2077 for (i = 0; i < n; i++)
2078 exec_command_done(c+i);
2079 }
2080
2081 ExecCommand* exec_command_free_list(ExecCommand *c) {
2082 ExecCommand *i;
2083
2084 while ((i = c)) {
2085 LIST_REMOVE(command, c, i);
2086 exec_command_done(i);
2087 free(i);
2088 }
2089
2090 return NULL;
2091 }
2092
2093 void exec_command_free_array(ExecCommand **c, unsigned n) {
2094 unsigned i;
2095
2096 for (i = 0; i < n; i++)
2097 c[i] = exec_command_free_list(c[i]);
2098 }
2099
2100 typedef struct InvalidEnvInfo {
2101 Unit *unit;
2102 const char *path;
2103 } InvalidEnvInfo;
2104
2105 static void invalid_env(const char *p, void *userdata) {
2106 InvalidEnvInfo *info = userdata;
2107
2108 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2109 }
2110
2111 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2112 char **i, **r = NULL;
2113
2114 assert(c);
2115 assert(l);
2116
2117 STRV_FOREACH(i, c->environment_files) {
2118 char *fn;
2119 int k;
2120 bool ignore = false;
2121 char **p;
2122 _cleanup_globfree_ glob_t pglob = {};
2123 int count, n;
2124
2125 fn = *i;
2126
2127 if (fn[0] == '-') {
2128 ignore = true;
2129 fn ++;
2130 }
2131
2132 if (!path_is_absolute(fn)) {
2133 if (ignore)
2134 continue;
2135
2136 strv_free(r);
2137 return -EINVAL;
2138 }
2139
2140 /* Filename supports globbing, take all matching files */
2141 errno = 0;
2142 if (glob(fn, 0, NULL, &pglob) != 0) {
2143 if (ignore)
2144 continue;
2145
2146 strv_free(r);
2147 return errno ? -errno : -EINVAL;
2148 }
2149 count = pglob.gl_pathc;
2150 if (count == 0) {
2151 if (ignore)
2152 continue;
2153
2154 strv_free(r);
2155 return -EINVAL;
2156 }
2157 for (n = 0; n < count; n++) {
2158 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2159 if (k < 0) {
2160 if (ignore)
2161 continue;
2162
2163 strv_free(r);
2164 return k;
2165 }
2166 /* Log invalid environment variables with filename */
2167 if (p) {
2168 InvalidEnvInfo info = {
2169 .unit = unit,
2170 .path = pglob.gl_pathv[n]
2171 };
2172
2173 p = strv_env_clean_with_callback(p, invalid_env, &info);
2174 }
2175
2176 if (r == NULL)
2177 r = p;
2178 else {
2179 char **m;
2180
2181 m = strv_env_merge(2, r, p);
2182 strv_free(r);
2183 strv_free(p);
2184 if (!m)
2185 return -ENOMEM;
2186
2187 r = m;
2188 }
2189 }
2190 }
2191
2192 *l = r;
2193
2194 return 0;
2195 }
2196
2197 static bool tty_may_match_dev_console(const char *tty) {
2198 _cleanup_free_ char *active = NULL;
2199 char *console;
2200
2201 if (startswith(tty, "/dev/"))
2202 tty += 5;
2203
2204 /* trivial identity? */
2205 if (streq(tty, "console"))
2206 return true;
2207
2208 console = resolve_dev_console(&active);
2209 /* if we could not resolve, assume it may */
2210 if (!console)
2211 return true;
2212
2213 /* "tty0" means the active VC, so it may be the same sometimes */
2214 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2215 }
2216
2217 bool exec_context_may_touch_console(ExecContext *ec) {
2218 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2219 is_terminal_input(ec->std_input) ||
2220 is_terminal_output(ec->std_output) ||
2221 is_terminal_output(ec->std_error)) &&
2222 tty_may_match_dev_console(tty_path(ec));
2223 }
2224
2225 static void strv_fprintf(FILE *f, char **l) {
2226 char **g;
2227
2228 assert(f);
2229
2230 STRV_FOREACH(g, l)
2231 fprintf(f, " %s", *g);
2232 }
2233
2234 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2235 char **e;
2236 unsigned i;
2237
2238 assert(c);
2239 assert(f);
2240
2241 prefix = strempty(prefix);
2242
2243 fprintf(f,
2244 "%sUMask: %04o\n"
2245 "%sWorkingDirectory: %s\n"
2246 "%sRootDirectory: %s\n"
2247 "%sNonBlocking: %s\n"
2248 "%sPrivateTmp: %s\n"
2249 "%sPrivateNetwork: %s\n"
2250 "%sPrivateDevices: %s\n"
2251 "%sProtectHome: %s\n"
2252 "%sProtectSystem: %s\n"
2253 "%sIgnoreSIGPIPE: %s\n",
2254 prefix, c->umask,
2255 prefix, c->working_directory ? c->working_directory : "/",
2256 prefix, c->root_directory ? c->root_directory : "/",
2257 prefix, yes_no(c->non_blocking),
2258 prefix, yes_no(c->private_tmp),
2259 prefix, yes_no(c->private_network),
2260 prefix, yes_no(c->private_devices),
2261 prefix, protect_home_to_string(c->protect_home),
2262 prefix, protect_system_to_string(c->protect_system),
2263 prefix, yes_no(c->ignore_sigpipe));
2264
2265 STRV_FOREACH(e, c->environment)
2266 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2267
2268 STRV_FOREACH(e, c->environment_files)
2269 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2270
2271 if (c->nice_set)
2272 fprintf(f,
2273 "%sNice: %i\n",
2274 prefix, c->nice);
2275
2276 if (c->oom_score_adjust_set)
2277 fprintf(f,
2278 "%sOOMScoreAdjust: %i\n",
2279 prefix, c->oom_score_adjust);
2280
2281 for (i = 0; i < RLIM_NLIMITS; i++)
2282 if (c->rlimit[i])
2283 fprintf(f, "%s%s: "RLIM_FMT"\n",
2284 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2285
2286 if (c->ioprio_set) {
2287 _cleanup_free_ char *class_str = NULL;
2288
2289 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2290 fprintf(f,
2291 "%sIOSchedulingClass: %s\n"
2292 "%sIOPriority: %i\n",
2293 prefix, strna(class_str),
2294 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2295 }
2296
2297 if (c->cpu_sched_set) {
2298 _cleanup_free_ char *policy_str = NULL;
2299
2300 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2301 fprintf(f,
2302 "%sCPUSchedulingPolicy: %s\n"
2303 "%sCPUSchedulingPriority: %i\n"
2304 "%sCPUSchedulingResetOnFork: %s\n",
2305 prefix, strna(policy_str),
2306 prefix, c->cpu_sched_priority,
2307 prefix, yes_no(c->cpu_sched_reset_on_fork));
2308 }
2309
2310 if (c->cpuset) {
2311 fprintf(f, "%sCPUAffinity:", prefix);
2312 for (i = 0; i < c->cpuset_ncpus; i++)
2313 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2314 fprintf(f, " %u", i);
2315 fputs("\n", f);
2316 }
2317
2318 if (c->timer_slack_nsec != NSEC_INFINITY)
2319 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2320
2321 fprintf(f,
2322 "%sStandardInput: %s\n"
2323 "%sStandardOutput: %s\n"
2324 "%sStandardError: %s\n",
2325 prefix, exec_input_to_string(c->std_input),
2326 prefix, exec_output_to_string(c->std_output),
2327 prefix, exec_output_to_string(c->std_error));
2328
2329 if (c->tty_path)
2330 fprintf(f,
2331 "%sTTYPath: %s\n"
2332 "%sTTYReset: %s\n"
2333 "%sTTYVHangup: %s\n"
2334 "%sTTYVTDisallocate: %s\n",
2335 prefix, c->tty_path,
2336 prefix, yes_no(c->tty_reset),
2337 prefix, yes_no(c->tty_vhangup),
2338 prefix, yes_no(c->tty_vt_disallocate));
2339
2340 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2341 c->std_output == EXEC_OUTPUT_KMSG ||
2342 c->std_output == EXEC_OUTPUT_JOURNAL ||
2343 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2344 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2345 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2346 c->std_error == EXEC_OUTPUT_SYSLOG ||
2347 c->std_error == EXEC_OUTPUT_KMSG ||
2348 c->std_error == EXEC_OUTPUT_JOURNAL ||
2349 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2350 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2351 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2352
2353 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2354
2355 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2356 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2357
2358 fprintf(f,
2359 "%sSyslogFacility: %s\n"
2360 "%sSyslogLevel: %s\n",
2361 prefix, strna(fac_str),
2362 prefix, strna(lvl_str));
2363 }
2364
2365 if (c->capabilities) {
2366 _cleanup_cap_free_charp_ char *t;
2367
2368 t = cap_to_text(c->capabilities, NULL);
2369 if (t)
2370 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2371 }
2372
2373 if (c->secure_bits)
2374 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2375 prefix,
2376 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2377 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2378 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2379 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2380 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2381 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2382
2383 if (c->capability_bounding_set_drop) {
2384 unsigned long l;
2385 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2386
2387 for (l = 0; l <= cap_last_cap(); l++)
2388 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2389 fprintf(f, " %s", strna(capability_to_name(l)));
2390
2391 fputs("\n", f);
2392 }
2393
2394 if (c->user)
2395 fprintf(f, "%sUser: %s\n", prefix, c->user);
2396 if (c->group)
2397 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2398
2399 if (strv_length(c->supplementary_groups) > 0) {
2400 fprintf(f, "%sSupplementaryGroups:", prefix);
2401 strv_fprintf(f, c->supplementary_groups);
2402 fputs("\n", f);
2403 }
2404
2405 if (c->pam_name)
2406 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2407
2408 if (strv_length(c->read_write_dirs) > 0) {
2409 fprintf(f, "%sReadWriteDirs:", prefix);
2410 strv_fprintf(f, c->read_write_dirs);
2411 fputs("\n", f);
2412 }
2413
2414 if (strv_length(c->read_only_dirs) > 0) {
2415 fprintf(f, "%sReadOnlyDirs:", prefix);
2416 strv_fprintf(f, c->read_only_dirs);
2417 fputs("\n", f);
2418 }
2419
2420 if (strv_length(c->inaccessible_dirs) > 0) {
2421 fprintf(f, "%sInaccessibleDirs:", prefix);
2422 strv_fprintf(f, c->inaccessible_dirs);
2423 fputs("\n", f);
2424 }
2425
2426 if (c->utmp_id)
2427 fprintf(f,
2428 "%sUtmpIdentifier: %s\n",
2429 prefix, c->utmp_id);
2430
2431 if (c->selinux_context)
2432 fprintf(f,
2433 "%sSELinuxContext: %s%s\n",
2434 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2435
2436 if (c->personality != PERSONALITY_INVALID)
2437 fprintf(f,
2438 "%sPersonality: %s\n",
2439 prefix, strna(personality_to_string(c->personality)));
2440
2441 if (c->syscall_filter) {
2442 #ifdef HAVE_SECCOMP
2443 Iterator j;
2444 void *id;
2445 bool first = true;
2446 #endif
2447
2448 fprintf(f,
2449 "%sSystemCallFilter: ",
2450 prefix);
2451
2452 if (!c->syscall_whitelist)
2453 fputc('~', f);
2454
2455 #ifdef HAVE_SECCOMP
2456 SET_FOREACH(id, c->syscall_filter, j) {
2457 _cleanup_free_ char *name = NULL;
2458
2459 if (first)
2460 first = false;
2461 else
2462 fputc(' ', f);
2463
2464 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2465 fputs(strna(name), f);
2466 }
2467 #endif
2468
2469 fputc('\n', f);
2470 }
2471
2472 if (c->syscall_archs) {
2473 #ifdef HAVE_SECCOMP
2474 Iterator j;
2475 void *id;
2476 #endif
2477
2478 fprintf(f,
2479 "%sSystemCallArchitectures:",
2480 prefix);
2481
2482 #ifdef HAVE_SECCOMP
2483 SET_FOREACH(id, c->syscall_archs, j)
2484 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2485 #endif
2486 fputc('\n', f);
2487 }
2488
2489 if (c->syscall_errno != 0)
2490 fprintf(f,
2491 "%sSystemCallErrorNumber: %s\n",
2492 prefix, strna(errno_to_name(c->syscall_errno)));
2493
2494 if (c->apparmor_profile)
2495 fprintf(f,
2496 "%sAppArmorProfile: %s%s\n",
2497 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2498 }
2499
2500 bool exec_context_maintains_privileges(ExecContext *c) {
2501 assert(c);
2502
2503 /* Returns true if the process forked off would run run under
2504 * an unchanged UID or as root. */
2505
2506 if (!c->user)
2507 return true;
2508
2509 if (streq(c->user, "root") || streq(c->user, "0"))
2510 return true;
2511
2512 return false;
2513 }
2514
2515 void exec_status_start(ExecStatus *s, pid_t pid) {
2516 assert(s);
2517
2518 zero(*s);
2519 s->pid = pid;
2520 dual_timestamp_get(&s->start_timestamp);
2521 }
2522
2523 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2524 assert(s);
2525
2526 if (s->pid && s->pid != pid)
2527 zero(*s);
2528
2529 s->pid = pid;
2530 dual_timestamp_get(&s->exit_timestamp);
2531
2532 s->code = code;
2533 s->status = status;
2534
2535 if (context) {
2536 if (context->utmp_id)
2537 utmp_put_dead_process(context->utmp_id, pid, code, status);
2538
2539 exec_context_tty_reset(context);
2540 }
2541 }
2542
2543 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2544 char buf[FORMAT_TIMESTAMP_MAX];
2545
2546 assert(s);
2547 assert(f);
2548
2549 if (s->pid <= 0)
2550 return;
2551
2552 prefix = strempty(prefix);
2553
2554 fprintf(f,
2555 "%sPID: "PID_FMT"\n",
2556 prefix, s->pid);
2557
2558 if (s->start_timestamp.realtime > 0)
2559 fprintf(f,
2560 "%sStart Timestamp: %s\n",
2561 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2562
2563 if (s->exit_timestamp.realtime > 0)
2564 fprintf(f,
2565 "%sExit Timestamp: %s\n"
2566 "%sExit Code: %s\n"
2567 "%sExit Status: %i\n",
2568 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2569 prefix, sigchld_code_to_string(s->code),
2570 prefix, s->status);
2571 }
2572
2573 char *exec_command_line(char **argv) {
2574 size_t k;
2575 char *n, *p, **a;
2576 bool first = true;
2577
2578 assert(argv);
2579
2580 k = 1;
2581 STRV_FOREACH(a, argv)
2582 k += strlen(*a)+3;
2583
2584 if (!(n = new(char, k)))
2585 return NULL;
2586
2587 p = n;
2588 STRV_FOREACH(a, argv) {
2589
2590 if (!first)
2591 *(p++) = ' ';
2592 else
2593 first = false;
2594
2595 if (strpbrk(*a, WHITESPACE)) {
2596 *(p++) = '\'';
2597 p = stpcpy(p, *a);
2598 *(p++) = '\'';
2599 } else
2600 p = stpcpy(p, *a);
2601
2602 }
2603
2604 *p = 0;
2605
2606 /* FIXME: this doesn't really handle arguments that have
2607 * spaces and ticks in them */
2608
2609 return n;
2610 }
2611
2612 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2613 _cleanup_free_ char *cmd = NULL;
2614 const char *prefix2;
2615
2616 assert(c);
2617 assert(f);
2618
2619 prefix = strempty(prefix);
2620 prefix2 = strjoina(prefix, "\t");
2621
2622 cmd = exec_command_line(c->argv);
2623 fprintf(f,
2624 "%sCommand Line: %s\n",
2625 prefix, cmd ? cmd : strerror(ENOMEM));
2626
2627 exec_status_dump(&c->exec_status, f, prefix2);
2628 }
2629
2630 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2631 assert(f);
2632
2633 prefix = strempty(prefix);
2634
2635 LIST_FOREACH(command, c, c)
2636 exec_command_dump(c, f, prefix);
2637 }
2638
2639 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2640 ExecCommand *end;
2641
2642 assert(l);
2643 assert(e);
2644
2645 if (*l) {
2646 /* It's kind of important, that we keep the order here */
2647 LIST_FIND_TAIL(command, *l, end);
2648 LIST_INSERT_AFTER(command, *l, end, e);
2649 } else
2650 *l = e;
2651 }
2652
2653 int exec_command_set(ExecCommand *c, const char *path, ...) {
2654 va_list ap;
2655 char **l, *p;
2656
2657 assert(c);
2658 assert(path);
2659
2660 va_start(ap, path);
2661 l = strv_new_ap(path, ap);
2662 va_end(ap);
2663
2664 if (!l)
2665 return -ENOMEM;
2666
2667 p = strdup(path);
2668 if (!p) {
2669 strv_free(l);
2670 return -ENOMEM;
2671 }
2672
2673 free(c->path);
2674 c->path = p;
2675
2676 strv_free(c->argv);
2677 c->argv = l;
2678
2679 return 0;
2680 }
2681
2682 int exec_command_append(ExecCommand *c, const char *path, ...) {
2683 _cleanup_strv_free_ char **l = NULL;
2684 va_list ap;
2685 int r;
2686
2687 assert(c);
2688 assert(path);
2689
2690 va_start(ap, path);
2691 l = strv_new_ap(path, ap);
2692 va_end(ap);
2693
2694 if (!l)
2695 return -ENOMEM;
2696
2697 r = strv_extend_strv(&c->argv, l);
2698 if (r < 0)
2699 return r;
2700
2701 return 0;
2702 }
2703
2704
2705 static int exec_runtime_allocate(ExecRuntime **rt) {
2706
2707 if (*rt)
2708 return 0;
2709
2710 *rt = new0(ExecRuntime, 1);
2711 if (!*rt)
2712 return -ENOMEM;
2713
2714 (*rt)->n_ref = 1;
2715 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2716
2717 return 0;
2718 }
2719
2720 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2721 int r;
2722
2723 assert(rt);
2724 assert(c);
2725 assert(id);
2726
2727 if (*rt)
2728 return 1;
2729
2730 if (!c->private_network && !c->private_tmp)
2731 return 0;
2732
2733 r = exec_runtime_allocate(rt);
2734 if (r < 0)
2735 return r;
2736
2737 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2738 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2739 return -errno;
2740 }
2741
2742 if (c->private_tmp && !(*rt)->tmp_dir) {
2743 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2744 if (r < 0)
2745 return r;
2746 }
2747
2748 return 1;
2749 }
2750
2751 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2752 assert(r);
2753 assert(r->n_ref > 0);
2754
2755 r->n_ref++;
2756 return r;
2757 }
2758
2759 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2760
2761 if (!r)
2762 return NULL;
2763
2764 assert(r->n_ref > 0);
2765
2766 r->n_ref--;
2767 if (r->n_ref > 0)
2768 return NULL;
2769
2770 free(r->tmp_dir);
2771 free(r->var_tmp_dir);
2772 safe_close_pair(r->netns_storage_socket);
2773 free(r);
2774
2775 return NULL;
2776 }
2777
2778 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2779 assert(u);
2780 assert(f);
2781 assert(fds);
2782
2783 if (!rt)
2784 return 0;
2785
2786 if (rt->tmp_dir)
2787 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2788
2789 if (rt->var_tmp_dir)
2790 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2791
2792 if (rt->netns_storage_socket[0] >= 0) {
2793 int copy;
2794
2795 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2796 if (copy < 0)
2797 return copy;
2798
2799 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2800 }
2801
2802 if (rt->netns_storage_socket[1] >= 0) {
2803 int copy;
2804
2805 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2806 if (copy < 0)
2807 return copy;
2808
2809 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2810 }
2811
2812 return 0;
2813 }
2814
2815 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2816 int r;
2817
2818 assert(rt);
2819 assert(key);
2820 assert(value);
2821
2822 if (streq(key, "tmp-dir")) {
2823 char *copy;
2824
2825 r = exec_runtime_allocate(rt);
2826 if (r < 0)
2827 return log_oom();
2828
2829 copy = strdup(value);
2830 if (!copy)
2831 return log_oom();
2832
2833 free((*rt)->tmp_dir);
2834 (*rt)->tmp_dir = copy;
2835
2836 } else if (streq(key, "var-tmp-dir")) {
2837 char *copy;
2838
2839 r = exec_runtime_allocate(rt);
2840 if (r < 0)
2841 return log_oom();
2842
2843 copy = strdup(value);
2844 if (!copy)
2845 return log_oom();
2846
2847 free((*rt)->var_tmp_dir);
2848 (*rt)->var_tmp_dir = copy;
2849
2850 } else if (streq(key, "netns-socket-0")) {
2851 int fd;
2852
2853 r = exec_runtime_allocate(rt);
2854 if (r < 0)
2855 return log_oom();
2856
2857 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2858 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2859 else {
2860 safe_close((*rt)->netns_storage_socket[0]);
2861 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2862 }
2863 } else if (streq(key, "netns-socket-1")) {
2864 int fd;
2865
2866 r = exec_runtime_allocate(rt);
2867 if (r < 0)
2868 return log_oom();
2869
2870 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2871 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2872 else {
2873 safe_close((*rt)->netns_storage_socket[1]);
2874 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2875 }
2876 } else
2877 return 0;
2878
2879 return 1;
2880 }
2881
2882 static void *remove_tmpdir_thread(void *p) {
2883 _cleanup_free_ char *path = p;
2884
2885 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2886 return NULL;
2887 }
2888
2889 void exec_runtime_destroy(ExecRuntime *rt) {
2890 int r;
2891
2892 if (!rt)
2893 return;
2894
2895 /* If there are multiple users of this, let's leave the stuff around */
2896 if (rt->n_ref > 1)
2897 return;
2898
2899 if (rt->tmp_dir) {
2900 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2901
2902 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2903 if (r < 0) {
2904 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2905 free(rt->tmp_dir);
2906 }
2907
2908 rt->tmp_dir = NULL;
2909 }
2910
2911 if (rt->var_tmp_dir) {
2912 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2913
2914 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2915 if (r < 0) {
2916 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2917 free(rt->var_tmp_dir);
2918 }
2919
2920 rt->var_tmp_dir = NULL;
2921 }
2922
2923 safe_close_pair(rt->netns_storage_socket);
2924 }
2925
2926 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2927 [EXEC_INPUT_NULL] = "null",
2928 [EXEC_INPUT_TTY] = "tty",
2929 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2930 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2931 [EXEC_INPUT_SOCKET] = "socket"
2932 };
2933
2934 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2935
2936 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2937 [EXEC_OUTPUT_INHERIT] = "inherit",
2938 [EXEC_OUTPUT_NULL] = "null",
2939 [EXEC_OUTPUT_TTY] = "tty",
2940 [EXEC_OUTPUT_SYSLOG] = "syslog",
2941 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2942 [EXEC_OUTPUT_KMSG] = "kmsg",
2943 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2944 [EXEC_OUTPUT_JOURNAL] = "journal",
2945 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2946 [EXEC_OUTPUT_SOCKET] = "socket"
2947 };
2948
2949 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2950
2951 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2952 [EXEC_UTMP_INIT] = "init",
2953 [EXEC_UTMP_LOGIN] = "login",
2954 [EXEC_UTMP_USER] = "user",
2955 };
2956
2957 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);