]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
Merge pull request #1491 from Danielmachon/danish-translation
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <glob.h>
25 #include <grp.h>
26 #include <poll.h>
27 #include <signal.h>
28 #include <string.h>
29 #include <sys/personality.h>
30 #include <sys/prctl.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/un.h>
34 #include <unistd.h>
35 #include <utmpx.h>
36
37 #ifdef HAVE_PAM
38 #include <security/pam_appl.h>
39 #endif
40
41 #ifdef HAVE_SELINUX
42 #include <selinux/selinux.h>
43 #endif
44
45 #ifdef HAVE_SECCOMP
46 #include <seccomp.h>
47 #endif
48
49 #ifdef HAVE_APPARMOR
50 #include <sys/apparmor.h>
51 #endif
52
53 #include "sd-messages.h"
54
55 #include "af-list.h"
56 #include "async.h"
57 #include "barrier.h"
58 #include "bus-endpoint.h"
59 #include "cap-list.h"
60 #include "capability.h"
61 #include "def.h"
62 #include "env-util.h"
63 #include "errno-list.h"
64 #include "exit-status.h"
65 #include "fileio.h"
66 #include "formats-util.h"
67 #include "ioprio.h"
68 #include "log.h"
69 #include "macro.h"
70 #include "missing.h"
71 #include "mkdir.h"
72 #include "namespace.h"
73 #include "path-util.h"
74 #include "process-util.h"
75 #include "rm-rf.h"
76 #include "securebits.h"
77 #include "selinux-util.h"
78 #include "signal-util.h"
79 #include "smack-util.h"
80 #include "strv.h"
81 #include "terminal-util.h"
82 #include "unit.h"
83 #include "util.h"
84 #include "utmp-wtmp.h"
85
86 #ifdef HAVE_APPARMOR
87 #include "apparmor-util.h"
88 #endif
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
94 #include "execute.h"
95
/* Timeout passed to the first wait on the idle pipe in do_idle_pipe_dance() */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Timeout passed to the second wait on the idle pipe, after the child wrote its "bored" byte */
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested via fd_inc_sndbuf() for the logger stream socket */
#define SNDBUF_SIZE (8*1024*1024)
103
/* Rearranges the passed file descriptors so that fds[i] ends up being fd
 * number i+3. The array is updated in place with the new fd numbers.
 *
 * Returns 0 on success (also when n_fds is 0), or a negative errno-style
 * value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD gave us a higher number because the wanted
                         * one was still in use: remember the first such index
                         * so that the next pass starts over from there */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
148
/* Sets or clears O_NONBLOCK (according to 'nonblock') and clears FD_CLOEXEC
 * on each of the n_fds descriptors in fds.
 *
 * Returns 0 on success, or the first negative value returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * passed on to our children */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
177
178 _pure_ static const char *tty_path(const ExecContext *context) {
179 assert(context);
180
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185 }
186
187 static void exec_context_tty_reset(const ExecContext *context) {
188 assert(context);
189
190 if (context->tty_vhangup)
191 terminal_vhangup(tty_path(context));
192
193 if (context->tty_reset)
194 reset_terminal(tty_path(context));
195
196 if (context->tty_vt_disallocate && context->tty_path)
197 vt_disallocate(context->tty_path);
198 }
199
200 static bool is_terminal_output(ExecOutput o) {
201 return
202 o == EXEC_OUTPUT_TTY ||
203 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
204 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
205 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
206 }
207
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as fd number nfd.
 *
 * Returns nfd on success, or a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
225
226 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
227 union sockaddr_union sa = {
228 .un.sun_family = AF_UNIX,
229 .un.sun_path = "/run/systemd/journal/stdout",
230 };
231 uid_t olduid = UID_INVALID;
232 gid_t oldgid = GID_INVALID;
233 int r;
234
235 if (gid != GID_INVALID) {
236 oldgid = getgid();
237
238 r = setegid(gid);
239 if (r < 0)
240 return -errno;
241 }
242
243 if (uid != UID_INVALID) {
244 olduid = getuid();
245
246 r = seteuid(uid);
247 if (r < 0) {
248 r = -errno;
249 goto restore_gid;
250 }
251 }
252
253 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
254 if (r < 0)
255 r = -errno;
256
257 /* If we fail to restore the uid or gid, things will likely
258 fail later on. This should only happen if an LSM interferes. */
259
260 if (uid != UID_INVALID)
261 (void) seteuid(olduid);
262
263 restore_gid:
264 if (gid != GID_INVALID)
265 (void) setegid(oldgid);
266
267 return r;
268 }
269
270 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
271 int fd, r;
272
273 assert(context);
274 assert(output < _EXEC_OUTPUT_MAX);
275 assert(ident);
276 assert(nfd >= 0);
277
278 fd = socket(AF_UNIX, SOCK_STREAM, 0);
279 if (fd < 0)
280 return -errno;
281
282 r = connect_journal_socket(fd, uid, gid);
283 if (r < 0)
284 return r;
285
286 if (shutdown(fd, SHUT_RD) < 0) {
287 safe_close(fd);
288 return -errno;
289 }
290
291 fd_inc_sndbuf(fd, SNDBUF_SIZE);
292
293 dprintf(fd,
294 "%s\n"
295 "%s\n"
296 "%i\n"
297 "%i\n"
298 "%i\n"
299 "%i\n"
300 "%i\n",
301 context->syslog_identifier ? context->syslog_identifier : ident,
302 unit_id,
303 context->syslog_priority,
304 !!context->syslog_level_prefix,
305 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
306 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
307 is_terminal_output(output));
308
309 if (fd != nfd) {
310 r = dup2(fd, nfd) < 0 ? -errno : nfd;
311 safe_close(fd);
312 } else
313 r = nfd;
314
315 return r;
316 }
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and makes
 * it available as fd number nfd.
 *
 * Returns nfd on success, or a negative errno-style value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the wanted number, nothing to duplicate */
        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return result;
}
335
336 static bool is_terminal_input(ExecInput i) {
337 return
338 i == EXEC_INPUT_TTY ||
339 i == EXEC_INPUT_TTY_FORCE ||
340 i == EXEC_INPUT_TTY_FAIL;
341 }
342
343 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
344
345 if (is_terminal_input(std_input) && !apply_tty_stdin)
346 return EXEC_INPUT_NULL;
347
348 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
349 return EXEC_INPUT_NULL;
350
351 return std_input;
352 }
353
354 static int fixup_output(ExecOutput std_output, int socket_fd) {
355
356 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
357 return EXEC_OUTPUT_INHERIT;
358
359 return std_output;
360 }
361
362 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
363 ExecInput i;
364
365 assert(context);
366
367 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
368
369 switch (i) {
370
371 case EXEC_INPUT_NULL:
372 return open_null_as(O_RDONLY, STDIN_FILENO);
373
374 case EXEC_INPUT_TTY:
375 case EXEC_INPUT_TTY_FORCE:
376 case EXEC_INPUT_TTY_FAIL: {
377 int fd, r;
378
379 fd = acquire_terminal(tty_path(context),
380 i == EXEC_INPUT_TTY_FAIL,
381 i == EXEC_INPUT_TTY_FORCE,
382 false,
383 USEC_INFINITY);
384 if (fd < 0)
385 return fd;
386
387 if (fd != STDIN_FILENO) {
388 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
389 safe_close(fd);
390 } else
391 r = STDIN_FILENO;
392
393 return r;
394 }
395
396 case EXEC_INPUT_SOCKET:
397 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
398
399 default:
400 assert_not_reached("Unknown input type");
401 }
402 }
403
404 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
405 ExecOutput o;
406 ExecInput i;
407 int r;
408
409 assert(unit);
410 assert(context);
411 assert(ident);
412
413 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
414 o = fixup_output(context->std_output, socket_fd);
415
416 if (fileno == STDERR_FILENO) {
417 ExecOutput e;
418 e = fixup_output(context->std_error, socket_fd);
419
420 /* This expects the input and output are already set up */
421
422 /* Don't change the stderr file descriptor if we inherit all
423 * the way and are not on a tty */
424 if (e == EXEC_OUTPUT_INHERIT &&
425 o == EXEC_OUTPUT_INHERIT &&
426 i == EXEC_INPUT_NULL &&
427 !is_terminal_input(context->std_input) &&
428 getppid () != 1)
429 return fileno;
430
431 /* Duplicate from stdout if possible */
432 if (e == o || e == EXEC_OUTPUT_INHERIT)
433 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
434
435 o = e;
436
437 } else if (o == EXEC_OUTPUT_INHERIT) {
438 /* If input got downgraded, inherit the original value */
439 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
440 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
441
442 /* If the input is connected to anything that's not a /dev/null, inherit that... */
443 if (i != EXEC_INPUT_NULL)
444 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
445
446 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
447 if (getppid() != 1)
448 return fileno;
449
450 /* We need to open /dev/null here anew, to get the right access mode. */
451 return open_null_as(O_WRONLY, fileno);
452 }
453
454 switch (o) {
455
456 case EXEC_OUTPUT_NULL:
457 return open_null_as(O_WRONLY, fileno);
458
459 case EXEC_OUTPUT_TTY:
460 if (is_terminal_input(i))
461 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
462
463 /* We don't reset the terminal if this is just about output */
464 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
465
466 case EXEC_OUTPUT_SYSLOG:
467 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
468 case EXEC_OUTPUT_KMSG:
469 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
470 case EXEC_OUTPUT_JOURNAL:
471 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
472 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
473 if (r < 0) {
474 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
475 r = open_null_as(O_WRONLY, fileno);
476 }
477 return r;
478
479 case EXEC_OUTPUT_SOCKET:
480 assert(socket_fd >= 0);
481 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482
483 default:
484 assert_not_reached("Unknown error type");
485 }
486 }
487
488 static int chown_terminal(int fd, uid_t uid) {
489 struct stat st;
490
491 assert(fd >= 0);
492
493 /* This might fail. What matters are the results. */
494 (void) fchown(fd, uid, -1);
495 (void) fchmod(fd, TTY_MODE);
496
497 if (fstat(fd, &st) < 0)
498 return -errno;
499
500 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
501 return -EPERM;
502
503 return 0;
504 }
505
506 static int setup_confirm_stdio(int *_saved_stdin,
507 int *_saved_stdout) {
508 int fd = -1, saved_stdin, saved_stdout = -1, r;
509
510 assert(_saved_stdin);
511 assert(_saved_stdout);
512
513 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
514 if (saved_stdin < 0)
515 return -errno;
516
517 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518 if (saved_stdout < 0) {
519 r = errno;
520 goto fail;
521 }
522
523 fd = acquire_terminal(
524 "/dev/console",
525 false,
526 false,
527 false,
528 DEFAULT_CONFIRM_USEC);
529 if (fd < 0) {
530 r = fd;
531 goto fail;
532 }
533
534 r = chown_terminal(fd, getuid());
535 if (r < 0)
536 goto fail;
537
538 if (dup2(fd, STDIN_FILENO) < 0) {
539 r = -errno;
540 goto fail;
541 }
542
543 if (dup2(fd, STDOUT_FILENO) < 0) {
544 r = -errno;
545 goto fail;
546 }
547
548 if (fd >= 2)
549 safe_close(fd);
550
551 *_saved_stdin = saved_stdin;
552 *_saved_stdout = saved_stdout;
553
554 return 0;
555
556 fail:
557 safe_close(saved_stdout);
558 safe_close(saved_stdin);
559 safe_close(fd);
560
561 return r;
562 }
563
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565 _cleanup_close_ int fd = -1;
566 va_list ap;
567
568 assert(format);
569
570 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
571 if (fd < 0)
572 return fd;
573
574 va_start(ap, format);
575 vdprintf(fd, format, ap);
576 va_end(ap);
577
578 return 0;
579 }
580
/* Releases the terminal and puts the saved descriptors back in place as
 * stdin and stdout, then closes the saved copies.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
604
605 static int ask_for_confirmation(char *response, char **argv) {
606 int saved_stdout = -1, saved_stdin = -1, r;
607 _cleanup_free_ char *line = NULL;
608
609 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
610 if (r < 0)
611 return r;
612
613 line = exec_command_line(argv);
614 if (!line)
615 return -ENOMEM;
616
617 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
618
619 restore_confirm_stdio(&saved_stdin, &saved_stdout);
620
621 return r;
622 }
623
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625 bool keep_groups = false;
626 int r;
627
628 assert(context);
629
630 /* Lookup and set GID and supplementary group list. Here too
631 * we avoid NSS lookups for gid=0. */
632
633 if (context->group || username) {
634 /* First step, initialize groups from /etc/groups */
635 if (username && gid != 0) {
636 if (initgroups(username, gid) < 0)
637 return -errno;
638
639 keep_groups = true;
640 }
641
642 /* Second step, set our gids */
643 if (setresgid(gid, gid, gid) < 0)
644 return -errno;
645 }
646
647 if (context->supplementary_groups) {
648 int ngroups_max, k;
649 gid_t *gids;
650 char **i;
651
652 /* Final step, initialize any manually set supplementary groups */
653 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
654
655 if (!(gids = new(gid_t, ngroups_max)))
656 return -ENOMEM;
657
658 if (keep_groups) {
659 k = getgroups(ngroups_max, gids);
660 if (k < 0) {
661 free(gids);
662 return -errno;
663 }
664 } else
665 k = 0;
666
667 STRV_FOREACH(i, context->supplementary_groups) {
668 const char *g;
669
670 if (k >= ngroups_max) {
671 free(gids);
672 return -E2BIG;
673 }
674
675 g = *i;
676 r = get_group_creds(&g, gids+k);
677 if (r < 0) {
678 free(gids);
679 return r;
680 }
681
682 k++;
683 }
684
685 if (setgroups(k, gids) < 0) {
686 free(gids);
687 return -errno;
688 }
689
690 free(gids);
691 }
692
693 return 0;
694 }
695
696 static int enforce_user(const ExecContext *context, uid_t uid) {
697 assert(context);
698
699 /* Sets (but doesn't lookup) the uid and make sure we keep the
700 * capabilities while doing so. */
701
702 if (context->capabilities) {
703 _cleanup_cap_free_ cap_t d = NULL;
704 static const cap_value_t bits[] = {
705 CAP_SETUID, /* Necessary so that we can run setresuid() below */
706 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
707 };
708
709 /* First step: If we need to keep capabilities but
710 * drop privileges we need to make sure we keep our
711 * caps, while we drop privileges. */
712 if (uid != 0) {
713 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
714
715 if (prctl(PR_GET_SECUREBITS) != sb)
716 if (prctl(PR_SET_SECUREBITS, sb) < 0)
717 return -errno;
718 }
719
720 /* Second step: set the capabilities. This will reduce
721 * the capabilities to the minimum we need. */
722
723 d = cap_dup(context->capabilities);
724 if (!d)
725 return -errno;
726
727 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
728 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
729 return -errno;
730
731 if (cap_set_proc(d) < 0)
732 return -errno;
733 }
734
735 /* Third step: actually set the uids */
736 if (setresuid(uid, uid, uid) < 0)
737 return -errno;
738
739 /* At this point we should have all necessary capabilities but
740 are otherwise a normal user. However, the caps might got
741 corrupted due to the setresuid() so we need clean them up
742 later. This is done outside of this call. */
743
744 return 0;
745 }
746
747 #ifdef HAVE_PAM
748
749 static int null_conv(
750 int num_msg,
751 const struct pam_message **msg,
752 struct pam_response **resp,
753 void *appdata_ptr) {
754
755 /* We don't support conversations */
756
757 return PAM_CONV_ERR;
758 }
759
760 static int setup_pam(
761 const char *name,
762 const char *user,
763 uid_t uid,
764 const char *tty,
765 char ***pam_env,
766 int fds[], unsigned n_fds) {
767
768 static const struct pam_conv conv = {
769 .conv = null_conv,
770 .appdata_ptr = NULL
771 };
772
773 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
774 pam_handle_t *handle = NULL;
775 sigset_t old_ss;
776 int pam_code = PAM_SUCCESS;
777 int err = 0;
778 char **e = NULL;
779 bool close_session = false;
780 pid_t pam_pid = 0, parent_pid;
781 int flags = 0;
782
783 assert(name);
784 assert(user);
785 assert(pam_env);
786
787 /* We set up PAM in the parent process, then fork. The child
788 * will then stay around until killed via PR_GET_PDEATHSIG or
789 * systemd via the cgroup logic. It will then remove the PAM
790 * session again. The parent process will exec() the actual
791 * daemon. We do things this way to ensure that the main PID
792 * of the daemon is the one we initially fork()ed. */
793
794 err = barrier_create(&barrier);
795 if (err < 0)
796 goto fail;
797
798 if (log_get_max_level() < LOG_DEBUG)
799 flags |= PAM_SILENT;
800
801 pam_code = pam_start(name, user, &conv, &handle);
802 if (pam_code != PAM_SUCCESS) {
803 handle = NULL;
804 goto fail;
805 }
806
807 if (tty) {
808 pam_code = pam_set_item(handle, PAM_TTY, tty);
809 if (pam_code != PAM_SUCCESS)
810 goto fail;
811 }
812
813 pam_code = pam_acct_mgmt(handle, flags);
814 if (pam_code != PAM_SUCCESS)
815 goto fail;
816
817 pam_code = pam_open_session(handle, flags);
818 if (pam_code != PAM_SUCCESS)
819 goto fail;
820
821 close_session = true;
822
823 e = pam_getenvlist(handle);
824 if (!e) {
825 pam_code = PAM_BUF_ERR;
826 goto fail;
827 }
828
829 /* Block SIGTERM, so that we know that it won't get lost in
830 * the child */
831
832 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
833
834 parent_pid = getpid();
835
836 pam_pid = fork();
837 if (pam_pid < 0)
838 goto fail;
839
840 if (pam_pid == 0) {
841 int sig;
842 int r = EXIT_PAM;
843
844 /* The child's job is to reset the PAM session on
845 * termination */
846 barrier_set_role(&barrier, BARRIER_CHILD);
847
848 /* This string must fit in 10 chars (i.e. the length
849 * of "/sbin/init"), to look pretty in /bin/ps */
850 rename_process("(sd-pam)");
851
852 /* Make sure we don't keep open the passed fds in this
853 child. We assume that otherwise only those fds are
854 open here that have been opened by PAM. */
855 close_many(fds, n_fds);
856
857 /* Drop privileges - we don't need any to pam_close_session
858 * and this will make PR_SET_PDEATHSIG work in most cases.
859 * If this fails, ignore the error - but expect sd-pam threads
860 * to fail to exit normally */
861 if (setresuid(uid, uid, uid) < 0)
862 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
863
864 (void) ignore_signals(SIGPIPE, -1);
865
866 /* Wait until our parent died. This will only work if
867 * the above setresuid() succeeds, otherwise the kernel
868 * will not allow unprivileged parents kill their privileged
869 * children this way. We rely on the control groups kill logic
870 * to do the rest for us. */
871 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
872 goto child_finish;
873
874 /* Tell the parent that our setup is done. This is especially
875 * important regarding dropping privileges. Otherwise, unit
876 * setup might race against our setresuid(2) call. */
877 barrier_place(&barrier);
878
879 /* Check if our parent process might already have
880 * died? */
881 if (getppid() == parent_pid) {
882 sigset_t ss;
883
884 assert_se(sigemptyset(&ss) >= 0);
885 assert_se(sigaddset(&ss, SIGTERM) >= 0);
886
887 for (;;) {
888 if (sigwait(&ss, &sig) < 0) {
889 if (errno == EINTR)
890 continue;
891
892 goto child_finish;
893 }
894
895 assert(sig == SIGTERM);
896 break;
897 }
898 }
899
900 /* If our parent died we'll end the session */
901 if (getppid() != parent_pid) {
902 pam_code = pam_close_session(handle, flags);
903 if (pam_code != PAM_SUCCESS)
904 goto child_finish;
905 }
906
907 r = 0;
908
909 child_finish:
910 pam_end(handle, pam_code | flags);
911 _exit(r);
912 }
913
914 barrier_set_role(&barrier, BARRIER_PARENT);
915
916 /* If the child was forked off successfully it will do all the
917 * cleanups, so forget about the handle here. */
918 handle = NULL;
919
920 /* Unblock SIGTERM again in the parent */
921 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
922
923 /* We close the log explicitly here, since the PAM modules
924 * might have opened it, but we don't want this fd around. */
925 closelog();
926
927 /* Synchronously wait for the child to initialize. We don't care for
928 * errors as we cannot recover. However, warn loudly if it happens. */
929 if (!barrier_place_and_sync(&barrier))
930 log_error("PAM initialization failed");
931
932 *pam_env = e;
933 e = NULL;
934
935 return 0;
936
937 fail:
938 if (pam_code != PAM_SUCCESS) {
939 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
940 err = -EPERM; /* PAM errors do not map to errno */
941 } else {
942 err = log_error_errno(err < 0 ? err : errno, "PAM failed: %m");
943 }
944
945 if (handle) {
946 if (close_session)
947 pam_code = pam_close_session(handle, flags);
948
949 pam_end(handle, pam_code | flags);
950 }
951
952 strv_free(e);
953
954 closelog();
955
956 if (pam_pid > 1) {
957 kill(pam_pid, SIGTERM);
958 kill(pam_pid, SIGCONT);
959 }
960
961 return err;
962 }
963 #endif
964
/* Renames the current process to "(<name>)", where <name> is the last up to
 * eight characters of the basename of 'path'. If the basename is empty the
 * process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the name is usually more
                 * interesting, since the start might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = 0;

        rename_process(name);
}
995
996 #ifdef HAVE_SECCOMP
997
998 static int apply_seccomp(const ExecContext *c) {
999 uint32_t negative_action, action;
1000 scmp_filter_ctx *seccomp;
1001 Iterator i;
1002 void *id;
1003 int r;
1004
1005 assert(c);
1006
1007 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1008
1009 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
1010 if (!seccomp)
1011 return -ENOMEM;
1012
1013 if (c->syscall_archs) {
1014
1015 SET_FOREACH(id, c->syscall_archs, i) {
1016 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1017 if (r == -EEXIST)
1018 continue;
1019 if (r < 0)
1020 goto finish;
1021 }
1022
1023 } else {
1024 r = seccomp_add_secondary_archs(seccomp);
1025 if (r < 0)
1026 goto finish;
1027 }
1028
1029 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1030 SET_FOREACH(id, c->syscall_filter, i) {
1031 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1032 if (r < 0)
1033 goto finish;
1034 }
1035
1036 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1037 if (r < 0)
1038 goto finish;
1039
1040 r = seccomp_load(seccomp);
1041
1042 finish:
1043 seccomp_release(seccomp);
1044 return r;
1045 }
1046
1047 static int apply_address_families(const ExecContext *c) {
1048 scmp_filter_ctx *seccomp;
1049 Iterator i;
1050 int r;
1051
1052 assert(c);
1053
1054 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1055 if (!seccomp)
1056 return -ENOMEM;
1057
1058 r = seccomp_add_secondary_archs(seccomp);
1059 if (r < 0)
1060 goto finish;
1061
1062 if (c->address_families_whitelist) {
1063 int af, first = 0, last = 0;
1064 void *afp;
1065
1066 /* If this is a whitelist, we first block the address
1067 * families that are out of range and then everything
1068 * that is not in the set. First, we find the lowest
1069 * and highest address family in the set. */
1070
1071 SET_FOREACH(afp, c->address_families, i) {
1072 af = PTR_TO_INT(afp);
1073
1074 if (af <= 0 || af >= af_max())
1075 continue;
1076
1077 if (first == 0 || af < first)
1078 first = af;
1079
1080 if (last == 0 || af > last)
1081 last = af;
1082 }
1083
1084 assert((first == 0) == (last == 0));
1085
1086 if (first == 0) {
1087
1088 /* No entries in the valid range, block everything */
1089 r = seccomp_rule_add(
1090 seccomp,
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092 SCMP_SYS(socket),
1093 0);
1094 if (r < 0)
1095 goto finish;
1096
1097 } else {
1098
1099 /* Block everything below the first entry */
1100 r = seccomp_rule_add(
1101 seccomp,
1102 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1103 SCMP_SYS(socket),
1104 1,
1105 SCMP_A0(SCMP_CMP_LT, first));
1106 if (r < 0)
1107 goto finish;
1108
1109 /* Block everything above the last entry */
1110 r = seccomp_rule_add(
1111 seccomp,
1112 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1113 SCMP_SYS(socket),
1114 1,
1115 SCMP_A0(SCMP_CMP_GT, last));
1116 if (r < 0)
1117 goto finish;
1118
1119 /* Block everything between the first and last
1120 * entry */
1121 for (af = 1; af < af_max(); af++) {
1122
1123 if (set_contains(c->address_families, INT_TO_PTR(af)))
1124 continue;
1125
1126 r = seccomp_rule_add(
1127 seccomp,
1128 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1129 SCMP_SYS(socket),
1130 1,
1131 SCMP_A0(SCMP_CMP_EQ, af));
1132 if (r < 0)
1133 goto finish;
1134 }
1135 }
1136
1137 } else {
1138 void *af;
1139
1140 /* If this is a blacklist, then generate one rule for
1141 * each address family that are then combined in OR
1142 * checks. */
1143
1144 SET_FOREACH(af, c->address_families, i) {
1145
1146 r = seccomp_rule_add(
1147 seccomp,
1148 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1149 SCMP_SYS(socket),
1150 1,
1151 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1152 if (r < 0)
1153 goto finish;
1154 }
1155 }
1156
1157 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1158 if (r < 0)
1159 goto finish;
1160
1161 r = seccomp_load(seccomp);
1162
1163 finish:
1164 seccomp_release(seccomp);
1165 return r;
1166 }
1167
1168 #endif
1169
1170 static void do_idle_pipe_dance(int idle_pipe[4]) {
1171 assert(idle_pipe);
1172
1173
1174 idle_pipe[1] = safe_close(idle_pipe[1]);
1175 idle_pipe[2] = safe_close(idle_pipe[2]);
1176
1177 if (idle_pipe[0] >= 0) {
1178 int r;
1179
1180 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1181
1182 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1183 ssize_t n;
1184
1185 /* Signal systemd that we are bored and want to continue. */
1186 n = write(idle_pipe[3], "x", 1);
1187 if (n > 0)
1188 /* Wait for systemd to react to the signal above. */
1189 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1190 }
1191
1192 idle_pipe[0] = safe_close(idle_pipe[0]);
1193
1194 }
1195
1196 idle_pipe[3] = safe_close(idle_pipe[3]);
1197 }
1198
1199 static int build_environment(
1200 const ExecContext *c,
1201 unsigned n_fds,
1202 char ** fd_names,
1203 usec_t watchdog_usec,
1204 const char *home,
1205 const char *username,
1206 const char *shell,
1207 char ***ret) {
1208
1209 _cleanup_strv_free_ char **our_env = NULL;
1210 unsigned n_env = 0;
1211 char *x;
1212
1213 assert(c);
1214 assert(ret);
1215
1216 our_env = new0(char*, 11);
1217 if (!our_env)
1218 return -ENOMEM;
1219
1220 if (n_fds > 0) {
1221 _cleanup_free_ char *joined = NULL;
1222
1223 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1224 return -ENOMEM;
1225 our_env[n_env++] = x;
1226
1227 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1228 return -ENOMEM;
1229 our_env[n_env++] = x;
1230
1231 joined = strv_join(fd_names, ":");
1232 if (!joined)
1233 return -ENOMEM;
1234
1235 x = strjoin("LISTEN_FDNAMES=", joined, NULL);
1236 if (!x)
1237 return -ENOMEM;
1238 our_env[n_env++] = x;
1239 }
1240
1241 if (watchdog_usec > 0) {
1242 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1243 return -ENOMEM;
1244 our_env[n_env++] = x;
1245
1246 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1247 return -ENOMEM;
1248 our_env[n_env++] = x;
1249 }
1250
1251 if (home) {
1252 x = strappend("HOME=", home);
1253 if (!x)
1254 return -ENOMEM;
1255 our_env[n_env++] = x;
1256 }
1257
1258 if (username) {
1259 x = strappend("LOGNAME=", username);
1260 if (!x)
1261 return -ENOMEM;
1262 our_env[n_env++] = x;
1263
1264 x = strappend("USER=", username);
1265 if (!x)
1266 return -ENOMEM;
1267 our_env[n_env++] = x;
1268 }
1269
1270 if (shell) {
1271 x = strappend("SHELL=", shell);
1272 if (!x)
1273 return -ENOMEM;
1274 our_env[n_env++] = x;
1275 }
1276
1277 if (is_terminal_input(c->std_input) ||
1278 c->std_output == EXEC_OUTPUT_TTY ||
1279 c->std_error == EXEC_OUTPUT_TTY ||
1280 c->tty_path) {
1281
1282 x = strdup(default_term_for_tty(tty_path(c)));
1283 if (!x)
1284 return -ENOMEM;
1285 our_env[n_env++] = x;
1286 }
1287
1288 our_env[n_env++] = NULL;
1289 assert(n_env <= 11);
1290
1291 *ret = our_env;
1292 our_env = NULL;
1293
1294 return 0;
1295 }
1296
1297 static bool exec_needs_mount_namespace(
1298 const ExecContext *context,
1299 const ExecParameters *params,
1300 ExecRuntime *runtime) {
1301
1302 assert(context);
1303 assert(params);
1304
1305 if (!strv_isempty(context->read_write_dirs) ||
1306 !strv_isempty(context->read_only_dirs) ||
1307 !strv_isempty(context->inaccessible_dirs))
1308 return true;
1309
1310 if (context->mount_flags != 0)
1311 return true;
1312
1313 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1314 return true;
1315
1316 if (params->bus_endpoint_path)
1317 return true;
1318
1319 if (context->private_devices ||
1320 context->protect_system != PROTECT_SYSTEM_NO ||
1321 context->protect_home != PROTECT_HOME_NO)
1322 return true;
1323
1324 return false;
1325 }
1326
1327 static int exec_child(
1328 Unit *unit,
1329 ExecCommand *command,
1330 const ExecContext *context,
1331 const ExecParameters *params,
1332 ExecRuntime *runtime,
1333 char **argv,
1334 int socket_fd,
1335 int *fds, unsigned n_fds,
1336 char **files_env,
1337 int *exit_status) {
1338
1339 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1340 _cleanup_free_ char *mac_selinux_context_net = NULL;
1341 const char *username = NULL, *home = NULL, *shell = NULL, *wd;
1342 unsigned n_dont_close = 0;
1343 int dont_close[n_fds + 4];
1344 uid_t uid = UID_INVALID;
1345 gid_t gid = GID_INVALID;
1346 int i, r;
1347 bool needs_mount_namespace;
1348
1349 assert(unit);
1350 assert(command);
1351 assert(context);
1352 assert(params);
1353 assert(exit_status);
1354
1355 rename_process_from_path(command->path);
1356
1357 /* We reset exactly these signals, since they are the
1358 * only ones we set to SIG_IGN in the main daemon. All
1359 * others we leave untouched because we set them to
1360 * SIG_DFL or a valid handler initially, both of which
1361 * will be demoted to SIG_DFL. */
1362 (void) default_signals(SIGNALS_CRASH_HANDLER,
1363 SIGNALS_IGNORE, -1);
1364
1365 if (context->ignore_sigpipe)
1366 (void) ignore_signals(SIGPIPE, -1);
1367
1368 r = reset_signal_mask();
1369 if (r < 0) {
1370 *exit_status = EXIT_SIGNAL_MASK;
1371 return r;
1372 }
1373
1374 if (params->idle_pipe)
1375 do_idle_pipe_dance(params->idle_pipe);
1376
1377 /* Close sockets very early to make sure we don't
1378 * block init reexecution because it cannot bind its
1379 * sockets */
1380
1381 log_forget_fds();
1382
1383 if (socket_fd >= 0)
1384 dont_close[n_dont_close++] = socket_fd;
1385 if (n_fds > 0) {
1386 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1387 n_dont_close += n_fds;
1388 }
1389 if (params->bus_endpoint_fd >= 0)
1390 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1391 if (runtime) {
1392 if (runtime->netns_storage_socket[0] >= 0)
1393 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1394 if (runtime->netns_storage_socket[1] >= 0)
1395 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1396 }
1397
1398 r = close_all_fds(dont_close, n_dont_close);
1399 if (r < 0) {
1400 *exit_status = EXIT_FDS;
1401 return r;
1402 }
1403
1404 if (!context->same_pgrp)
1405 if (setsid() < 0) {
1406 *exit_status = EXIT_SETSID;
1407 return -errno;
1408 }
1409
1410 exec_context_tty_reset(context);
1411
1412 if (params->confirm_spawn) {
1413 char response;
1414
1415 r = ask_for_confirmation(&response, argv);
1416 if (r == -ETIMEDOUT)
1417 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1418 else if (r < 0)
1419 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1420 else if (response == 's') {
1421 write_confirm_message("Skipping execution.\n");
1422 *exit_status = EXIT_CONFIRM;
1423 return -ECANCELED;
1424 } else if (response == 'n') {
1425 write_confirm_message("Failing execution.\n");
1426 *exit_status = 0;
1427 return 0;
1428 }
1429 }
1430
1431 if (context->user) {
1432 username = context->user;
1433 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1434 if (r < 0) {
1435 *exit_status = EXIT_USER;
1436 return r;
1437 }
1438 }
1439
1440 if (context->group) {
1441 const char *g = context->group;
1442
1443 r = get_group_creds(&g, &gid);
1444 if (r < 0) {
1445 *exit_status = EXIT_GROUP;
1446 return r;
1447 }
1448 }
1449
1450
1451 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1452 * must sure to drop O_NONBLOCK */
1453 if (socket_fd >= 0)
1454 fd_nonblock(socket_fd, false);
1455
1456 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1457 if (r < 0) {
1458 *exit_status = EXIT_STDIN;
1459 return r;
1460 }
1461
1462 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1463 if (r < 0) {
1464 *exit_status = EXIT_STDOUT;
1465 return r;
1466 }
1467
1468 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1469 if (r < 0) {
1470 *exit_status = EXIT_STDERR;
1471 return r;
1472 }
1473
1474 if (params->cgroup_path) {
1475 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1476 if (r < 0) {
1477 *exit_status = EXIT_CGROUP;
1478 return r;
1479 }
1480 }
1481
1482 if (context->oom_score_adjust_set) {
1483 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1484
1485 /* When we can't make this change due to EPERM, then
1486 * let's silently skip over it. User namespaces
1487 * prohibit write access to this file, and we
1488 * shouldn't trip up over that. */
1489
1490 sprintf(t, "%i", context->oom_score_adjust);
1491 r = write_string_file("/proc/self/oom_score_adj", t, 0);
1492 if (r == -EPERM || r == -EACCES) {
1493 log_open();
1494 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1495 log_close();
1496 } else if (r < 0) {
1497 *exit_status = EXIT_OOM_ADJUST;
1498 return -errno;
1499 }
1500 }
1501
1502 if (context->nice_set)
1503 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1504 *exit_status = EXIT_NICE;
1505 return -errno;
1506 }
1507
1508 if (context->cpu_sched_set) {
1509 struct sched_param param = {
1510 .sched_priority = context->cpu_sched_priority,
1511 };
1512
1513 r = sched_setscheduler(0,
1514 context->cpu_sched_policy |
1515 (context->cpu_sched_reset_on_fork ?
1516 SCHED_RESET_ON_FORK : 0),
1517 &param);
1518 if (r < 0) {
1519 *exit_status = EXIT_SETSCHEDULER;
1520 return -errno;
1521 }
1522 }
1523
1524 if (context->cpuset)
1525 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1526 *exit_status = EXIT_CPUAFFINITY;
1527 return -errno;
1528 }
1529
1530 if (context->ioprio_set)
1531 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1532 *exit_status = EXIT_IOPRIO;
1533 return -errno;
1534 }
1535
1536 if (context->timer_slack_nsec != NSEC_INFINITY)
1537 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1538 *exit_status = EXIT_TIMERSLACK;
1539 return -errno;
1540 }
1541
1542 if (context->personality != PERSONALITY_INVALID)
1543 if (personality(context->personality) < 0) {
1544 *exit_status = EXIT_PERSONALITY;
1545 return -errno;
1546 }
1547
1548 if (context->utmp_id)
1549 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path,
1550 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
1551 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
1552 USER_PROCESS,
1553 username ? "root" : context->user);
1554
1555 if (context->user && is_terminal_input(context->std_input)) {
1556 r = chown_terminal(STDIN_FILENO, uid);
1557 if (r < 0) {
1558 *exit_status = EXIT_STDIN;
1559 return r;
1560 }
1561 }
1562
1563 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1564 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1565
1566 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1567 if (r < 0) {
1568 *exit_status = EXIT_BUS_ENDPOINT;
1569 return r;
1570 }
1571 }
1572
1573 /* If delegation is enabled we'll pass ownership of the cgroup
1574 * (but only in systemd's own controller hierarchy!) to the
1575 * user of the new process. */
1576 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1577 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1578 if (r < 0) {
1579 *exit_status = EXIT_CGROUP;
1580 return r;
1581 }
1582
1583
1584 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1585 if (r < 0) {
1586 *exit_status = EXIT_CGROUP;
1587 return r;
1588 }
1589 }
1590
1591 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1592 char **rt;
1593
1594 STRV_FOREACH(rt, context->runtime_directory) {
1595 _cleanup_free_ char *p;
1596
1597 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1598 if (!p) {
1599 *exit_status = EXIT_RUNTIME_DIRECTORY;
1600 return -ENOMEM;
1601 }
1602
1603 r = mkdir_p_label(p, context->runtime_directory_mode);
1604 if (r < 0) {
1605 *exit_status = EXIT_RUNTIME_DIRECTORY;
1606 return r;
1607 }
1608
1609 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1610 if (r < 0) {
1611 *exit_status = EXIT_RUNTIME_DIRECTORY;
1612 return r;
1613 }
1614 }
1615 }
1616
1617 umask(context->umask);
1618
1619 if (params->apply_permissions) {
1620 r = enforce_groups(context, username, gid);
1621 if (r < 0) {
1622 *exit_status = EXIT_GROUP;
1623 return r;
1624 }
1625 #ifdef HAVE_SMACK
1626 if (context->smack_process_label) {
1627 r = mac_smack_apply_pid(0, context->smack_process_label);
1628 if (r < 0) {
1629 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1630 return r;
1631 }
1632 }
1633 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1634 else {
1635 _cleanup_free_ char *exec_label = NULL;
1636
1637 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1638 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
1639 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1640 return r;
1641 }
1642
1643 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1644 if (r < 0) {
1645 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1646 return r;
1647 }
1648 }
1649 #endif
1650 #endif
1651 #ifdef HAVE_PAM
1652 if (context->pam_name && username) {
1653 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1654 if (r < 0) {
1655 *exit_status = EXIT_PAM;
1656 return r;
1657 }
1658 }
1659 #endif
1660 }
1661
1662 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1663 r = setup_netns(runtime->netns_storage_socket);
1664 if (r < 0) {
1665 *exit_status = EXIT_NETWORK;
1666 return r;
1667 }
1668 }
1669
1670 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1671
1672 if (needs_mount_namespace) {
1673 char *tmp = NULL, *var = NULL;
1674
1675 /* The runtime struct only contains the parent
1676 * of the private /tmp, which is
1677 * non-accessible to world users. Inside of it
1678 * there's a /tmp that is sticky, and that's
1679 * the one we want to use here. */
1680
1681 if (context->private_tmp && runtime) {
1682 if (runtime->tmp_dir)
1683 tmp = strjoina(runtime->tmp_dir, "/tmp");
1684 if (runtime->var_tmp_dir)
1685 var = strjoina(runtime->var_tmp_dir, "/tmp");
1686 }
1687
1688 r = setup_namespace(
1689 params->apply_chroot ? context->root_directory : NULL,
1690 context->read_write_dirs,
1691 context->read_only_dirs,
1692 context->inaccessible_dirs,
1693 tmp,
1694 var,
1695 params->bus_endpoint_path,
1696 context->private_devices,
1697 context->protect_home,
1698 context->protect_system,
1699 context->mount_flags);
1700
1701 /* If we couldn't set up the namespace this is
1702 * probably due to a missing capability. In this case,
1703 * silently proceeed. */
1704 if (r == -EPERM || r == -EACCES) {
1705 log_open();
1706 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1707 log_close();
1708 } else if (r < 0) {
1709 *exit_status = EXIT_NAMESPACE;
1710 return r;
1711 }
1712 }
1713
1714 if (context->working_directory_home)
1715 wd = home;
1716 else if (context->working_directory)
1717 wd = context->working_directory;
1718 else
1719 wd = "/";
1720
1721 if (params->apply_chroot) {
1722 if (!needs_mount_namespace && context->root_directory)
1723 if (chroot(context->root_directory) < 0) {
1724 *exit_status = EXIT_CHROOT;
1725 return -errno;
1726 }
1727
1728 if (chdir(wd) < 0 &&
1729 !context->working_directory_missing_ok) {
1730 *exit_status = EXIT_CHDIR;
1731 return -errno;
1732 }
1733 } else {
1734 const char *d;
1735
1736 d = strjoina(strempty(context->root_directory), "/", strempty(wd));
1737 if (chdir(d) < 0 &&
1738 !context->working_directory_missing_ok) {
1739 *exit_status = EXIT_CHDIR;
1740 return -errno;
1741 }
1742 }
1743
1744 #ifdef HAVE_SELINUX
1745 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1746 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1747 if (r < 0) {
1748 *exit_status = EXIT_SELINUX_CONTEXT;
1749 return r;
1750 }
1751 }
1752 #endif
1753
1754 /* We repeat the fd closing here, to make sure that
1755 * nothing is leaked from the PAM modules. Note that
1756 * we are more aggressive this time since socket_fd
1757 * and the netns fds we don't need anymore. The custom
1758 * endpoint fd was needed to upload the policy and can
1759 * now be closed as well. */
1760 r = close_all_fds(fds, n_fds);
1761 if (r >= 0)
1762 r = shift_fds(fds, n_fds);
1763 if (r >= 0)
1764 r = flags_fds(fds, n_fds, context->non_blocking);
1765 if (r < 0) {
1766 *exit_status = EXIT_FDS;
1767 return r;
1768 }
1769
1770 if (params->apply_permissions) {
1771
1772 for (i = 0; i < _RLIMIT_MAX; i++) {
1773 if (!context->rlimit[i])
1774 continue;
1775
1776 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1777 *exit_status = EXIT_LIMITS;
1778 return -errno;
1779 }
1780 }
1781
1782 if (context->capability_bounding_set_drop) {
1783 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1784 if (r < 0) {
1785 *exit_status = EXIT_CAPABILITIES;
1786 return r;
1787 }
1788 }
1789
1790 if (context->user) {
1791 r = enforce_user(context, uid);
1792 if (r < 0) {
1793 *exit_status = EXIT_USER;
1794 return r;
1795 }
1796 }
1797
1798 /* PR_GET_SECUREBITS is not privileged, while
1799 * PR_SET_SECUREBITS is. So to suppress
1800 * potential EPERMs we'll try not to call
1801 * PR_SET_SECUREBITS unless necessary. */
1802 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1803 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1804 *exit_status = EXIT_SECUREBITS;
1805 return -errno;
1806 }
1807
1808 if (context->capabilities)
1809 if (cap_set_proc(context->capabilities) < 0) {
1810 *exit_status = EXIT_CAPABILITIES;
1811 return -errno;
1812 }
1813
1814 if (context->no_new_privileges)
1815 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1816 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1817 return -errno;
1818 }
1819
1820 #ifdef HAVE_SECCOMP
1821 if (context->address_families_whitelist ||
1822 !set_isempty(context->address_families)) {
1823 r = apply_address_families(context);
1824 if (r < 0) {
1825 *exit_status = EXIT_ADDRESS_FAMILIES;
1826 return r;
1827 }
1828 }
1829
1830 if (context->syscall_whitelist ||
1831 !set_isempty(context->syscall_filter) ||
1832 !set_isempty(context->syscall_archs)) {
1833 r = apply_seccomp(context);
1834 if (r < 0) {
1835 *exit_status = EXIT_SECCOMP;
1836 return r;
1837 }
1838 }
1839 #endif
1840
1841 #ifdef HAVE_SELINUX
1842 if (mac_selinux_use()) {
1843 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1844
1845 if (exec_context) {
1846 r = setexeccon(exec_context);
1847 if (r < 0) {
1848 *exit_status = EXIT_SELINUX_CONTEXT;
1849 return r;
1850 }
1851 }
1852 }
1853 #endif
1854
1855 #ifdef HAVE_APPARMOR
1856 if (context->apparmor_profile && mac_apparmor_use()) {
1857 r = aa_change_onexec(context->apparmor_profile);
1858 if (r < 0 && !context->apparmor_profile_ignore) {
1859 *exit_status = EXIT_APPARMOR_PROFILE;
1860 return -errno;
1861 }
1862 }
1863 #endif
1864 }
1865
1866 r = build_environment(context, n_fds, params->fd_names, params->watchdog_usec, home, username, shell, &our_env);
1867 if (r < 0) {
1868 *exit_status = EXIT_MEMORY;
1869 return r;
1870 }
1871
1872 final_env = strv_env_merge(5,
1873 params->environment,
1874 our_env,
1875 context->environment,
1876 files_env,
1877 pam_env,
1878 NULL);
1879 if (!final_env) {
1880 *exit_status = EXIT_MEMORY;
1881 return -ENOMEM;
1882 }
1883
1884 final_argv = replace_env_argv(argv, final_env);
1885 if (!final_argv) {
1886 *exit_status = EXIT_MEMORY;
1887 return -ENOMEM;
1888 }
1889
1890 final_env = strv_env_clean(final_env);
1891
1892 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1893 _cleanup_free_ char *line;
1894
1895 line = exec_command_line(final_argv);
1896 if (line) {
1897 log_open();
1898 log_struct(LOG_DEBUG,
1899 LOG_UNIT_ID(unit),
1900 "EXECUTABLE=%s", command->path,
1901 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1902 NULL);
1903 log_close();
1904 }
1905 }
1906
1907 execve(command->path, final_argv, final_env);
1908 *exit_status = EXIT_EXEC;
1909 return -errno;
1910 }
1911
1912 int exec_spawn(Unit *unit,
1913 ExecCommand *command,
1914 const ExecContext *context,
1915 const ExecParameters *params,
1916 ExecRuntime *runtime,
1917 pid_t *ret) {
1918
1919 _cleanup_strv_free_ char **files_env = NULL;
1920 int *fds = NULL; unsigned n_fds = 0;
1921 _cleanup_free_ char *line = NULL;
1922 int socket_fd, r;
1923 char **argv;
1924 pid_t pid;
1925
1926 assert(unit);
1927 assert(command);
1928 assert(context);
1929 assert(ret);
1930 assert(params);
1931 assert(params->fds || params->n_fds <= 0);
1932
1933 if (context->std_input == EXEC_INPUT_SOCKET ||
1934 context->std_output == EXEC_OUTPUT_SOCKET ||
1935 context->std_error == EXEC_OUTPUT_SOCKET) {
1936
1937 if (params->n_fds != 1) {
1938 log_unit_error(unit, "Got more than one socket.");
1939 return -EINVAL;
1940 }
1941
1942 socket_fd = params->fds[0];
1943 } else {
1944 socket_fd = -1;
1945 fds = params->fds;
1946 n_fds = params->n_fds;
1947 }
1948
1949 r = exec_context_load_environment(unit, context, &files_env);
1950 if (r < 0)
1951 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1952
1953 argv = params->argv ?: command->argv;
1954 line = exec_command_line(argv);
1955 if (!line)
1956 return log_oom();
1957
1958 log_struct(LOG_DEBUG,
1959 LOG_UNIT_ID(unit),
1960 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1961 "EXECUTABLE=%s", command->path,
1962 NULL);
1963 pid = fork();
1964 if (pid < 0)
1965 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1966
1967 if (pid == 0) {
1968 int exit_status;
1969
1970 r = exec_child(unit,
1971 command,
1972 context,
1973 params,
1974 runtime,
1975 argv,
1976 socket_fd,
1977 fds, n_fds,
1978 files_env,
1979 &exit_status);
1980 if (r < 0) {
1981 log_open();
1982 log_struct_errno(LOG_ERR, r,
1983 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1984 LOG_UNIT_ID(unit),
1985 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1986 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1987 command->path),
1988 "EXECUTABLE=%s", command->path,
1989 NULL);
1990 }
1991
1992 _exit(exit_status);
1993 }
1994
1995 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1996
1997 /* We add the new process to the cgroup both in the child (so
1998 * that we can be sure that no user code is ever executed
1999 * outside of the cgroup) and in the parent (so that we can be
2000 * sure that when we kill the cgroup the process will be
2001 * killed too). */
2002 if (params->cgroup_path)
2003 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
2004
2005 exec_status_start(&command->exec_status, pid);
2006
2007 *ret = pid;
2008 return 0;
2009 }
2010
2011 void exec_context_init(ExecContext *c) {
2012 assert(c);
2013
2014 c->umask = 0022;
2015 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
2016 c->cpu_sched_policy = SCHED_OTHER;
2017 c->syslog_priority = LOG_DAEMON|LOG_INFO;
2018 c->syslog_level_prefix = true;
2019 c->ignore_sigpipe = true;
2020 c->timer_slack_nsec = NSEC_INFINITY;
2021 c->personality = PERSONALITY_INVALID;
2022 c->runtime_directory_mode = 0755;
2023 }
2024
2025 void exec_context_done(ExecContext *c) {
2026 unsigned l;
2027
2028 assert(c);
2029
2030 c->environment = strv_free(c->environment);
2031 c->environment_files = strv_free(c->environment_files);
2032
2033 for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
2034 c->rlimit[l] = mfree(c->rlimit[l]);
2035
2036 c->working_directory = mfree(c->working_directory);
2037 c->root_directory = mfree(c->root_directory);
2038 c->tty_path = mfree(c->tty_path);
2039 c->syslog_identifier = mfree(c->syslog_identifier);
2040 c->user = mfree(c->user);
2041 c->group = mfree(c->group);
2042
2043 c->supplementary_groups = strv_free(c->supplementary_groups);
2044
2045 c->pam_name = mfree(c->pam_name);
2046
2047 if (c->capabilities) {
2048 cap_free(c->capabilities);
2049 c->capabilities = NULL;
2050 }
2051
2052 c->read_only_dirs = strv_free(c->read_only_dirs);
2053 c->read_write_dirs = strv_free(c->read_write_dirs);
2054 c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
2055
2056 if (c->cpuset)
2057 CPU_FREE(c->cpuset);
2058
2059 c->utmp_id = mfree(c->utmp_id);
2060 c->selinux_context = mfree(c->selinux_context);
2061 c->apparmor_profile = mfree(c->apparmor_profile);
2062
2063 c->syscall_filter = set_free(c->syscall_filter);
2064 c->syscall_archs = set_free(c->syscall_archs);
2065 c->address_families = set_free(c->address_families);
2066
2067 c->runtime_directory = strv_free(c->runtime_directory);
2068
2069 bus_endpoint_free(c->bus_endpoint);
2070 c->bus_endpoint = NULL;
2071 }
2072
2073 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2074 char **i;
2075
2076 assert(c);
2077
2078 if (!runtime_prefix)
2079 return 0;
2080
2081 STRV_FOREACH(i, c->runtime_directory) {
2082 _cleanup_free_ char *p;
2083
2084 p = strjoin(runtime_prefix, "/", *i, NULL);
2085 if (!p)
2086 return -ENOMEM;
2087
2088 /* We execute this synchronously, since we need to be
2089 * sure this is gone when we start the service
2090 * next. */
2091 (void) rm_rf(p, REMOVE_ROOT);
2092 }
2093
2094 return 0;
2095 }
2096
2097 void exec_command_done(ExecCommand *c) {
2098 assert(c);
2099
2100 c->path = mfree(c->path);
2101
2102 c->argv = strv_free(c->argv);
2103 }
2104
2105 void exec_command_done_array(ExecCommand *c, unsigned n) {
2106 unsigned i;
2107
2108 for (i = 0; i < n; i++)
2109 exec_command_done(c+i);
2110 }
2111
2112 ExecCommand* exec_command_free_list(ExecCommand *c) {
2113 ExecCommand *i;
2114
2115 while ((i = c)) {
2116 LIST_REMOVE(command, c, i);
2117 exec_command_done(i);
2118 free(i);
2119 }
2120
2121 return NULL;
2122 }
2123
2124 void exec_command_free_array(ExecCommand **c, unsigned n) {
2125 unsigned i;
2126
2127 for (i = 0; i < n; i++)
2128 c[i] = exec_command_free_list(c[i]);
2129 }
2130
2131 typedef struct InvalidEnvInfo {
2132 Unit *unit;
2133 const char *path;
2134 } InvalidEnvInfo;
2135
2136 static void invalid_env(const char *p, void *userdata) {
2137 InvalidEnvInfo *info = userdata;
2138
2139 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2140 }
2141
2142 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2143 char **i, **r = NULL;
2144
2145 assert(c);
2146 assert(l);
2147
2148 STRV_FOREACH(i, c->environment_files) {
2149 char *fn;
2150 int k;
2151 bool ignore = false;
2152 char **p;
2153 _cleanup_globfree_ glob_t pglob = {};
2154 int count, n;
2155
2156 fn = *i;
2157
2158 if (fn[0] == '-') {
2159 ignore = true;
2160 fn ++;
2161 }
2162
2163 if (!path_is_absolute(fn)) {
2164 if (ignore)
2165 continue;
2166
2167 strv_free(r);
2168 return -EINVAL;
2169 }
2170
2171 /* Filename supports globbing, take all matching files */
2172 errno = 0;
2173 if (glob(fn, 0, NULL, &pglob) != 0) {
2174 if (ignore)
2175 continue;
2176
2177 strv_free(r);
2178 return errno ? -errno : -EINVAL;
2179 }
2180 count = pglob.gl_pathc;
2181 if (count == 0) {
2182 if (ignore)
2183 continue;
2184
2185 strv_free(r);
2186 return -EINVAL;
2187 }
2188 for (n = 0; n < count; n++) {
2189 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2190 if (k < 0) {
2191 if (ignore)
2192 continue;
2193
2194 strv_free(r);
2195 return k;
2196 }
2197 /* Log invalid environment variables with filename */
2198 if (p) {
2199 InvalidEnvInfo info = {
2200 .unit = unit,
2201 .path = pglob.gl_pathv[n]
2202 };
2203
2204 p = strv_env_clean_with_callback(p, invalid_env, &info);
2205 }
2206
2207 if (r == NULL)
2208 r = p;
2209 else {
2210 char **m;
2211
2212 m = strv_env_merge(2, r, p);
2213 strv_free(r);
2214 strv_free(p);
2215 if (!m)
2216 return -ENOMEM;
2217
2218 r = m;
2219 }
2220 }
2221 }
2222
2223 *l = r;
2224
2225 return 0;
2226 }
2227
2228 static bool tty_may_match_dev_console(const char *tty) {
2229 _cleanup_free_ char *active = NULL;
2230 char *console;
2231
2232 if (startswith(tty, "/dev/"))
2233 tty += 5;
2234
2235 /* trivial identity? */
2236 if (streq(tty, "console"))
2237 return true;
2238
2239 console = resolve_dev_console(&active);
2240 /* if we could not resolve, assume it may */
2241 if (!console)
2242 return true;
2243
2244 /* "tty0" means the active VC, so it may be the same sometimes */
2245 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2246 }
2247
2248 bool exec_context_may_touch_console(ExecContext *ec) {
2249 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2250 is_terminal_input(ec->std_input) ||
2251 is_terminal_output(ec->std_output) ||
2252 is_terminal_output(ec->std_error)) &&
2253 tty_may_match_dev_console(tty_path(ec));
2254 }
2255
2256 static void strv_fprintf(FILE *f, char **l) {
2257 char **g;
2258
2259 assert(f);
2260
2261 STRV_FOREACH(g, l)
2262 fprintf(f, " %s", *g);
2263 }
2264
2265 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2266 char **e;
2267 unsigned i;
2268
2269 assert(c);
2270 assert(f);
2271
2272 prefix = strempty(prefix);
2273
2274 fprintf(f,
2275 "%sUMask: %04o\n"
2276 "%sWorkingDirectory: %s\n"
2277 "%sRootDirectory: %s\n"
2278 "%sNonBlocking: %s\n"
2279 "%sPrivateTmp: %s\n"
2280 "%sPrivateNetwork: %s\n"
2281 "%sPrivateDevices: %s\n"
2282 "%sProtectHome: %s\n"
2283 "%sProtectSystem: %s\n"
2284 "%sIgnoreSIGPIPE: %s\n",
2285 prefix, c->umask,
2286 prefix, c->working_directory ? c->working_directory : "/",
2287 prefix, c->root_directory ? c->root_directory : "/",
2288 prefix, yes_no(c->non_blocking),
2289 prefix, yes_no(c->private_tmp),
2290 prefix, yes_no(c->private_network),
2291 prefix, yes_no(c->private_devices),
2292 prefix, protect_home_to_string(c->protect_home),
2293 prefix, protect_system_to_string(c->protect_system),
2294 prefix, yes_no(c->ignore_sigpipe));
2295
2296 STRV_FOREACH(e, c->environment)
2297 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2298
2299 STRV_FOREACH(e, c->environment_files)
2300 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2301
2302 if (c->nice_set)
2303 fprintf(f,
2304 "%sNice: %i\n",
2305 prefix, c->nice);
2306
2307 if (c->oom_score_adjust_set)
2308 fprintf(f,
2309 "%sOOMScoreAdjust: %i\n",
2310 prefix, c->oom_score_adjust);
2311
2312 for (i = 0; i < RLIM_NLIMITS; i++)
2313 if (c->rlimit[i])
2314 fprintf(f, "%s%s: "RLIM_FMT"\n",
2315 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2316
2317 if (c->ioprio_set) {
2318 _cleanup_free_ char *class_str = NULL;
2319
2320 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2321 fprintf(f,
2322 "%sIOSchedulingClass: %s\n"
2323 "%sIOPriority: %i\n",
2324 prefix, strna(class_str),
2325 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2326 }
2327
2328 if (c->cpu_sched_set) {
2329 _cleanup_free_ char *policy_str = NULL;
2330
2331 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2332 fprintf(f,
2333 "%sCPUSchedulingPolicy: %s\n"
2334 "%sCPUSchedulingPriority: %i\n"
2335 "%sCPUSchedulingResetOnFork: %s\n",
2336 prefix, strna(policy_str),
2337 prefix, c->cpu_sched_priority,
2338 prefix, yes_no(c->cpu_sched_reset_on_fork));
2339 }
2340
2341 if (c->cpuset) {
2342 fprintf(f, "%sCPUAffinity:", prefix);
2343 for (i = 0; i < c->cpuset_ncpus; i++)
2344 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2345 fprintf(f, " %u", i);
2346 fputs("\n", f);
2347 }
2348
2349 if (c->timer_slack_nsec != NSEC_INFINITY)
2350 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2351
2352 fprintf(f,
2353 "%sStandardInput: %s\n"
2354 "%sStandardOutput: %s\n"
2355 "%sStandardError: %s\n",
2356 prefix, exec_input_to_string(c->std_input),
2357 prefix, exec_output_to_string(c->std_output),
2358 prefix, exec_output_to_string(c->std_error));
2359
2360 if (c->tty_path)
2361 fprintf(f,
2362 "%sTTYPath: %s\n"
2363 "%sTTYReset: %s\n"
2364 "%sTTYVHangup: %s\n"
2365 "%sTTYVTDisallocate: %s\n",
2366 prefix, c->tty_path,
2367 prefix, yes_no(c->tty_reset),
2368 prefix, yes_no(c->tty_vhangup),
2369 prefix, yes_no(c->tty_vt_disallocate));
2370
2371 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2372 c->std_output == EXEC_OUTPUT_KMSG ||
2373 c->std_output == EXEC_OUTPUT_JOURNAL ||
2374 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2375 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2376 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2377 c->std_error == EXEC_OUTPUT_SYSLOG ||
2378 c->std_error == EXEC_OUTPUT_KMSG ||
2379 c->std_error == EXEC_OUTPUT_JOURNAL ||
2380 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2381 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2382 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2383
2384 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2385
2386 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2387 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2388
2389 fprintf(f,
2390 "%sSyslogFacility: %s\n"
2391 "%sSyslogLevel: %s\n",
2392 prefix, strna(fac_str),
2393 prefix, strna(lvl_str));
2394 }
2395
2396 if (c->capabilities) {
2397 _cleanup_cap_free_charp_ char *t;
2398
2399 t = cap_to_text(c->capabilities, NULL);
2400 if (t)
2401 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2402 }
2403
2404 if (c->secure_bits)
2405 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2406 prefix,
2407 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2408 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2409 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2410 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2411 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2412 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2413
2414 if (c->capability_bounding_set_drop) {
2415 unsigned long l;
2416 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2417
2418 for (l = 0; l <= cap_last_cap(); l++)
2419 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2420 fprintf(f, " %s", strna(capability_to_name(l)));
2421
2422 fputs("\n", f);
2423 }
2424
2425 if (c->user)
2426 fprintf(f, "%sUser: %s\n", prefix, c->user);
2427 if (c->group)
2428 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2429
2430 if (strv_length(c->supplementary_groups) > 0) {
2431 fprintf(f, "%sSupplementaryGroups:", prefix);
2432 strv_fprintf(f, c->supplementary_groups);
2433 fputs("\n", f);
2434 }
2435
2436 if (c->pam_name)
2437 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2438
2439 if (strv_length(c->read_write_dirs) > 0) {
2440 fprintf(f, "%sReadWriteDirs:", prefix);
2441 strv_fprintf(f, c->read_write_dirs);
2442 fputs("\n", f);
2443 }
2444
2445 if (strv_length(c->read_only_dirs) > 0) {
2446 fprintf(f, "%sReadOnlyDirs:", prefix);
2447 strv_fprintf(f, c->read_only_dirs);
2448 fputs("\n", f);
2449 }
2450
2451 if (strv_length(c->inaccessible_dirs) > 0) {
2452 fprintf(f, "%sInaccessibleDirs:", prefix);
2453 strv_fprintf(f, c->inaccessible_dirs);
2454 fputs("\n", f);
2455 }
2456
2457 if (c->utmp_id)
2458 fprintf(f,
2459 "%sUtmpIdentifier: %s\n",
2460 prefix, c->utmp_id);
2461
2462 if (c->selinux_context)
2463 fprintf(f,
2464 "%sSELinuxContext: %s%s\n",
2465 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2466
2467 if (c->personality != PERSONALITY_INVALID)
2468 fprintf(f,
2469 "%sPersonality: %s\n",
2470 prefix, strna(personality_to_string(c->personality)));
2471
2472 if (c->syscall_filter) {
2473 #ifdef HAVE_SECCOMP
2474 Iterator j;
2475 void *id;
2476 bool first = true;
2477 #endif
2478
2479 fprintf(f,
2480 "%sSystemCallFilter: ",
2481 prefix);
2482
2483 if (!c->syscall_whitelist)
2484 fputc('~', f);
2485
2486 #ifdef HAVE_SECCOMP
2487 SET_FOREACH(id, c->syscall_filter, j) {
2488 _cleanup_free_ char *name = NULL;
2489
2490 if (first)
2491 first = false;
2492 else
2493 fputc(' ', f);
2494
2495 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2496 fputs(strna(name), f);
2497 }
2498 #endif
2499
2500 fputc('\n', f);
2501 }
2502
2503 if (c->syscall_archs) {
2504 #ifdef HAVE_SECCOMP
2505 Iterator j;
2506 void *id;
2507 #endif
2508
2509 fprintf(f,
2510 "%sSystemCallArchitectures:",
2511 prefix);
2512
2513 #ifdef HAVE_SECCOMP
2514 SET_FOREACH(id, c->syscall_archs, j)
2515 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2516 #endif
2517 fputc('\n', f);
2518 }
2519
2520 if (c->syscall_errno != 0)
2521 fprintf(f,
2522 "%sSystemCallErrorNumber: %s\n",
2523 prefix, strna(errno_to_name(c->syscall_errno)));
2524
2525 if (c->apparmor_profile)
2526 fprintf(f,
2527 "%sAppArmorProfile: %s%s\n",
2528 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2529 }
2530
2531 bool exec_context_maintains_privileges(ExecContext *c) {
2532 assert(c);
2533
2534 /* Returns true if the process forked off would run run under
2535 * an unchanged UID or as root. */
2536
2537 if (!c->user)
2538 return true;
2539
2540 if (streq(c->user, "root") || streq(c->user, "0"))
2541 return true;
2542
2543 return false;
2544 }
2545
2546 void exec_status_start(ExecStatus *s, pid_t pid) {
2547 assert(s);
2548
2549 zero(*s);
2550 s->pid = pid;
2551 dual_timestamp_get(&s->start_timestamp);
2552 }
2553
2554 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2555 assert(s);
2556
2557 if (s->pid && s->pid != pid)
2558 zero(*s);
2559
2560 s->pid = pid;
2561 dual_timestamp_get(&s->exit_timestamp);
2562
2563 s->code = code;
2564 s->status = status;
2565
2566 if (context) {
2567 if (context->utmp_id)
2568 utmp_put_dead_process(context->utmp_id, pid, code, status);
2569
2570 exec_context_tty_reset(context);
2571 }
2572 }
2573
2574 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2575 char buf[FORMAT_TIMESTAMP_MAX];
2576
2577 assert(s);
2578 assert(f);
2579
2580 if (s->pid <= 0)
2581 return;
2582
2583 prefix = strempty(prefix);
2584
2585 fprintf(f,
2586 "%sPID: "PID_FMT"\n",
2587 prefix, s->pid);
2588
2589 if (s->start_timestamp.realtime > 0)
2590 fprintf(f,
2591 "%sStart Timestamp: %s\n",
2592 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2593
2594 if (s->exit_timestamp.realtime > 0)
2595 fprintf(f,
2596 "%sExit Timestamp: %s\n"
2597 "%sExit Code: %s\n"
2598 "%sExit Status: %i\n",
2599 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2600 prefix, sigchld_code_to_string(s->code),
2601 prefix, s->status);
2602 }
2603
2604 char *exec_command_line(char **argv) {
2605 size_t k;
2606 char *n, *p, **a;
2607 bool first = true;
2608
2609 assert(argv);
2610
2611 k = 1;
2612 STRV_FOREACH(a, argv)
2613 k += strlen(*a)+3;
2614
2615 if (!(n = new(char, k)))
2616 return NULL;
2617
2618 p = n;
2619 STRV_FOREACH(a, argv) {
2620
2621 if (!first)
2622 *(p++) = ' ';
2623 else
2624 first = false;
2625
2626 if (strpbrk(*a, WHITESPACE)) {
2627 *(p++) = '\'';
2628 p = stpcpy(p, *a);
2629 *(p++) = '\'';
2630 } else
2631 p = stpcpy(p, *a);
2632
2633 }
2634
2635 *p = 0;
2636
2637 /* FIXME: this doesn't really handle arguments that have
2638 * spaces and ticks in them */
2639
2640 return n;
2641 }
2642
2643 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2644 _cleanup_free_ char *cmd = NULL;
2645 const char *prefix2;
2646
2647 assert(c);
2648 assert(f);
2649
2650 prefix = strempty(prefix);
2651 prefix2 = strjoina(prefix, "\t");
2652
2653 cmd = exec_command_line(c->argv);
2654 fprintf(f,
2655 "%sCommand Line: %s\n",
2656 prefix, cmd ? cmd : strerror(ENOMEM));
2657
2658 exec_status_dump(&c->exec_status, f, prefix2);
2659 }
2660
2661 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2662 assert(f);
2663
2664 prefix = strempty(prefix);
2665
2666 LIST_FOREACH(command, c, c)
2667 exec_command_dump(c, f, prefix);
2668 }
2669
2670 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2671 ExecCommand *end;
2672
2673 assert(l);
2674 assert(e);
2675
2676 if (*l) {
2677 /* It's kind of important, that we keep the order here */
2678 LIST_FIND_TAIL(command, *l, end);
2679 LIST_INSERT_AFTER(command, *l, end, e);
2680 } else
2681 *l = e;
2682 }
2683
2684 int exec_command_set(ExecCommand *c, const char *path, ...) {
2685 va_list ap;
2686 char **l, *p;
2687
2688 assert(c);
2689 assert(path);
2690
2691 va_start(ap, path);
2692 l = strv_new_ap(path, ap);
2693 va_end(ap);
2694
2695 if (!l)
2696 return -ENOMEM;
2697
2698 p = strdup(path);
2699 if (!p) {
2700 strv_free(l);
2701 return -ENOMEM;
2702 }
2703
2704 free(c->path);
2705 c->path = p;
2706
2707 strv_free(c->argv);
2708 c->argv = l;
2709
2710 return 0;
2711 }
2712
2713 int exec_command_append(ExecCommand *c, const char *path, ...) {
2714 _cleanup_strv_free_ char **l = NULL;
2715 va_list ap;
2716 int r;
2717
2718 assert(c);
2719 assert(path);
2720
2721 va_start(ap, path);
2722 l = strv_new_ap(path, ap);
2723 va_end(ap);
2724
2725 if (!l)
2726 return -ENOMEM;
2727
2728 r = strv_extend_strv(&c->argv, l, false);
2729 if (r < 0)
2730 return r;
2731
2732 return 0;
2733 }
2734
2735
2736 static int exec_runtime_allocate(ExecRuntime **rt) {
2737
2738 if (*rt)
2739 return 0;
2740
2741 *rt = new0(ExecRuntime, 1);
2742 if (!*rt)
2743 return -ENOMEM;
2744
2745 (*rt)->n_ref = 1;
2746 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2747
2748 return 0;
2749 }
2750
2751 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2752 int r;
2753
2754 assert(rt);
2755 assert(c);
2756 assert(id);
2757
2758 if (*rt)
2759 return 1;
2760
2761 if (!c->private_network && !c->private_tmp)
2762 return 0;
2763
2764 r = exec_runtime_allocate(rt);
2765 if (r < 0)
2766 return r;
2767
2768 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2769 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2770 return -errno;
2771 }
2772
2773 if (c->private_tmp && !(*rt)->tmp_dir) {
2774 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2775 if (r < 0)
2776 return r;
2777 }
2778
2779 return 1;
2780 }
2781
2782 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2783 assert(r);
2784 assert(r->n_ref > 0);
2785
2786 r->n_ref++;
2787 return r;
2788 }
2789
2790 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2791
2792 if (!r)
2793 return NULL;
2794
2795 assert(r->n_ref > 0);
2796
2797 r->n_ref--;
2798 if (r->n_ref > 0)
2799 return NULL;
2800
2801 free(r->tmp_dir);
2802 free(r->var_tmp_dir);
2803 safe_close_pair(r->netns_storage_socket);
2804 free(r);
2805
2806 return NULL;
2807 }
2808
2809 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2810 assert(u);
2811 assert(f);
2812 assert(fds);
2813
2814 if (!rt)
2815 return 0;
2816
2817 if (rt->tmp_dir)
2818 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2819
2820 if (rt->var_tmp_dir)
2821 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2822
2823 if (rt->netns_storage_socket[0] >= 0) {
2824 int copy;
2825
2826 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2827 if (copy < 0)
2828 return copy;
2829
2830 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2831 }
2832
2833 if (rt->netns_storage_socket[1] >= 0) {
2834 int copy;
2835
2836 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2837 if (copy < 0)
2838 return copy;
2839
2840 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2841 }
2842
2843 return 0;
2844 }
2845
2846 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2847 int r;
2848
2849 assert(rt);
2850 assert(key);
2851 assert(value);
2852
2853 if (streq(key, "tmp-dir")) {
2854 char *copy;
2855
2856 r = exec_runtime_allocate(rt);
2857 if (r < 0)
2858 return log_oom();
2859
2860 copy = strdup(value);
2861 if (!copy)
2862 return log_oom();
2863
2864 free((*rt)->tmp_dir);
2865 (*rt)->tmp_dir = copy;
2866
2867 } else if (streq(key, "var-tmp-dir")) {
2868 char *copy;
2869
2870 r = exec_runtime_allocate(rt);
2871 if (r < 0)
2872 return log_oom();
2873
2874 copy = strdup(value);
2875 if (!copy)
2876 return log_oom();
2877
2878 free((*rt)->var_tmp_dir);
2879 (*rt)->var_tmp_dir = copy;
2880
2881 } else if (streq(key, "netns-socket-0")) {
2882 int fd;
2883
2884 r = exec_runtime_allocate(rt);
2885 if (r < 0)
2886 return log_oom();
2887
2888 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2889 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2890 else {
2891 safe_close((*rt)->netns_storage_socket[0]);
2892 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2893 }
2894 } else if (streq(key, "netns-socket-1")) {
2895 int fd;
2896
2897 r = exec_runtime_allocate(rt);
2898 if (r < 0)
2899 return log_oom();
2900
2901 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2902 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2903 else {
2904 safe_close((*rt)->netns_storage_socket[1]);
2905 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2906 }
2907 } else
2908 return 0;
2909
2910 return 1;
2911 }
2912
2913 static void *remove_tmpdir_thread(void *p) {
2914 _cleanup_free_ char *path = p;
2915
2916 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2917 return NULL;
2918 }
2919
2920 void exec_runtime_destroy(ExecRuntime *rt) {
2921 int r;
2922
2923 if (!rt)
2924 return;
2925
2926 /* If there are multiple users of this, let's leave the stuff around */
2927 if (rt->n_ref > 1)
2928 return;
2929
2930 if (rt->tmp_dir) {
2931 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2932
2933 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2934 if (r < 0) {
2935 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2936 free(rt->tmp_dir);
2937 }
2938
2939 rt->tmp_dir = NULL;
2940 }
2941
2942 if (rt->var_tmp_dir) {
2943 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2944
2945 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2946 if (r < 0) {
2947 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2948 free(rt->var_tmp_dir);
2949 }
2950
2951 rt->var_tmp_dir = NULL;
2952 }
2953
2954 safe_close_pair(rt->netns_storage_socket);
2955 }
2956
2957 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2958 [EXEC_INPUT_NULL] = "null",
2959 [EXEC_INPUT_TTY] = "tty",
2960 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2961 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2962 [EXEC_INPUT_SOCKET] = "socket"
2963 };
2964
2965 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2966
2967 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2968 [EXEC_OUTPUT_INHERIT] = "inherit",
2969 [EXEC_OUTPUT_NULL] = "null",
2970 [EXEC_OUTPUT_TTY] = "tty",
2971 [EXEC_OUTPUT_SYSLOG] = "syslog",
2972 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2973 [EXEC_OUTPUT_KMSG] = "kmsg",
2974 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2975 [EXEC_OUTPUT_JOURNAL] = "journal",
2976 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2977 [EXEC_OUTPUT_SOCKET] = "socket"
2978 };
2979
2980 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2981
2982 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
2983 [EXEC_UTMP_INIT] = "init",
2984 [EXEC_UTMP_LOGIN] = "login",
2985 [EXEC_UTMP_USER] = "user",
2986 };
2987
2988 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);