]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
smack: introduce new SmackProcessLabel option
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
88 #include "label.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
/* First wait in do_idle_pipe_dance(): how long we wait on the idle pipe
 * before telling the other side that we want to continue */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Second wait in do_idle_pipe_dance(): how long we wait again after
 * having written that notification */
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested with fd_inc_sndbuf() for the socket that
 * connect_logger_as() connects to /run/systemd/journal/stdout */
#define SNDBUF_SIZE (8*1024*1024)
101
/* Moves the passed file descriptors so that fds[k] ends up being fd number
 * k+3, i.e. the array occupies the range right after stdin/stdout/stderr.
 * The array is modified in place. Returns 0 on success, or a negative
 * errno-style error if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int want = idx + 3;
                        int moved;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, want);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we wanted was still taken (most likely
                         * by an fd further down the array). Remember the
                         * first such index and start over from there once
                         * this pass is done. */
                        if (moved != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (retry >= 0);

        return 0;
}
145
/* Sets or clears O_NONBLOCK (according to 'nonblock') on every passed fd and
 * always clears FD_CLOEXEC on them. Returns 0 on success, or the first
 * negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be handed over to the child process. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
172
173 _pure_ static const char *tty_path(const ExecContext *context) {
174 assert(context);
175
176 if (context->tty_path)
177 return context->tty_path;
178
179 return "/dev/console";
180 }
181
182 static void exec_context_tty_reset(const ExecContext *context) {
183 assert(context);
184
185 if (context->tty_vhangup)
186 terminal_vhangup(tty_path(context));
187
188 if (context->tty_reset)
189 reset_terminal(tty_path(context));
190
191 if (context->tty_vt_disallocate && context->tty_path)
192 vt_disallocate(context->tty_path);
193 }
194
195 static bool is_terminal_output(ExecOutput o) {
196 return
197 o == EXEC_OUTPUT_TTY ||
198 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
201 }
202
/* Opens /dev/null with the given open flags (plus O_NOCTTY) and installs it
 * as file descriptor 'nfd'. Returns nfd on success, a negative errno-style
 * error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We happened to get exactly the descriptor we wanted */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(null_fd);
        return ret;
}
220
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
222 int fd, r;
223 union sockaddr_union sa = {
224 .un.sun_family = AF_UNIX,
225 .un.sun_path = "/run/systemd/journal/stdout",
226 };
227
228 assert(context);
229 assert(output < _EXEC_OUTPUT_MAX);
230 assert(ident);
231 assert(nfd >= 0);
232
233 fd = socket(AF_UNIX, SOCK_STREAM, 0);
234 if (fd < 0)
235 return -errno;
236
237 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
238 if (r < 0) {
239 safe_close(fd);
240 return -errno;
241 }
242
243 if (shutdown(fd, SHUT_RD) < 0) {
244 safe_close(fd);
245 return -errno;
246 }
247
248 fd_inc_sndbuf(fd, SNDBUF_SIZE);
249
250 dprintf(fd,
251 "%s\n"
252 "%s\n"
253 "%i\n"
254 "%i\n"
255 "%i\n"
256 "%i\n"
257 "%i\n",
258 context->syslog_identifier ? context->syslog_identifier : ident,
259 unit_id,
260 context->syslog_priority,
261 !!context->syslog_level_prefix,
262 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264 is_terminal_output(output));
265
266 if (fd != nfd) {
267 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268 safe_close(fd);
269 } else
270 r = nfd;
271
272 return r;
273 }
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and
 * installs it as file descriptor 'nfd'. Returns nfd on success, a negative
 * error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(term_fd);
        return ret;
}
291
292 static bool is_terminal_input(ExecInput i) {
293 return
294 i == EXEC_INPUT_TTY ||
295 i == EXEC_INPUT_TTY_FORCE ||
296 i == EXEC_INPUT_TTY_FAIL;
297 }
298
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
300
301 if (is_terminal_input(std_input) && !apply_tty_stdin)
302 return EXEC_INPUT_NULL;
303
304 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305 return EXEC_INPUT_NULL;
306
307 return std_input;
308 }
309
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
311
312 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313 return EXEC_OUTPUT_INHERIT;
314
315 return std_output;
316 }
317
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
319 ExecInput i;
320
321 assert(context);
322
323 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324
325 switch (i) {
326
327 case EXEC_INPUT_NULL:
328 return open_null_as(O_RDONLY, STDIN_FILENO);
329
330 case EXEC_INPUT_TTY:
331 case EXEC_INPUT_TTY_FORCE:
332 case EXEC_INPUT_TTY_FAIL: {
333 int fd, r;
334
335 fd = acquire_terminal(tty_path(context),
336 i == EXEC_INPUT_TTY_FAIL,
337 i == EXEC_INPUT_TTY_FORCE,
338 false,
339 USEC_INFINITY);
340 if (fd < 0)
341 return fd;
342
343 if (fd != STDIN_FILENO) {
344 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
345 safe_close(fd);
346 } else
347 r = STDIN_FILENO;
348
349 return r;
350 }
351
352 case EXEC_INPUT_SOCKET:
353 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
354
355 default:
356 assert_not_reached("Unknown input type");
357 }
358 }
359
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
361 ExecOutput o;
362 ExecInput i;
363 int r;
364
365 assert(context);
366 assert(ident);
367
368 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369 o = fixup_output(context->std_output, socket_fd);
370
371 if (fileno == STDERR_FILENO) {
372 ExecOutput e;
373 e = fixup_output(context->std_error, socket_fd);
374
375 /* This expects the input and output are already set up */
376
377 /* Don't change the stderr file descriptor if we inherit all
378 * the way and are not on a tty */
379 if (e == EXEC_OUTPUT_INHERIT &&
380 o == EXEC_OUTPUT_INHERIT &&
381 i == EXEC_INPUT_NULL &&
382 !is_terminal_input(context->std_input) &&
383 getppid () != 1)
384 return fileno;
385
386 /* Duplicate from stdout if possible */
387 if (e == o || e == EXEC_OUTPUT_INHERIT)
388 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389
390 o = e;
391
392 } else if (o == EXEC_OUTPUT_INHERIT) {
393 /* If input got downgraded, inherit the original value */
394 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
396
397 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398 if (i != EXEC_INPUT_NULL)
399 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
400
401 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402 if (getppid() != 1)
403 return fileno;
404
405 /* We need to open /dev/null here anew, to get the right access mode. */
406 return open_null_as(O_WRONLY, fileno);
407 }
408
409 switch (o) {
410
411 case EXEC_OUTPUT_NULL:
412 return open_null_as(O_WRONLY, fileno);
413
414 case EXEC_OUTPUT_TTY:
415 if (is_terminal_input(i))
416 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
417
418 /* We don't reset the terminal if this is just about output */
419 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
420
421 case EXEC_OUTPUT_SYSLOG:
422 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423 case EXEC_OUTPUT_KMSG:
424 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425 case EXEC_OUTPUT_JOURNAL:
426 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427 r = connect_logger_as(context, o, ident, unit_id, fileno);
428 if (r < 0) {
429 log_struct_unit(LOG_CRIT, unit_id,
430 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
431 fileno == STDOUT_FILENO ? "out" : "err",
432 unit_id, strerror(-r),
433 "ERRNO=%d", -r,
434 NULL);
435 r = open_null_as(O_WRONLY, fileno);
436 }
437 return r;
438
439 case EXEC_OUTPUT_SOCKET:
440 assert(socket_fd >= 0);
441 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
442
443 default:
444 assert_not_reached("Unknown error type");
445 }
446 }
447
448 static int chown_terminal(int fd, uid_t uid) {
449 struct stat st;
450
451 assert(fd >= 0);
452
453 /* This might fail. What matters are the results. */
454 (void) fchown(fd, uid, -1);
455 (void) fchmod(fd, TTY_MODE);
456
457 if (fstat(fd, &st) < 0)
458 return -errno;
459
460 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
461 return -EPERM;
462
463 return 0;
464 }
465
466 static int setup_confirm_stdio(int *_saved_stdin,
467 int *_saved_stdout) {
468 int fd = -1, saved_stdin, saved_stdout = -1, r;
469
470 assert(_saved_stdin);
471 assert(_saved_stdout);
472
473 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474 if (saved_stdin < 0)
475 return -errno;
476
477 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
478 if (saved_stdout < 0) {
479 r = errno;
480 goto fail;
481 }
482
483 fd = acquire_terminal(
484 "/dev/console",
485 false,
486 false,
487 false,
488 DEFAULT_CONFIRM_USEC);
489 if (fd < 0) {
490 r = fd;
491 goto fail;
492 }
493
494 r = chown_terminal(fd, getuid());
495 if (r < 0)
496 goto fail;
497
498 if (dup2(fd, STDIN_FILENO) < 0) {
499 r = -errno;
500 goto fail;
501 }
502
503 if (dup2(fd, STDOUT_FILENO) < 0) {
504 r = -errno;
505 goto fail;
506 }
507
508 if (fd >= 2)
509 safe_close(fd);
510
511 *_saved_stdin = saved_stdin;
512 *_saved_stdout = saved_stdout;
513
514 return 0;
515
516 fail:
517 safe_close(saved_stdout);
518 safe_close(saved_stdin);
519 safe_close(fd);
520
521 return r;
522 }
523
524 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
525 _cleanup_close_ int fd = -1;
526 va_list ap;
527
528 assert(format);
529
530 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
531 if (fd < 0)
532 return fd;
533
534 va_start(ap, format);
535 vdprintf(fd, format, ap);
536 va_end(ap);
537
538 return 0;
539 }
540
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies. Returns 0 on success or the negative errno of the last dup2()
 * that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
564
565 static int ask_for_confirmation(char *response, char **argv) {
566 int saved_stdout = -1, saved_stdin = -1, r;
567 _cleanup_free_ char *line = NULL;
568
569 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
570 if (r < 0)
571 return r;
572
573 line = exec_command_line(argv);
574 if (!line)
575 return -ENOMEM;
576
577 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
578
579 restore_confirm_stdio(&saved_stdin, &saved_stdout);
580
581 return r;
582 }
583
584 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
585 bool keep_groups = false;
586 int r;
587
588 assert(context);
589
590 /* Lookup and set GID and supplementary group list. Here too
591 * we avoid NSS lookups for gid=0. */
592
593 if (context->group || username) {
594
595 if (context->group) {
596 const char *g = context->group;
597
598 if ((r = get_group_creds(&g, &gid)) < 0)
599 return r;
600 }
601
602 /* First step, initialize groups from /etc/groups */
603 if (username && gid != 0) {
604 if (initgroups(username, gid) < 0)
605 return -errno;
606
607 keep_groups = true;
608 }
609
610 /* Second step, set our gids */
611 if (setresgid(gid, gid, gid) < 0)
612 return -errno;
613 }
614
615 if (context->supplementary_groups) {
616 int ngroups_max, k;
617 gid_t *gids;
618 char **i;
619
620 /* Final step, initialize any manually set supplementary groups */
621 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
622
623 if (!(gids = new(gid_t, ngroups_max)))
624 return -ENOMEM;
625
626 if (keep_groups) {
627 if ((k = getgroups(ngroups_max, gids)) < 0) {
628 free(gids);
629 return -errno;
630 }
631 } else
632 k = 0;
633
634 STRV_FOREACH(i, context->supplementary_groups) {
635 const char *g;
636
637 if (k >= ngroups_max) {
638 free(gids);
639 return -E2BIG;
640 }
641
642 g = *i;
643 r = get_group_creds(&g, gids+k);
644 if (r < 0) {
645 free(gids);
646 return r;
647 }
648
649 k++;
650 }
651
652 if (setgroups(k, gids) < 0) {
653 free(gids);
654 return -errno;
655 }
656
657 free(gids);
658 }
659
660 return 0;
661 }
662
663 static int enforce_user(const ExecContext *context, uid_t uid) {
664 assert(context);
665
666 /* Sets (but doesn't lookup) the uid and make sure we keep the
667 * capabilities while doing so. */
668
669 if (context->capabilities) {
670 _cleanup_cap_free_ cap_t d = NULL;
671 static const cap_value_t bits[] = {
672 CAP_SETUID, /* Necessary so that we can run setresuid() below */
673 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
674 };
675
676 /* First step: If we need to keep capabilities but
677 * drop privileges we need to make sure we keep our
678 * caps, while we drop privileges. */
679 if (uid != 0) {
680 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
681
682 if (prctl(PR_GET_SECUREBITS) != sb)
683 if (prctl(PR_SET_SECUREBITS, sb) < 0)
684 return -errno;
685 }
686
687 /* Second step: set the capabilities. This will reduce
688 * the capabilities to the minimum we need. */
689
690 d = cap_dup(context->capabilities);
691 if (!d)
692 return -errno;
693
694 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
695 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
696 return -errno;
697
698 if (cap_set_proc(d) < 0)
699 return -errno;
700 }
701
702 /* Third step: actually set the uids */
703 if (setresuid(uid, uid, uid) < 0)
704 return -errno;
705
706 /* At this point we should have all necessary capabilities but
707 are otherwise a normal user. However, the caps might got
708 corrupted due to the setresuid() so we need clean them up
709 later. This is done outside of this call. */
710
711 return 0;
712 }
713
714 #ifdef HAVE_PAM
715
716 static int null_conv(
717 int num_msg,
718 const struct pam_message **msg,
719 struct pam_response **resp,
720 void *appdata_ptr) {
721
722 /* We don't support conversations */
723
724 return PAM_CONV_ERR;
725 }
726
727 static int setup_pam(
728 const char *name,
729 const char *user,
730 uid_t uid,
731 const char *tty,
732 char ***pam_env,
733 int fds[], unsigned n_fds) {
734
735 static const struct pam_conv conv = {
736 .conv = null_conv,
737 .appdata_ptr = NULL
738 };
739
740 pam_handle_t *handle = NULL;
741 sigset_t ss, old_ss;
742 int pam_code = PAM_SUCCESS;
743 int err;
744 char **e = NULL;
745 bool close_session = false;
746 pid_t pam_pid = 0, parent_pid;
747 int flags = 0;
748
749 assert(name);
750 assert(user);
751 assert(pam_env);
752
753 /* We set up PAM in the parent process, then fork. The child
754 * will then stay around until killed via PR_GET_PDEATHSIG or
755 * systemd via the cgroup logic. It will then remove the PAM
756 * session again. The parent process will exec() the actual
757 * daemon. We do things this way to ensure that the main PID
758 * of the daemon is the one we initially fork()ed. */
759
760 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
761 flags |= PAM_SILENT;
762
763 pam_code = pam_start(name, user, &conv, &handle);
764 if (pam_code != PAM_SUCCESS) {
765 handle = NULL;
766 goto fail;
767 }
768
769 if (tty) {
770 pam_code = pam_set_item(handle, PAM_TTY, tty);
771 if (pam_code != PAM_SUCCESS)
772 goto fail;
773 }
774
775 pam_code = pam_acct_mgmt(handle, flags);
776 if (pam_code != PAM_SUCCESS)
777 goto fail;
778
779 pam_code = pam_open_session(handle, flags);
780 if (pam_code != PAM_SUCCESS)
781 goto fail;
782
783 close_session = true;
784
785 e = pam_getenvlist(handle);
786 if (!e) {
787 pam_code = PAM_BUF_ERR;
788 goto fail;
789 }
790
791 /* Block SIGTERM, so that we know that it won't get lost in
792 * the child */
793 if (sigemptyset(&ss) < 0 ||
794 sigaddset(&ss, SIGTERM) < 0 ||
795 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
796 goto fail;
797
798 parent_pid = getpid();
799
800 pam_pid = fork();
801 if (pam_pid < 0)
802 goto fail;
803
804 if (pam_pid == 0) {
805 int sig;
806 int r = EXIT_PAM;
807
808 /* The child's job is to reset the PAM session on
809 * termination */
810
811 /* This string must fit in 10 chars (i.e. the length
812 * of "/sbin/init"), to look pretty in /bin/ps */
813 rename_process("(sd-pam)");
814
815 /* Make sure we don't keep open the passed fds in this
816 child. We assume that otherwise only those fds are
817 open here that have been opened by PAM. */
818 close_many(fds, n_fds);
819
820 /* Drop privileges - we don't need any to pam_close_session
821 * and this will make PR_SET_PDEATHSIG work in most cases.
822 * If this fails, ignore the error - but expect sd-pam threads
823 * to fail to exit normally */
824 if (setresuid(uid, uid, uid) < 0)
825 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
826
827 /* Wait until our parent died. This will only work if
828 * the above setresuid() succeeds, otherwise the kernel
829 * will not allow unprivileged parents kill their privileged
830 * children this way. We rely on the control groups kill logic
831 * to do the rest for us. */
832 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
833 goto child_finish;
834
835 /* Check if our parent process might already have
836 * died? */
837 if (getppid() == parent_pid) {
838 for (;;) {
839 if (sigwait(&ss, &sig) < 0) {
840 if (errno == EINTR)
841 continue;
842
843 goto child_finish;
844 }
845
846 assert(sig == SIGTERM);
847 break;
848 }
849 }
850
851 /* If our parent died we'll end the session */
852 if (getppid() != parent_pid) {
853 pam_code = pam_close_session(handle, flags);
854 if (pam_code != PAM_SUCCESS)
855 goto child_finish;
856 }
857
858 r = 0;
859
860 child_finish:
861 pam_end(handle, pam_code | flags);
862 _exit(r);
863 }
864
865 /* If the child was forked off successfully it will do all the
866 * cleanups, so forget about the handle here. */
867 handle = NULL;
868
869 /* Unblock SIGTERM again in the parent */
870 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
871 goto fail;
872
873 /* We close the log explicitly here, since the PAM modules
874 * might have opened it, but we don't want this fd around. */
875 closelog();
876
877 *pam_env = e;
878 e = NULL;
879
880 return 0;
881
882 fail:
883 if (pam_code != PAM_SUCCESS) {
884 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
885 err = -EPERM; /* PAM errors do not map to errno */
886 } else {
887 log_error("PAM failed: %m");
888 err = -errno;
889 }
890
891 if (handle) {
892 if (close_session)
893 pam_code = pam_close_session(handle, flags);
894
895 pam_end(handle, pam_code | flags);
896 }
897
898 strv_free(e);
899
900 closelog();
901
902 if (pam_pid > 1) {
903 kill(pam_pid, SIGTERM);
904 kill(pam_pid, SIGCONT);
905 }
906
907 return err;
908 }
909 #endif
910
/* Renames the calling process to "(<name>)", where <name> is the basename of
 * 'path', cut down to its last 8 characters if it is longer. If the
 * basename is empty the name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *tail;
        char *out = process_name;
        size_t n;

        /* The result has to fit in 10 characters (the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name, it is usually the more
                 * interesting part, as the beginning might just be
                 * "systemd-" */
                tail += n - 8;
                n = 8;
        }

        *out++ = '(';
        memcpy(out, tail, n);
        out += n;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
941
942 #ifdef HAVE_SECCOMP
943
944 static int apply_seccomp(const ExecContext *c) {
945 uint32_t negative_action, action;
946 scmp_filter_ctx *seccomp;
947 Iterator i;
948 void *id;
949 int r;
950
951 assert(c);
952
953 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
954
955 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
956 if (!seccomp)
957 return -ENOMEM;
958
959 if (c->syscall_archs) {
960
961 SET_FOREACH(id, c->syscall_archs, i) {
962 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
963 if (r == -EEXIST)
964 continue;
965 if (r < 0)
966 goto finish;
967 }
968
969 } else {
970 r = seccomp_add_secondary_archs(seccomp);
971 if (r < 0)
972 goto finish;
973 }
974
975 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
976 SET_FOREACH(id, c->syscall_filter, i) {
977 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
978 if (r < 0)
979 goto finish;
980 }
981
982 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
983 if (r < 0)
984 goto finish;
985
986 r = seccomp_load(seccomp);
987
988 finish:
989 seccomp_release(seccomp);
990 return r;
991 }
992
993 static int apply_address_families(const ExecContext *c) {
994 scmp_filter_ctx *seccomp;
995 Iterator i;
996 int r;
997
998 assert(c);
999
1000 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1001 if (!seccomp)
1002 return -ENOMEM;
1003
1004 r = seccomp_add_secondary_archs(seccomp);
1005 if (r < 0)
1006 goto finish;
1007
1008 if (c->address_families_whitelist) {
1009 int af, first = 0, last = 0;
1010 void *afp;
1011
1012 /* If this is a whitelist, we first block the address
1013 * families that are out of range and then everything
1014 * that is not in the set. First, we find the lowest
1015 * and highest address family in the set. */
1016
1017 SET_FOREACH(afp, c->address_families, i) {
1018 af = PTR_TO_INT(afp);
1019
1020 if (af <= 0 || af >= af_max())
1021 continue;
1022
1023 if (first == 0 || af < first)
1024 first = af;
1025
1026 if (last == 0 || af > last)
1027 last = af;
1028 }
1029
1030 assert((first == 0) == (last == 0));
1031
1032 if (first == 0) {
1033
1034 /* No entries in the valid range, block everything */
1035 r = seccomp_rule_add(
1036 seccomp,
1037 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1038 SCMP_SYS(socket),
1039 0);
1040 if (r < 0)
1041 goto finish;
1042
1043 } else {
1044
1045 /* Block everything below the first entry */
1046 r = seccomp_rule_add(
1047 seccomp,
1048 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1049 SCMP_SYS(socket),
1050 1,
1051 SCMP_A0(SCMP_CMP_LT, first));
1052 if (r < 0)
1053 goto finish;
1054
1055 /* Block everything above the last entry */
1056 r = seccomp_rule_add(
1057 seccomp,
1058 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1059 SCMP_SYS(socket),
1060 1,
1061 SCMP_A0(SCMP_CMP_GT, last));
1062 if (r < 0)
1063 goto finish;
1064
1065 /* Block everything between the first and last
1066 * entry */
1067 for (af = 1; af < af_max(); af++) {
1068
1069 if (set_contains(c->address_families, INT_TO_PTR(af)))
1070 continue;
1071
1072 r = seccomp_rule_add(
1073 seccomp,
1074 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1075 SCMP_SYS(socket),
1076 1,
1077 SCMP_A0(SCMP_CMP_EQ, af));
1078 if (r < 0)
1079 goto finish;
1080 }
1081 }
1082
1083 } else {
1084 void *af;
1085
1086 /* If this is a blacklist, then generate one rule for
1087 * each address family that are then combined in OR
1088 * checks. */
1089
1090 SET_FOREACH(af, c->address_families, i) {
1091
1092 r = seccomp_rule_add(
1093 seccomp,
1094 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1095 SCMP_SYS(socket),
1096 1,
1097 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1098 if (r < 0)
1099 goto finish;
1100 }
1101 }
1102
1103 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1104 if (r < 0)
1105 goto finish;
1106
1107 r = seccomp_load(seccomp);
1108
1109 finish:
1110 seccomp_release(seccomp);
1111 return r;
1112 }
1113
1114 #endif
1115
1116 static void do_idle_pipe_dance(int idle_pipe[4]) {
1117 assert(idle_pipe);
1118
1119
1120 safe_close(idle_pipe[1]);
1121 safe_close(idle_pipe[2]);
1122
1123 if (idle_pipe[0] >= 0) {
1124 int r;
1125
1126 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1127
1128 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1129 /* Signal systemd that we are bored and want to continue. */
1130 write(idle_pipe[3], "x", 1);
1131
1132 /* Wait for systemd to react to the signal above. */
1133 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1134 }
1135
1136 safe_close(idle_pipe[0]);
1137
1138 }
1139
1140 safe_close(idle_pipe[3]);
1141 }
1142
1143 static int build_environment(
1144 const ExecContext *c,
1145 unsigned n_fds,
1146 usec_t watchdog_usec,
1147 const char *home,
1148 const char *username,
1149 const char *shell,
1150 char ***ret) {
1151
1152 _cleanup_strv_free_ char **our_env = NULL;
1153 unsigned n_env = 0;
1154 char *x;
1155
1156 assert(c);
1157 assert(ret);
1158
1159 our_env = new0(char*, 10);
1160 if (!our_env)
1161 return -ENOMEM;
1162
1163 if (n_fds > 0) {
1164 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1165 return -ENOMEM;
1166 our_env[n_env++] = x;
1167
1168 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1169 return -ENOMEM;
1170 our_env[n_env++] = x;
1171 }
1172
1173 if (watchdog_usec > 0) {
1174 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1175 return -ENOMEM;
1176 our_env[n_env++] = x;
1177
1178 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1179 return -ENOMEM;
1180 our_env[n_env++] = x;
1181 }
1182
1183 if (home) {
1184 x = strappend("HOME=", home);
1185 if (!x)
1186 return -ENOMEM;
1187 our_env[n_env++] = x;
1188 }
1189
1190 if (username) {
1191 x = strappend("LOGNAME=", username);
1192 if (!x)
1193 return -ENOMEM;
1194 our_env[n_env++] = x;
1195
1196 x = strappend("USER=", username);
1197 if (!x)
1198 return -ENOMEM;
1199 our_env[n_env++] = x;
1200 }
1201
1202 if (shell) {
1203 x = strappend("SHELL=", shell);
1204 if (!x)
1205 return -ENOMEM;
1206 our_env[n_env++] = x;
1207 }
1208
1209 if (is_terminal_input(c->std_input) ||
1210 c->std_output == EXEC_OUTPUT_TTY ||
1211 c->std_error == EXEC_OUTPUT_TTY ||
1212 c->tty_path) {
1213
1214 x = strdup(default_term_for_tty(tty_path(c)));
1215 if (!x)
1216 return -ENOMEM;
1217 our_env[n_env++] = x;
1218 }
1219
1220 our_env[n_env++] = NULL;
1221 assert(n_env <= 10);
1222
1223 *ret = our_env;
1224 our_env = NULL;
1225
1226 return 0;
1227 }
1228
1229 static int exec_child(ExecCommand *command,
1230 const ExecContext *context,
1231 const ExecParameters *params,
1232 ExecRuntime *runtime,
1233 char **argv,
1234 int socket_fd,
1235 int *fds, unsigned n_fds,
1236 char **files_env,
1237 int *error) {
1238
1239 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1240 const char *username = NULL, *home = NULL, *shell = NULL;
1241 unsigned n_dont_close = 0;
1242 int dont_close[n_fds + 4];
1243 uid_t uid = (uid_t) -1;
1244 gid_t gid = (gid_t) -1;
1245 int i, err;
1246
1247 assert(command);
1248 assert(context);
1249 assert(params);
1250 assert(error);
1251
1252 rename_process_from_path(command->path);
1253
1254 /* We reset exactly these signals, since they are the
1255 * only ones we set to SIG_IGN in the main daemon. All
1256 * others we leave untouched because we set them to
1257 * SIG_DFL or a valid handler initially, both of which
1258 * will be demoted to SIG_DFL. */
1259 default_signals(SIGNALS_CRASH_HANDLER,
1260 SIGNALS_IGNORE, -1);
1261
1262 if (context->ignore_sigpipe)
1263 ignore_signals(SIGPIPE, -1);
1264
1265 err = reset_signal_mask();
1266 if (err < 0) {
1267 *error = EXIT_SIGNAL_MASK;
1268 return err;
1269 }
1270
1271 if (params->idle_pipe)
1272 do_idle_pipe_dance(params->idle_pipe);
1273
1274 /* Close sockets very early to make sure we don't
1275 * block init reexecution because it cannot bind its
1276 * sockets */
1277 log_forget_fds();
1278
1279 if (socket_fd >= 0)
1280 dont_close[n_dont_close++] = socket_fd;
1281 if (n_fds > 0) {
1282 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1283 n_dont_close += n_fds;
1284 }
1285 if (params->bus_endpoint_fd >= 0)
1286 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1287 if (runtime) {
1288 if (runtime->netns_storage_socket[0] >= 0)
1289 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1290 if (runtime->netns_storage_socket[1] >= 0)
1291 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1292 }
1293
1294 err = close_all_fds(dont_close, n_dont_close);
1295 if (err < 0) {
1296 *error = EXIT_FDS;
1297 return err;
1298 }
1299
1300 if (!context->same_pgrp)
1301 if (setsid() < 0) {
1302 *error = EXIT_SETSID;
1303 return -errno;
1304 }
1305
1306 exec_context_tty_reset(context);
1307
1308 if (params->confirm_spawn) {
1309 char response;
1310
1311 err = ask_for_confirmation(&response, argv);
1312 if (err == -ETIMEDOUT)
1313 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1314 else if (err < 0)
1315 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1316 else if (response == 's') {
1317 write_confirm_message("Skipping execution.\n");
1318 *error = EXIT_CONFIRM;
1319 return -ECANCELED;
1320 } else if (response == 'n') {
1321 write_confirm_message("Failing execution.\n");
1322 *error = 0;
1323 return 0;
1324 }
1325 }
1326
1327 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1328 * must be sure to drop O_NONBLOCK */
1329 if (socket_fd >= 0)
1330 fd_nonblock(socket_fd, false);
1331
1332 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1333 if (err < 0) {
1334 *error = EXIT_STDIN;
1335 return err;
1336 }
1337
1338 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1339 if (err < 0) {
1340 *error = EXIT_STDOUT;
1341 return err;
1342 }
1343
1344 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1345 if (err < 0) {
1346 *error = EXIT_STDERR;
1347 return err;
1348 }
1349
1350 if (params->cgroup_path) {
1351 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1352 if (err < 0) {
1353 *error = EXIT_CGROUP;
1354 return err;
1355 }
1356 }
1357
1358 if (context->oom_score_adjust_set) {
1359 char t[16];
1360
1361 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1362 char_array_0(t);
1363
1364 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1365 *error = EXIT_OOM_ADJUST;
1366 return -errno;
1367 }
1368 }
1369
1370 if (context->nice_set)
1371 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1372 *error = EXIT_NICE;
1373 return -errno;
1374 }
1375
1376 if (context->cpu_sched_set) {
1377 struct sched_param param = {
1378 .sched_priority = context->cpu_sched_priority,
1379 };
1380
1381 err = sched_setscheduler(0,
1382 context->cpu_sched_policy |
1383 (context->cpu_sched_reset_on_fork ?
1384 SCHED_RESET_ON_FORK : 0),
1385 &param);
1386 if (err < 0) {
1387 *error = EXIT_SETSCHEDULER;
1388 return -errno;
1389 }
1390 }
1391
1392 if (context->cpuset)
1393 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1394 *error = EXIT_CPUAFFINITY;
1395 return -errno;
1396 }
1397
1398 if (context->ioprio_set)
1399 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1400 *error = EXIT_IOPRIO;
1401 return -errno;
1402 }
1403
1404 if (context->timer_slack_nsec != NSEC_INFINITY)
1405 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1406 *error = EXIT_TIMERSLACK;
1407 return -errno;
1408 }
1409
1410 if (context->personality != 0xffffffffUL)
1411 if (personality(context->personality) < 0) {
1412 *error = EXIT_PERSONALITY;
1413 return -errno;
1414 }
1415
1416 if (context->utmp_id)
1417 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1418
1419 if (context->user) {
1420 username = context->user;
1421 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1422 if (err < 0) {
1423 *error = EXIT_USER;
1424 return err;
1425 }
1426
1427 if (is_terminal_input(context->std_input)) {
1428 err = chown_terminal(STDIN_FILENO, uid);
1429 if (err < 0) {
1430 *error = EXIT_STDIN;
1431 return err;
1432 }
1433 }
1434 }
1435
1436 #ifdef ENABLE_KDBUS
1437 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1438 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1439
1440 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1441 if (err < 0) {
1442 *error = EXIT_BUS_ENDPOINT;
1443 return err;
1444 }
1445 }
1446 #endif
1447
1448 /* If delegation is enabled we'll pass ownership of the cgroup
1449 * (but only in systemd's own controller hierarchy!) to the
1450 * user of the new process. */
1451 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1452 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1453 if (err < 0) {
1454 *error = EXIT_CGROUP;
1455 return err;
1456 }
1457
1458
1459 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1460 if (err < 0) {
1461 *error = EXIT_CGROUP;
1462 return err;
1463 }
1464 }
1465
1466 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1467 char **rt;
1468
1469 STRV_FOREACH(rt, context->runtime_directory) {
1470 _cleanup_free_ char *p;
1471
1472 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1473 if (!p) {
1474 *error = EXIT_RUNTIME_DIRECTORY;
1475 return -ENOMEM;
1476 }
1477
1478 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1479 if (err < 0) {
1480 *error = EXIT_RUNTIME_DIRECTORY;
1481 return err;
1482 }
1483 }
1484 }
1485
1486 if (params->apply_permissions) {
1487 err = enforce_groups(context, username, gid);
1488 if (err < 0) {
1489 *error = EXIT_GROUP;
1490 return err;
1491 }
1492 }
1493
1494 umask(context->umask);
1495
1496 #ifdef HAVE_PAM
1497 if (params->apply_permissions && context->pam_name && username) {
1498 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1499 if (err < 0) {
1500 *error = EXIT_PAM;
1501 return err;
1502 }
1503 }
1504 #endif
1505
1506 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1507 err = setup_netns(runtime->netns_storage_socket);
1508 if (err < 0) {
1509 *error = EXIT_NETWORK;
1510 return err;
1511 }
1512 }
1513
1514 if (!strv_isempty(context->read_write_dirs) ||
1515 !strv_isempty(context->read_only_dirs) ||
1516 !strv_isempty(context->inaccessible_dirs) ||
1517 context->mount_flags != 0 ||
1518 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1519 params->bus_endpoint_path ||
1520 context->private_devices ||
1521 context->protect_system != PROTECT_SYSTEM_NO ||
1522 context->protect_home != PROTECT_HOME_NO) {
1523
1524 char *tmp = NULL, *var = NULL;
1525
1526 /* The runtime struct only contains the parent
1527 * of the private /tmp, which is
1528 * non-accessible to world users. Inside of it
1529 * there's a /tmp that is sticky, and that's
1530 * the one we want to use here. */
1531
1532 if (context->private_tmp && runtime) {
1533 if (runtime->tmp_dir)
1534 tmp = strappenda(runtime->tmp_dir, "/tmp");
1535 if (runtime->var_tmp_dir)
1536 var = strappenda(runtime->var_tmp_dir, "/tmp");
1537 }
1538
1539 err = setup_namespace(
1540 context->read_write_dirs,
1541 context->read_only_dirs,
1542 context->inaccessible_dirs,
1543 tmp,
1544 var,
1545 params->bus_endpoint_path,
1546 context->private_devices,
1547 context->protect_home,
1548 context->protect_system,
1549 context->mount_flags);
1550
1551 if (err == -EPERM)
1552 log_warning_unit(params->unit_id, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %s", strerror(-err));
1553 else if (err < 0) {
1554 *error = EXIT_NAMESPACE;
1555 return err;
1556 }
1557 }
1558
1559 if (params->apply_chroot) {
1560 if (context->root_directory)
1561 if (chroot(context->root_directory) < 0) {
1562 *error = EXIT_CHROOT;
1563 return -errno;
1564 }
1565
1566 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1567 *error = EXIT_CHDIR;
1568 return -errno;
1569 }
1570 } else {
1571 _cleanup_free_ char *d = NULL;
1572
1573 if (asprintf(&d, "%s/%s",
1574 context->root_directory ? context->root_directory : "",
1575 context->working_directory ? context->working_directory : "") < 0) {
1576 *error = EXIT_MEMORY;
1577 return -ENOMEM;
1578 }
1579
1580 if (chdir(d) < 0) {
1581 *error = EXIT_CHDIR;
1582 return -errno;
1583 }
1584 }
1585
1586 /* We repeat the fd closing here, to make sure that
1587 * nothing is leaked from the PAM modules. Note that
1588 * we are more aggressive this time since socket_fd
1589 * and the netns fds we don't need anymore. The custom
1590 * endpoint fd was needed to upload the policy and can
1591 * now be closed as well. */
1592 err = close_all_fds(fds, n_fds);
1593 if (err >= 0)
1594 err = shift_fds(fds, n_fds);
1595 if (err >= 0)
1596 err = flags_fds(fds, n_fds, context->non_blocking);
1597 if (err < 0) {
1598 *error = EXIT_FDS;
1599 return err;
1600 }
1601
1602 if (params->apply_permissions) {
1603
1604 for (i = 0; i < _RLIMIT_MAX; i++) {
1605 if (!context->rlimit[i])
1606 continue;
1607
1608 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1609 *error = EXIT_LIMITS;
1610 return -errno;
1611 }
1612 }
1613
1614 if (context->capability_bounding_set_drop) {
1615 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1616 if (err < 0) {
1617 *error = EXIT_CAPABILITIES;
1618 return err;
1619 }
1620 }
1621
1622 #ifdef HAVE_SMACK
1623 if (context->smack_process_label) {
1624 err = mac_smack_apply_pid(0, context->smack_process_label);
1625 if (err < 0) {
1626 *error = EXIT_SMACK_PROCESS_LABEL;
1627 return err;
1628 }
1629 }
1630 #endif
1631
1632 if (context->user) {
1633 err = enforce_user(context, uid);
1634 if (err < 0) {
1635 *error = EXIT_USER;
1636 return err;
1637 }
1638 }
1639
1640 /* PR_GET_SECUREBITS is not privileged, while
1641 * PR_SET_SECUREBITS is. So to suppress
1642 * potential EPERMs we'll try not to call
1643 * PR_SET_SECUREBITS unless necessary. */
1644 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1645 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1646 *error = EXIT_SECUREBITS;
1647 return -errno;
1648 }
1649
1650 if (context->capabilities)
1651 if (cap_set_proc(context->capabilities) < 0) {
1652 *error = EXIT_CAPABILITIES;
1653 return -errno;
1654 }
1655
1656 if (context->no_new_privileges)
1657 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1658 *error = EXIT_NO_NEW_PRIVILEGES;
1659 return -errno;
1660 }
1661
1662 #ifdef HAVE_SECCOMP
1663 if (context->address_families_whitelist ||
1664 !set_isempty(context->address_families)) {
1665 err = apply_address_families(context);
1666 if (err < 0) {
1667 *error = EXIT_ADDRESS_FAMILIES;
1668 return err;
1669 }
1670 }
1671
1672 if (context->syscall_whitelist ||
1673 !set_isempty(context->syscall_filter) ||
1674 !set_isempty(context->syscall_archs)) {
1675 err = apply_seccomp(context);
1676 if (err < 0) {
1677 *error = EXIT_SECCOMP;
1678 return err;
1679 }
1680 }
1681 #endif
1682
1683 #ifdef HAVE_SELINUX
1684 if (mac_selinux_use()) {
1685 if (context->selinux_context) {
1686 err = setexeccon(context->selinux_context);
1687 if (err < 0 && !context->selinux_context_ignore) {
1688 *error = EXIT_SELINUX_CONTEXT;
1689 return err;
1690 }
1691 }
1692
1693 if (params->selinux_context_net && socket_fd >= 0) {
1694 _cleanup_free_ char *label = NULL;
1695
1696 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1697 if (err < 0) {
1698 *error = EXIT_SELINUX_CONTEXT;
1699 return err;
1700 }
1701
1702 err = setexeccon(label);
1703 if (err < 0) {
1704 *error = EXIT_SELINUX_CONTEXT;
1705 return err;
1706 }
1707 }
1708 }
1709 #endif
1710
1711 #ifdef HAVE_APPARMOR
1712 if (context->apparmor_profile && mac_apparmor_use()) {
1713 err = aa_change_onexec(context->apparmor_profile);
1714 if (err < 0 && !context->apparmor_profile_ignore) {
1715 *error = EXIT_APPARMOR_PROFILE;
1716 return -errno;
1717 }
1718 }
1719 #endif
1720 }
1721
1722 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1723 if (err < 0) {
1724 *error = EXIT_MEMORY;
1725 return err;
1726 }
1727
1728 final_env = strv_env_merge(5,
1729 params->environment,
1730 our_env,
1731 context->environment,
1732 files_env,
1733 pam_env,
1734 NULL);
1735 if (!final_env) {
1736 *error = EXIT_MEMORY;
1737 return -ENOMEM;
1738 }
1739
1740 final_argv = replace_env_argv(argv, final_env);
1741 if (!final_argv) {
1742 *error = EXIT_MEMORY;
1743 return -ENOMEM;
1744 }
1745
1746 final_env = strv_env_clean(final_env);
1747
1748 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1749 _cleanup_free_ char *line;
1750
1751 line = exec_command_line(final_argv);
1752 if (line) {
1753 log_open();
1754 log_struct_unit(LOG_DEBUG,
1755 params->unit_id,
1756 "EXECUTABLE=%s", command->path,
1757 "MESSAGE=Executing: %s", line,
1758 NULL);
1759 log_close();
1760 }
1761 }
1762 execve(command->path, final_argv, final_env);
1763 *error = EXIT_EXEC;
1764 return -errno;
1765 }
1766
1767 int exec_spawn(ExecCommand *command,
1768 const ExecContext *context,
1769 const ExecParameters *params,
1770 ExecRuntime *runtime,
1771 pid_t *ret) {
1772
1773 _cleanup_strv_free_ char **files_env = NULL;
1774 int *fds = NULL; unsigned n_fds = 0;
1775 char *line, **argv;
1776 int socket_fd;
1777 pid_t pid;
1778 int err;
1779
1780 assert(command);
1781 assert(context);
1782 assert(ret);
1783 assert(params);
1784 assert(params->fds || params->n_fds <= 0);
1785
1786 if (context->std_input == EXEC_INPUT_SOCKET ||
1787 context->std_output == EXEC_OUTPUT_SOCKET ||
1788 context->std_error == EXEC_OUTPUT_SOCKET) {
1789
1790 if (params->n_fds != 1)
1791 return -EINVAL;
1792
1793 socket_fd = params->fds[0];
1794 } else {
1795 socket_fd = -1;
1796 fds = params->fds;
1797 n_fds = params->n_fds;
1798 }
1799
1800 err = exec_context_load_environment(context, params->unit_id, &files_env);
1801 if (err < 0) {
1802 log_struct_unit(LOG_ERR,
1803 params->unit_id,
1804 "MESSAGE=Failed to load environment files: %s", strerror(-err),
1805 "ERRNO=%d", -err,
1806 NULL);
1807 return err;
1808 }
1809
1810 argv = params->argv ?: command->argv;
1811
1812 line = exec_command_line(argv);
1813 if (!line)
1814 return log_oom();
1815
1816 log_struct_unit(LOG_DEBUG,
1817 params->unit_id,
1818 "EXECUTABLE=%s", command->path,
1819 "MESSAGE=About to execute: %s", line,
1820 NULL);
1821 free(line);
1822
1823 pid = fork();
1824 if (pid < 0)
1825 return -errno;
1826
1827 if (pid == 0) {
1828 int r;
1829
1830 err = exec_child(command,
1831 context,
1832 params,
1833 runtime,
1834 argv,
1835 socket_fd,
1836 fds, n_fds,
1837 files_env,
1838 &r);
1839 if (r != 0) {
1840 log_open();
1841 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842 "EXECUTABLE=%s", command->path,
1843 "MESSAGE=Failed at step %s spawning %s: %s",
1844 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845 command->path, strerror(-err),
1846 "ERRNO=%d", -err,
1847 NULL);
1848 log_close();
1849 }
1850
1851 _exit(r);
1852 }
1853
1854 log_struct_unit(LOG_DEBUG,
1855 params->unit_id,
1856 "MESSAGE=Forked %s as "PID_FMT,
1857 command->path, pid,
1858 NULL);
1859
1860 /* We add the new process to the cgroup both in the child (so
1861 * that we can be sure that no user code is ever executed
1862 * outside of the cgroup) and in the parent (so that we can be
1863 * sure that when we kill the cgroup the process will be
1864 * killed too). */
1865 if (params->cgroup_path)
1866 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1867
1868 exec_status_start(&command->exec_status, pid);
1869
1870 *ret = pid;
1871 return 0;
1872 }
1873
1874 void exec_context_init(ExecContext *c) {
1875 assert(c);
1876
1877 c->umask = 0022;
1878 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879 c->cpu_sched_policy = SCHED_OTHER;
1880 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881 c->syslog_level_prefix = true;
1882 c->ignore_sigpipe = true;
1883 c->timer_slack_nsec = NSEC_INFINITY;
1884 c->personality = 0xffffffffUL;
1885 c->runtime_directory_mode = 0755;
1886 }
1887
1888 void exec_context_done(ExecContext *c) {
1889 unsigned l;
1890
1891 assert(c);
1892
1893 strv_free(c->environment);
1894 c->environment = NULL;
1895
1896 strv_free(c->environment_files);
1897 c->environment_files = NULL;
1898
1899 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1900 free(c->rlimit[l]);
1901 c->rlimit[l] = NULL;
1902 }
1903
1904 free(c->working_directory);
1905 c->working_directory = NULL;
1906 free(c->root_directory);
1907 c->root_directory = NULL;
1908
1909 free(c->tty_path);
1910 c->tty_path = NULL;
1911
1912 free(c->syslog_identifier);
1913 c->syslog_identifier = NULL;
1914
1915 free(c->user);
1916 c->user = NULL;
1917
1918 free(c->group);
1919 c->group = NULL;
1920
1921 strv_free(c->supplementary_groups);
1922 c->supplementary_groups = NULL;
1923
1924 free(c->pam_name);
1925 c->pam_name = NULL;
1926
1927 if (c->capabilities) {
1928 cap_free(c->capabilities);
1929 c->capabilities = NULL;
1930 }
1931
1932 strv_free(c->read_only_dirs);
1933 c->read_only_dirs = NULL;
1934
1935 strv_free(c->read_write_dirs);
1936 c->read_write_dirs = NULL;
1937
1938 strv_free(c->inaccessible_dirs);
1939 c->inaccessible_dirs = NULL;
1940
1941 if (c->cpuset)
1942 CPU_FREE(c->cpuset);
1943
1944 free(c->utmp_id);
1945 c->utmp_id = NULL;
1946
1947 free(c->selinux_context);
1948 c->selinux_context = NULL;
1949
1950 free(c->apparmor_profile);
1951 c->apparmor_profile = NULL;
1952
1953 set_free(c->syscall_filter);
1954 c->syscall_filter = NULL;
1955
1956 set_free(c->syscall_archs);
1957 c->syscall_archs = NULL;
1958
1959 set_free(c->address_families);
1960 c->address_families = NULL;
1961
1962 strv_free(c->runtime_directory);
1963 c->runtime_directory = NULL;
1964
1965 bus_endpoint_free(c->bus_endpoint);
1966 c->bus_endpoint = NULL;
1967 }
1968
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1970 char **i;
1971
1972 assert(c);
1973
1974 if (!runtime_prefix)
1975 return 0;
1976
1977 STRV_FOREACH(i, c->runtime_directory) {
1978 _cleanup_free_ char *p;
1979
1980 p = strjoin(runtime_prefix, "/", *i, NULL);
1981 if (!p)
1982 return -ENOMEM;
1983
1984 /* We execute this synchronously, since we need to be
1985 * sure this is gone when we start the service
1986 * next. */
1987 rm_rf_dangerous(p, false, true, false);
1988 }
1989
1990 return 0;
1991 }
1992
1993 void exec_command_done(ExecCommand *c) {
1994 assert(c);
1995
1996 free(c->path);
1997 c->path = NULL;
1998
1999 strv_free(c->argv);
2000 c->argv = NULL;
2001 }
2002
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2004 unsigned i;
2005
2006 for (i = 0; i < n; i++)
2007 exec_command_done(c+i);
2008 }
2009
2010 void exec_command_free_list(ExecCommand *c) {
2011 ExecCommand *i;
2012
2013 while ((i = c)) {
2014 LIST_REMOVE(command, c, i);
2015 exec_command_done(i);
2016 free(i);
2017 }
2018 }
2019
2020 void exec_command_free_array(ExecCommand **c, unsigned n) {
2021 unsigned i;
2022
2023 for (i = 0; i < n; i++) {
2024 exec_command_free_list(c[i]);
2025 c[i] = NULL;
2026 }
2027 }
2028
2029 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2030 char **i, **r = NULL;
2031
2032 assert(c);
2033 assert(l);
2034
2035 STRV_FOREACH(i, c->environment_files) {
2036 char *fn;
2037 int k;
2038 bool ignore = false;
2039 char **p;
2040 _cleanup_globfree_ glob_t pglob = {};
2041 int count, n;
2042
2043 fn = *i;
2044
2045 if (fn[0] == '-') {
2046 ignore = true;
2047 fn ++;
2048 }
2049
2050 if (!path_is_absolute(fn)) {
2051 if (ignore)
2052 continue;
2053
2054 strv_free(r);
2055 return -EINVAL;
2056 }
2057
2058 /* Filename supports globbing, take all matching files */
2059 errno = 0;
2060 if (glob(fn, 0, NULL, &pglob) != 0) {
2061 if (ignore)
2062 continue;
2063
2064 strv_free(r);
2065 return errno ? -errno : -EINVAL;
2066 }
2067 count = pglob.gl_pathc;
2068 if (count == 0) {
2069 if (ignore)
2070 continue;
2071
2072 strv_free(r);
2073 return -EINVAL;
2074 }
2075 for (n = 0; n < count; n++) {
2076 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2077 if (k < 0) {
2078 if (ignore)
2079 continue;
2080
2081 strv_free(r);
2082 return k;
2083 }
2084 /* Log invalid environment variables with filename */
2085 if (p)
2086 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2087
2088 if (r == NULL)
2089 r = p;
2090 else {
2091 char **m;
2092
2093 m = strv_env_merge(2, r, p);
2094 strv_free(r);
2095 strv_free(p);
2096 if (!m)
2097 return -ENOMEM;
2098
2099 r = m;
2100 }
2101 }
2102 }
2103
2104 *l = r;
2105
2106 return 0;
2107 }
2108
2109 static bool tty_may_match_dev_console(const char *tty) {
2110 _cleanup_free_ char *active = NULL;
2111 char *console;
2112
2113 if (startswith(tty, "/dev/"))
2114 tty += 5;
2115
2116 /* trivial identity? */
2117 if (streq(tty, "console"))
2118 return true;
2119
2120 console = resolve_dev_console(&active);
2121 /* if we could not resolve, assume it may */
2122 if (!console)
2123 return true;
2124
2125 /* "tty0" means the active VC, so it may be the same sometimes */
2126 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2127 }
2128
2129 bool exec_context_may_touch_console(ExecContext *ec) {
2130 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2131 is_terminal_input(ec->std_input) ||
2132 is_terminal_output(ec->std_output) ||
2133 is_terminal_output(ec->std_error)) &&
2134 tty_may_match_dev_console(tty_path(ec));
2135 }
2136
2137 static void strv_fprintf(FILE *f, char **l) {
2138 char **g;
2139
2140 assert(f);
2141
2142 STRV_FOREACH(g, l)
2143 fprintf(f, " %s", *g);
2144 }
2145
2146 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2147 char **e;
2148 unsigned i;
2149
2150 assert(c);
2151 assert(f);
2152
2153 prefix = strempty(prefix);
2154
2155 fprintf(f,
2156 "%sUMask: %04o\n"
2157 "%sWorkingDirectory: %s\n"
2158 "%sRootDirectory: %s\n"
2159 "%sNonBlocking: %s\n"
2160 "%sPrivateTmp: %s\n"
2161 "%sPrivateNetwork: %s\n"
2162 "%sPrivateDevices: %s\n"
2163 "%sProtectHome: %s\n"
2164 "%sProtectSystem: %s\n"
2165 "%sIgnoreSIGPIPE: %s\n",
2166 prefix, c->umask,
2167 prefix, c->working_directory ? c->working_directory : "/",
2168 prefix, c->root_directory ? c->root_directory : "/",
2169 prefix, yes_no(c->non_blocking),
2170 prefix, yes_no(c->private_tmp),
2171 prefix, yes_no(c->private_network),
2172 prefix, yes_no(c->private_devices),
2173 prefix, protect_home_to_string(c->protect_home),
2174 prefix, protect_system_to_string(c->protect_system),
2175 prefix, yes_no(c->ignore_sigpipe));
2176
2177 STRV_FOREACH(e, c->environment)
2178 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2179
2180 STRV_FOREACH(e, c->environment_files)
2181 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2182
2183 if (c->nice_set)
2184 fprintf(f,
2185 "%sNice: %i\n",
2186 prefix, c->nice);
2187
2188 if (c->oom_score_adjust_set)
2189 fprintf(f,
2190 "%sOOMScoreAdjust: %i\n",
2191 prefix, c->oom_score_adjust);
2192
2193 for (i = 0; i < RLIM_NLIMITS; i++)
2194 if (c->rlimit[i])
2195 fprintf(f, "%s%s: "RLIM_FMT"\n",
2196 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2197
2198 if (c->ioprio_set) {
2199 _cleanup_free_ char *class_str = NULL;
2200
2201 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2202 fprintf(f,
2203 "%sIOSchedulingClass: %s\n"
2204 "%sIOPriority: %i\n",
2205 prefix, strna(class_str),
2206 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2207 }
2208
2209 if (c->cpu_sched_set) {
2210 _cleanup_free_ char *policy_str = NULL;
2211
2212 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2213 fprintf(f,
2214 "%sCPUSchedulingPolicy: %s\n"
2215 "%sCPUSchedulingPriority: %i\n"
2216 "%sCPUSchedulingResetOnFork: %s\n",
2217 prefix, strna(policy_str),
2218 prefix, c->cpu_sched_priority,
2219 prefix, yes_no(c->cpu_sched_reset_on_fork));
2220 }
2221
2222 if (c->cpuset) {
2223 fprintf(f, "%sCPUAffinity:", prefix);
2224 for (i = 0; i < c->cpuset_ncpus; i++)
2225 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2226 fprintf(f, " %u", i);
2227 fputs("\n", f);
2228 }
2229
2230 if (c->timer_slack_nsec != NSEC_INFINITY)
2231 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2232
2233 fprintf(f,
2234 "%sStandardInput: %s\n"
2235 "%sStandardOutput: %s\n"
2236 "%sStandardError: %s\n",
2237 prefix, exec_input_to_string(c->std_input),
2238 prefix, exec_output_to_string(c->std_output),
2239 prefix, exec_output_to_string(c->std_error));
2240
2241 if (c->tty_path)
2242 fprintf(f,
2243 "%sTTYPath: %s\n"
2244 "%sTTYReset: %s\n"
2245 "%sTTYVHangup: %s\n"
2246 "%sTTYVTDisallocate: %s\n",
2247 prefix, c->tty_path,
2248 prefix, yes_no(c->tty_reset),
2249 prefix, yes_no(c->tty_vhangup),
2250 prefix, yes_no(c->tty_vt_disallocate));
2251
2252 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2253 c->std_output == EXEC_OUTPUT_KMSG ||
2254 c->std_output == EXEC_OUTPUT_JOURNAL ||
2255 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2256 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2257 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2258 c->std_error == EXEC_OUTPUT_SYSLOG ||
2259 c->std_error == EXEC_OUTPUT_KMSG ||
2260 c->std_error == EXEC_OUTPUT_JOURNAL ||
2261 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2262 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2263 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2264
2265 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2266
2267 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2268 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2269
2270 fprintf(f,
2271 "%sSyslogFacility: %s\n"
2272 "%sSyslogLevel: %s\n",
2273 prefix, strna(fac_str),
2274 prefix, strna(lvl_str));
2275 }
2276
2277 if (c->capabilities) {
2278 _cleanup_cap_free_charp_ char *t;
2279
2280 t = cap_to_text(c->capabilities, NULL);
2281 if (t)
2282 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2283 }
2284
2285 if (c->secure_bits)
2286 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2287 prefix,
2288 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2289 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2290 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2291 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2292 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2293 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2294
2295 if (c->capability_bounding_set_drop) {
2296 unsigned long l;
2297 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2298
2299 for (l = 0; l <= cap_last_cap(); l++)
2300 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2301 _cleanup_cap_free_charp_ char *t;
2302
2303 t = cap_to_name(l);
2304 if (t)
2305 fprintf(f, " %s", t);
2306 }
2307
2308 fputs("\n", f);
2309 }
2310
2311 if (c->user)
2312 fprintf(f, "%sUser: %s\n", prefix, c->user);
2313 if (c->group)
2314 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2315
2316 if (strv_length(c->supplementary_groups) > 0) {
2317 fprintf(f, "%sSupplementaryGroups:", prefix);
2318 strv_fprintf(f, c->supplementary_groups);
2319 fputs("\n", f);
2320 }
2321
2322 if (c->pam_name)
2323 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2324
2325 if (strv_length(c->read_write_dirs) > 0) {
2326 fprintf(f, "%sReadWriteDirs:", prefix);
2327 strv_fprintf(f, c->read_write_dirs);
2328 fputs("\n", f);
2329 }
2330
2331 if (strv_length(c->read_only_dirs) > 0) {
2332 fprintf(f, "%sReadOnlyDirs:", prefix);
2333 strv_fprintf(f, c->read_only_dirs);
2334 fputs("\n", f);
2335 }
2336
2337 if (strv_length(c->inaccessible_dirs) > 0) {
2338 fprintf(f, "%sInaccessibleDirs:", prefix);
2339 strv_fprintf(f, c->inaccessible_dirs);
2340 fputs("\n", f);
2341 }
2342
2343 if (c->utmp_id)
2344 fprintf(f,
2345 "%sUtmpIdentifier: %s\n",
2346 prefix, c->utmp_id);
2347
2348 if (c->selinux_context)
2349 fprintf(f,
2350 "%sSELinuxContext: %s%s\n",
2351 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2352
2353 if (c->personality != 0xffffffffUL)
2354 fprintf(f,
2355 "%sPersonality: %s\n",
2356 prefix, strna(personality_to_string(c->personality)));
2357
2358 if (c->syscall_filter) {
2359 #ifdef HAVE_SECCOMP
2360 Iterator j;
2361 void *id;
2362 bool first = true;
2363 #endif
2364
2365 fprintf(f,
2366 "%sSystemCallFilter: ",
2367 prefix);
2368
2369 if (!c->syscall_whitelist)
2370 fputc('~', f);
2371
2372 #ifdef HAVE_SECCOMP
2373 SET_FOREACH(id, c->syscall_filter, j) {
2374 _cleanup_free_ char *name = NULL;
2375
2376 if (first)
2377 first = false;
2378 else
2379 fputc(' ', f);
2380
2381 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2382 fputs(strna(name), f);
2383 }
2384 #endif
2385
2386 fputc('\n', f);
2387 }
2388
2389 if (c->syscall_archs) {
2390 #ifdef HAVE_SECCOMP
2391 Iterator j;
2392 void *id;
2393 #endif
2394
2395 fprintf(f,
2396 "%sSystemCallArchitectures:",
2397 prefix);
2398
2399 #ifdef HAVE_SECCOMP
2400 SET_FOREACH(id, c->syscall_archs, j)
2401 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2402 #endif
2403 fputc('\n', f);
2404 }
2405
2406 if (c->syscall_errno != 0)
2407 fprintf(f,
2408 "%sSystemCallErrorNumber: %s\n",
2409 prefix, strna(errno_to_name(c->syscall_errno)));
2410
2411 if (c->apparmor_profile)
2412 fprintf(f,
2413 "%sAppArmorProfile: %s%s\n",
2414 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2415 }
2416
2417 bool exec_context_maintains_privileges(ExecContext *c) {
2418 assert(c);
2419
2420 /* Returns true if the process forked off would run run under
2421 * an unchanged UID or as root. */
2422
2423 if (!c->user)
2424 return true;
2425
2426 if (streq(c->user, "root") || streq(c->user, "0"))
2427 return true;
2428
2429 return false;
2430 }
2431
2432 void exec_status_start(ExecStatus *s, pid_t pid) {
2433 assert(s);
2434
2435 zero(*s);
2436 s->pid = pid;
2437 dual_timestamp_get(&s->start_timestamp);
2438 }
2439
2440 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2441 assert(s);
2442
2443 if (s->pid && s->pid != pid)
2444 zero(*s);
2445
2446 s->pid = pid;
2447 dual_timestamp_get(&s->exit_timestamp);
2448
2449 s->code = code;
2450 s->status = status;
2451
2452 if (context) {
2453 if (context->utmp_id)
2454 utmp_put_dead_process(context->utmp_id, pid, code, status);
2455
2456 exec_context_tty_reset(context);
2457 }
2458 }
2459
2460 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2461 char buf[FORMAT_TIMESTAMP_MAX];
2462
2463 assert(s);
2464 assert(f);
2465
2466 if (s->pid <= 0)
2467 return;
2468
2469 prefix = strempty(prefix);
2470
2471 fprintf(f,
2472 "%sPID: "PID_FMT"\n",
2473 prefix, s->pid);
2474
2475 if (s->start_timestamp.realtime > 0)
2476 fprintf(f,
2477 "%sStart Timestamp: %s\n",
2478 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2479
2480 if (s->exit_timestamp.realtime > 0)
2481 fprintf(f,
2482 "%sExit Timestamp: %s\n"
2483 "%sExit Code: %s\n"
2484 "%sExit Status: %i\n",
2485 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2486 prefix, sigchld_code_to_string(s->code),
2487 prefix, s->status);
2488 }
2489
2490 char *exec_command_line(char **argv) {
2491 size_t k;
2492 char *n, *p, **a;
2493 bool first = true;
2494
2495 assert(argv);
2496
2497 k = 1;
2498 STRV_FOREACH(a, argv)
2499 k += strlen(*a)+3;
2500
2501 if (!(n = new(char, k)))
2502 return NULL;
2503
2504 p = n;
2505 STRV_FOREACH(a, argv) {
2506
2507 if (!first)
2508 *(p++) = ' ';
2509 else
2510 first = false;
2511
2512 if (strpbrk(*a, WHITESPACE)) {
2513 *(p++) = '\'';
2514 p = stpcpy(p, *a);
2515 *(p++) = '\'';
2516 } else
2517 p = stpcpy(p, *a);
2518
2519 }
2520
2521 *p = 0;
2522
2523 /* FIXME: this doesn't really handle arguments that have
2524 * spaces and ticks in them */
2525
2526 return n;
2527 }
2528
2529 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2530 _cleanup_free_ char *cmd = NULL;
2531 const char *prefix2;
2532
2533 assert(c);
2534 assert(f);
2535
2536 prefix = strempty(prefix);
2537 prefix2 = strappenda(prefix, "\t");
2538
2539 cmd = exec_command_line(c->argv);
2540 fprintf(f,
2541 "%sCommand Line: %s\n",
2542 prefix, cmd ? cmd : strerror(ENOMEM));
2543
2544 exec_status_dump(&c->exec_status, f, prefix2);
2545 }
2546
2547 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2548 assert(f);
2549
2550 prefix = strempty(prefix);
2551
2552 LIST_FOREACH(command, c, c)
2553 exec_command_dump(c, f, prefix);
2554 }
2555
2556 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2557 ExecCommand *end;
2558
2559 assert(l);
2560 assert(e);
2561
2562 if (*l) {
2563 /* It's kind of important, that we keep the order here */
2564 LIST_FIND_TAIL(command, *l, end);
2565 LIST_INSERT_AFTER(command, *l, end, e);
2566 } else
2567 *l = e;
2568 }
2569
2570 int exec_command_set(ExecCommand *c, const char *path, ...) {
2571 va_list ap;
2572 char **l, *p;
2573
2574 assert(c);
2575 assert(path);
2576
2577 va_start(ap, path);
2578 l = strv_new_ap(path, ap);
2579 va_end(ap);
2580
2581 if (!l)
2582 return -ENOMEM;
2583
2584 p = strdup(path);
2585 if (!p) {
2586 strv_free(l);
2587 return -ENOMEM;
2588 }
2589
2590 free(c->path);
2591 c->path = p;
2592
2593 strv_free(c->argv);
2594 c->argv = l;
2595
2596 return 0;
2597 }
2598
2599 int exec_command_append(ExecCommand *c, const char *path, ...) {
2600 _cleanup_strv_free_ char **l = NULL;
2601 va_list ap;
2602 int r;
2603
2604 assert(c);
2605 assert(path);
2606
2607 va_start(ap, path);
2608 l = strv_new_ap(path, ap);
2609 va_end(ap);
2610
2611 if (!l)
2612 return -ENOMEM;
2613
2614 r = strv_extend_strv(&c->argv, l);
2615 if (r < 0)
2616 return r;
2617
2618 return 0;
2619 }
2620
2621
2622 static int exec_runtime_allocate(ExecRuntime **rt) {
2623
2624 if (*rt)
2625 return 0;
2626
2627 *rt = new0(ExecRuntime, 1);
2628 if (!*rt)
2629 return -ENOMEM;
2630
2631 (*rt)->n_ref = 1;
2632 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2633
2634 return 0;
2635 }
2636
2637 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2638 int r;
2639
2640 assert(rt);
2641 assert(c);
2642 assert(id);
2643
2644 if (*rt)
2645 return 1;
2646
2647 if (!c->private_network && !c->private_tmp)
2648 return 0;
2649
2650 r = exec_runtime_allocate(rt);
2651 if (r < 0)
2652 return r;
2653
2654 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2655 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2656 return -errno;
2657 }
2658
2659 if (c->private_tmp && !(*rt)->tmp_dir) {
2660 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2661 if (r < 0)
2662 return r;
2663 }
2664
2665 return 1;
2666 }
2667
2668 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2669 assert(r);
2670 assert(r->n_ref > 0);
2671
2672 r->n_ref++;
2673 return r;
2674 }
2675
2676 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2677
2678 if (!r)
2679 return NULL;
2680
2681 assert(r->n_ref > 0);
2682
2683 r->n_ref--;
2684 if (r->n_ref <= 0) {
2685 free(r->tmp_dir);
2686 free(r->var_tmp_dir);
2687 safe_close_pair(r->netns_storage_socket);
2688 free(r);
2689 }
2690
2691 return NULL;
2692 }
2693
2694 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2695 assert(u);
2696 assert(f);
2697 assert(fds);
2698
2699 if (!rt)
2700 return 0;
2701
2702 if (rt->tmp_dir)
2703 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2704
2705 if (rt->var_tmp_dir)
2706 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2707
2708 if (rt->netns_storage_socket[0] >= 0) {
2709 int copy;
2710
2711 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2712 if (copy < 0)
2713 return copy;
2714
2715 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2716 }
2717
2718 if (rt->netns_storage_socket[1] >= 0) {
2719 int copy;
2720
2721 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2722 if (copy < 0)
2723 return copy;
2724
2725 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2726 }
2727
2728 return 0;
2729 }
2730
2731 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2732 int r;
2733
2734 assert(rt);
2735 assert(key);
2736 assert(value);
2737
2738 if (streq(key, "tmp-dir")) {
2739 char *copy;
2740
2741 r = exec_runtime_allocate(rt);
2742 if (r < 0)
2743 return r;
2744
2745 copy = strdup(value);
2746 if (!copy)
2747 return log_oom();
2748
2749 free((*rt)->tmp_dir);
2750 (*rt)->tmp_dir = copy;
2751
2752 } else if (streq(key, "var-tmp-dir")) {
2753 char *copy;
2754
2755 r = exec_runtime_allocate(rt);
2756 if (r < 0)
2757 return r;
2758
2759 copy = strdup(value);
2760 if (!copy)
2761 return log_oom();
2762
2763 free((*rt)->var_tmp_dir);
2764 (*rt)->var_tmp_dir = copy;
2765
2766 } else if (streq(key, "netns-socket-0")) {
2767 int fd;
2768
2769 r = exec_runtime_allocate(rt);
2770 if (r < 0)
2771 return r;
2772
2773 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2774 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2775 else {
2776 safe_close((*rt)->netns_storage_socket[0]);
2777 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2778 }
2779 } else if (streq(key, "netns-socket-1")) {
2780 int fd;
2781
2782 r = exec_runtime_allocate(rt);
2783 if (r < 0)
2784 return r;
2785
2786 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2787 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2788 else {
2789 safe_close((*rt)->netns_storage_socket[1]);
2790 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2791 }
2792 } else
2793 return 0;
2794
2795 return 1;
2796 }
2797
/* Job function handed to asynchronous_job(): p is a heap-allocated
 * path string. The path is passed to rm_rf_dangerous() and the string
 * is freed afterwards. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2804
2805 void exec_runtime_destroy(ExecRuntime *rt) {
2806 int r;
2807
2808 if (!rt)
2809 return;
2810
2811 /* If there are multiple users of this, let's leave the stuff around */
2812 if (rt->n_ref > 1)
2813 return;
2814
2815 if (rt->tmp_dir) {
2816 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2817
2818 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2819 if (r < 0) {
2820 log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2821 free(rt->tmp_dir);
2822 }
2823
2824 rt->tmp_dir = NULL;
2825 }
2826
2827 if (rt->var_tmp_dir) {
2828 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2829
2830 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2831 if (r < 0) {
2832 log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2833 free(rt->var_tmp_dir);
2834 }
2835
2836 rt->var_tmp_dir = NULL;
2837 }
2838
2839 safe_close_pair(rt->netns_storage_socket);
2840 }
2841
2842 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2843 [EXEC_INPUT_NULL] = "null",
2844 [EXEC_INPUT_TTY] = "tty",
2845 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2846 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2847 [EXEC_INPUT_SOCKET] = "socket"
2848 };
2849
2850 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2851
2852 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2853 [EXEC_OUTPUT_INHERIT] = "inherit",
2854 [EXEC_OUTPUT_NULL] = "null",
2855 [EXEC_OUTPUT_TTY] = "tty",
2856 [EXEC_OUTPUT_SYSLOG] = "syslog",
2857 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2858 [EXEC_OUTPUT_KMSG] = "kmsg",
2859 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2860 [EXEC_OUTPUT_JOURNAL] = "journal",
2861 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2862 [EXEC_OUTPUT_SOCKET] = "socket"
2863 };
2864
2865 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);