]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
scope: make attachment of initial PIDs a bit more robust
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Moves the passed file descriptors so that fds[i] ends up at descriptor
 * number i+3. The array is updated in place with the new descriptor
 * numbers. Returns 0 on success, or a negative errno if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we wanted was still occupied. Note the
                         * first such index so another pass starts there. */
                        if (got != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (first >= 0);

        return 0;
}
146
/* Applies the requested O_NONBLOCK setting to every fd in the array and
 * clears FD_CLOEXEC on each of them. Stops at the first failure and
 * returns that helper's negative return value; returns 0 otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * passed on to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175 assert(context);
176
177 if (context->tty_path)
178 return context->tty_path;
179
180 return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184 assert(context);
185
186 if (context->tty_vhangup)
187 terminal_vhangup(tty_path(context));
188
189 if (context->tty_reset)
190 reset_terminal(tty_path(context));
191
192 if (context->tty_vt_disallocate && context->tty_path)
193 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197 return
198 o == EXEC_OUTPUT_TTY ||
199 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, negative errno on
 * failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the descriptor we wanted */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
221
222 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
223 int fd, r;
224 union sockaddr_union sa = {
225 .un.sun_family = AF_UNIX,
226 .un.sun_path = "/run/systemd/journal/stdout",
227 };
228
229 assert(context);
230 assert(output < _EXEC_OUTPUT_MAX);
231 assert(ident);
232 assert(nfd >= 0);
233
234 fd = socket(AF_UNIX, SOCK_STREAM, 0);
235 if (fd < 0)
236 return -errno;
237
238 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
239 if (r < 0) {
240 safe_close(fd);
241 return -errno;
242 }
243
244 if (shutdown(fd, SHUT_RD) < 0) {
245 safe_close(fd);
246 return -errno;
247 }
248
249 fd_inc_sndbuf(fd, SNDBUF_SIZE);
250
251 dprintf(fd,
252 "%s\n"
253 "%s\n"
254 "%i\n"
255 "%i\n"
256 "%i\n"
257 "%i\n"
258 "%i\n",
259 context->syslog_identifier ? context->syslog_identifier : ident,
260 unit_id,
261 context->syslog_priority,
262 !!context->syslog_level_prefix,
263 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
264 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
265 is_terminal_output(output));
266
267 if (fd != nfd) {
268 r = dup2(fd, nfd) < 0 ? -errno : nfd;
269 safe_close(fd);
270 } else
271 r = nfd;
272
273 return r;
274 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and makes
 * it available as descriptor nfd. Returns nfd on success, or a negative
 * error from open_terminal()/dup2() on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already at the requested descriptor number */
        if (term_fd == nfd)
                return nfd;

        r = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        safe_close(term_fd);

        return r;
}
292
293 static bool is_terminal_input(ExecInput i) {
294 return
295 i == EXEC_INPUT_TTY ||
296 i == EXEC_INPUT_TTY_FORCE ||
297 i == EXEC_INPUT_TTY_FAIL;
298 }
299
300 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
301
302 if (is_terminal_input(std_input) && !apply_tty_stdin)
303 return EXEC_INPUT_NULL;
304
305 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
306 return EXEC_INPUT_NULL;
307
308 return std_input;
309 }
310
311 static int fixup_output(ExecOutput std_output, int socket_fd) {
312
313 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
314 return EXEC_OUTPUT_INHERIT;
315
316 return std_output;
317 }
318
319 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320 ExecInput i;
321
322 assert(context);
323
324 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
325
326 switch (i) {
327
328 case EXEC_INPUT_NULL:
329 return open_null_as(O_RDONLY, STDIN_FILENO);
330
331 case EXEC_INPUT_TTY:
332 case EXEC_INPUT_TTY_FORCE:
333 case EXEC_INPUT_TTY_FAIL: {
334 int fd, r;
335
336 fd = acquire_terminal(tty_path(context),
337 i == EXEC_INPUT_TTY_FAIL,
338 i == EXEC_INPUT_TTY_FORCE,
339 false,
340 USEC_INFINITY);
341 if (fd < 0)
342 return fd;
343
344 if (fd != STDIN_FILENO) {
345 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
346 safe_close(fd);
347 } else
348 r = STDIN_FILENO;
349
350 return r;
351 }
352
353 case EXEC_INPUT_SOCKET:
354 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
355
356 default:
357 assert_not_reached("Unknown input type");
358 }
359 }
360
361 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
362 ExecOutput o;
363 ExecInput i;
364 int r;
365
366 assert(context);
367 assert(ident);
368
369 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
370 o = fixup_output(context->std_output, socket_fd);
371
372 if (fileno == STDERR_FILENO) {
373 ExecOutput e;
374 e = fixup_output(context->std_error, socket_fd);
375
376 /* This expects the input and output are already set up */
377
378 /* Don't change the stderr file descriptor if we inherit all
379 * the way and are not on a tty */
380 if (e == EXEC_OUTPUT_INHERIT &&
381 o == EXEC_OUTPUT_INHERIT &&
382 i == EXEC_INPUT_NULL &&
383 !is_terminal_input(context->std_input) &&
384 getppid () != 1)
385 return fileno;
386
387 /* Duplicate from stdout if possible */
388 if (e == o || e == EXEC_OUTPUT_INHERIT)
389 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
390
391 o = e;
392
393 } else if (o == EXEC_OUTPUT_INHERIT) {
394 /* If input got downgraded, inherit the original value */
395 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
396 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
397
398 /* If the input is connected to anything that's not a /dev/null, inherit that... */
399 if (i != EXEC_INPUT_NULL)
400 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401
402 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
403 if (getppid() != 1)
404 return fileno;
405
406 /* We need to open /dev/null here anew, to get the right access mode. */
407 return open_null_as(O_WRONLY, fileno);
408 }
409
410 switch (o) {
411
412 case EXEC_OUTPUT_NULL:
413 return open_null_as(O_WRONLY, fileno);
414
415 case EXEC_OUTPUT_TTY:
416 if (is_terminal_input(i))
417 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
418
419 /* We don't reset the terminal if this is just about output */
420 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
421
422 case EXEC_OUTPUT_SYSLOG:
423 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
424 case EXEC_OUTPUT_KMSG:
425 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
426 case EXEC_OUTPUT_JOURNAL:
427 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
428 r = connect_logger_as(context, o, ident, unit_id, fileno);
429 if (r < 0) {
430 log_unit_struct(unit_id,
431 LOG_CRIT,
432 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
433 fileno == STDOUT_FILENO ? "stdout" : "stderr",
434 unit_id, strerror(-r)),
435 LOG_ERRNO(-r),
436 NULL);
437 r = open_null_as(O_WRONLY, fileno);
438 }
439 return r;
440
441 case EXEC_OUTPUT_SOCKET:
442 assert(socket_fd >= 0);
443 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
444
445 default:
446 assert_not_reached("Unknown error type");
447 }
448 }
449
450 static int chown_terminal(int fd, uid_t uid) {
451 struct stat st;
452
453 assert(fd >= 0);
454
455 /* This might fail. What matters are the results. */
456 (void) fchown(fd, uid, -1);
457 (void) fchmod(fd, TTY_MODE);
458
459 if (fstat(fd, &st) < 0)
460 return -errno;
461
462 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
463 return -EPERM;
464
465 return 0;
466 }
467
468 static int setup_confirm_stdio(int *_saved_stdin,
469 int *_saved_stdout) {
470 int fd = -1, saved_stdin, saved_stdout = -1, r;
471
472 assert(_saved_stdin);
473 assert(_saved_stdout);
474
475 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
476 if (saved_stdin < 0)
477 return -errno;
478
479 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
480 if (saved_stdout < 0) {
481 r = errno;
482 goto fail;
483 }
484
485 fd = acquire_terminal(
486 "/dev/console",
487 false,
488 false,
489 false,
490 DEFAULT_CONFIRM_USEC);
491 if (fd < 0) {
492 r = fd;
493 goto fail;
494 }
495
496 r = chown_terminal(fd, getuid());
497 if (r < 0)
498 goto fail;
499
500 if (dup2(fd, STDIN_FILENO) < 0) {
501 r = -errno;
502 goto fail;
503 }
504
505 if (dup2(fd, STDOUT_FILENO) < 0) {
506 r = -errno;
507 goto fail;
508 }
509
510 if (fd >= 2)
511 safe_close(fd);
512
513 *_saved_stdin = saved_stdin;
514 *_saved_stdout = saved_stdout;
515
516 return 0;
517
518 fail:
519 safe_close(saved_stdout);
520 safe_close(saved_stdin);
521 safe_close(fd);
522
523 return r;
524 }
525
526 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
527 _cleanup_close_ int fd = -1;
528 va_list ap;
529
530 assert(format);
531
532 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
533 if (fd < 0)
534 return fd;
535
536 va_start(ap, format);
537 vdprintf(fd, format, ap);
538 va_end(ap);
539
540 return 0;
541 }
542
/* Releases the terminal and puts the saved stdin/stdout descriptors back
 * in place, closing the saved copies afterwards. Both restores are always
 * attempted; returns 0, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
566
567 static int ask_for_confirmation(char *response, char **argv) {
568 int saved_stdout = -1, saved_stdin = -1, r;
569 _cleanup_free_ char *line = NULL;
570
571 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
572 if (r < 0)
573 return r;
574
575 line = exec_command_line(argv);
576 if (!line)
577 return -ENOMEM;
578
579 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
580
581 restore_confirm_stdio(&saved_stdin, &saved_stdout);
582
583 return r;
584 }
585
586 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
587 bool keep_groups = false;
588 int r;
589
590 assert(context);
591
592 /* Lookup and set GID and supplementary group list. Here too
593 * we avoid NSS lookups for gid=0. */
594
595 if (context->group || username) {
596
597 if (context->group) {
598 const char *g = context->group;
599
600 if ((r = get_group_creds(&g, &gid)) < 0)
601 return r;
602 }
603
604 /* First step, initialize groups from /etc/groups */
605 if (username && gid != 0) {
606 if (initgroups(username, gid) < 0)
607 return -errno;
608
609 keep_groups = true;
610 }
611
612 /* Second step, set our gids */
613 if (setresgid(gid, gid, gid) < 0)
614 return -errno;
615 }
616
617 if (context->supplementary_groups) {
618 int ngroups_max, k;
619 gid_t *gids;
620 char **i;
621
622 /* Final step, initialize any manually set supplementary groups */
623 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
624
625 if (!(gids = new(gid_t, ngroups_max)))
626 return -ENOMEM;
627
628 if (keep_groups) {
629 if ((k = getgroups(ngroups_max, gids)) < 0) {
630 free(gids);
631 return -errno;
632 }
633 } else
634 k = 0;
635
636 STRV_FOREACH(i, context->supplementary_groups) {
637 const char *g;
638
639 if (k >= ngroups_max) {
640 free(gids);
641 return -E2BIG;
642 }
643
644 g = *i;
645 r = get_group_creds(&g, gids+k);
646 if (r < 0) {
647 free(gids);
648 return r;
649 }
650
651 k++;
652 }
653
654 if (setgroups(k, gids) < 0) {
655 free(gids);
656 return -errno;
657 }
658
659 free(gids);
660 }
661
662 return 0;
663 }
664
665 static int enforce_user(const ExecContext *context, uid_t uid) {
666 assert(context);
667
668 /* Sets (but doesn't lookup) the uid and make sure we keep the
669 * capabilities while doing so. */
670
671 if (context->capabilities) {
672 _cleanup_cap_free_ cap_t d = NULL;
673 static const cap_value_t bits[] = {
674 CAP_SETUID, /* Necessary so that we can run setresuid() below */
675 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
676 };
677
678 /* First step: If we need to keep capabilities but
679 * drop privileges we need to make sure we keep our
680 * caps, while we drop privileges. */
681 if (uid != 0) {
682 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
683
684 if (prctl(PR_GET_SECUREBITS) != sb)
685 if (prctl(PR_SET_SECUREBITS, sb) < 0)
686 return -errno;
687 }
688
689 /* Second step: set the capabilities. This will reduce
690 * the capabilities to the minimum we need. */
691
692 d = cap_dup(context->capabilities);
693 if (!d)
694 return -errno;
695
696 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
697 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
698 return -errno;
699
700 if (cap_set_proc(d) < 0)
701 return -errno;
702 }
703
704 /* Third step: actually set the uids */
705 if (setresuid(uid, uid, uid) < 0)
706 return -errno;
707
708 /* At this point we should have all necessary capabilities but
709 are otherwise a normal user. However, the caps might got
710 corrupted due to the setresuid() so we need clean them up
711 later. This is done outside of this call. */
712
713 return 0;
714 }
715
716 #ifdef HAVE_PAM
717
718 static int null_conv(
719 int num_msg,
720 const struct pam_message **msg,
721 struct pam_response **resp,
722 void *appdata_ptr) {
723
724 /* We don't support conversations */
725
726 return PAM_CONV_ERR;
727 }
728
729 static int setup_pam(
730 const char *name,
731 const char *user,
732 uid_t uid,
733 const char *tty,
734 char ***pam_env,
735 int fds[], unsigned n_fds) {
736
737 static const struct pam_conv conv = {
738 .conv = null_conv,
739 .appdata_ptr = NULL
740 };
741
742 pam_handle_t *handle = NULL;
743 sigset_t ss, old_ss;
744 int pam_code = PAM_SUCCESS;
745 int err;
746 char **e = NULL;
747 bool close_session = false;
748 pid_t pam_pid = 0, parent_pid;
749 int flags = 0;
750
751 assert(name);
752 assert(user);
753 assert(pam_env);
754
755 /* We set up PAM in the parent process, then fork. The child
756 * will then stay around until killed via PR_GET_PDEATHSIG or
757 * systemd via the cgroup logic. It will then remove the PAM
758 * session again. The parent process will exec() the actual
759 * daemon. We do things this way to ensure that the main PID
760 * of the daemon is the one we initially fork()ed. */
761
762 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
763 flags |= PAM_SILENT;
764
765 pam_code = pam_start(name, user, &conv, &handle);
766 if (pam_code != PAM_SUCCESS) {
767 handle = NULL;
768 goto fail;
769 }
770
771 if (tty) {
772 pam_code = pam_set_item(handle, PAM_TTY, tty);
773 if (pam_code != PAM_SUCCESS)
774 goto fail;
775 }
776
777 pam_code = pam_acct_mgmt(handle, flags);
778 if (pam_code != PAM_SUCCESS)
779 goto fail;
780
781 pam_code = pam_open_session(handle, flags);
782 if (pam_code != PAM_SUCCESS)
783 goto fail;
784
785 close_session = true;
786
787 e = pam_getenvlist(handle);
788 if (!e) {
789 pam_code = PAM_BUF_ERR;
790 goto fail;
791 }
792
793 /* Block SIGTERM, so that we know that it won't get lost in
794 * the child */
795 if (sigemptyset(&ss) < 0 ||
796 sigaddset(&ss, SIGTERM) < 0 ||
797 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
798 goto fail;
799
800 parent_pid = getpid();
801
802 pam_pid = fork();
803 if (pam_pid < 0)
804 goto fail;
805
806 if (pam_pid == 0) {
807 int sig;
808 int r = EXIT_PAM;
809
810 /* The child's job is to reset the PAM session on
811 * termination */
812
813 /* This string must fit in 10 chars (i.e. the length
814 * of "/sbin/init"), to look pretty in /bin/ps */
815 rename_process("(sd-pam)");
816
817 /* Make sure we don't keep open the passed fds in this
818 child. We assume that otherwise only those fds are
819 open here that have been opened by PAM. */
820 close_many(fds, n_fds);
821
822 /* Drop privileges - we don't need any to pam_close_session
823 * and this will make PR_SET_PDEATHSIG work in most cases.
824 * If this fails, ignore the error - but expect sd-pam threads
825 * to fail to exit normally */
826 if (setresuid(uid, uid, uid) < 0)
827 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
828
829 /* Wait until our parent died. This will only work if
830 * the above setresuid() succeeds, otherwise the kernel
831 * will not allow unprivileged parents kill their privileged
832 * children this way. We rely on the control groups kill logic
833 * to do the rest for us. */
834 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
835 goto child_finish;
836
837 /* Check if our parent process might already have
838 * died? */
839 if (getppid() == parent_pid) {
840 for (;;) {
841 if (sigwait(&ss, &sig) < 0) {
842 if (errno == EINTR)
843 continue;
844
845 goto child_finish;
846 }
847
848 assert(sig == SIGTERM);
849 break;
850 }
851 }
852
853 /* If our parent died we'll end the session */
854 if (getppid() != parent_pid) {
855 pam_code = pam_close_session(handle, flags);
856 if (pam_code != PAM_SUCCESS)
857 goto child_finish;
858 }
859
860 r = 0;
861
862 child_finish:
863 pam_end(handle, pam_code | flags);
864 _exit(r);
865 }
866
867 /* If the child was forked off successfully it will do all the
868 * cleanups, so forget about the handle here. */
869 handle = NULL;
870
871 /* Unblock SIGTERM again in the parent */
872 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
873 goto fail;
874
875 /* We close the log explicitly here, since the PAM modules
876 * might have opened it, but we don't want this fd around. */
877 closelog();
878
879 *pam_env = e;
880 e = NULL;
881
882 return 0;
883
884 fail:
885 if (pam_code != PAM_SUCCESS) {
886 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
887 err = -EPERM; /* PAM errors do not map to errno */
888 } else {
889 log_error_errno(errno, "PAM failed: %m");
890 err = -errno;
891 }
892
893 if (handle) {
894 if (close_session)
895 pam_code = pam_close_session(handle, flags);
896
897 pam_end(handle, pam_code | flags);
898 }
899
900 strv_free(e);
901
902 closelog();
903
904 if (pam_pid > 1) {
905 kill(pam_pid, SIGTERM);
906 kill(pam_pid, SIGCONT);
907 }
908
909 return err;
910 }
911 #endif
912
/* Renames the current process to "(<name>)", where <name> is the last
 * path component of path, cut down to its final 8 characters if longer.
 * An empty component yields "(...)". */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 chars + ')' + NUL. The visible part must fit in 10
         * chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps. */
        char label[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting part,
                 * since the beginning might just be "systemd-". */
                name += len - 8;
                len = 8;
        }

        label[0] = '(';
        memcpy(label + 1, name, len);
        label[len + 1] = ')';
        label[len + 2] = 0;

        rename_process(label);
}
943
944 #ifdef HAVE_SECCOMP
945
946 static int apply_seccomp(const ExecContext *c) {
947 uint32_t negative_action, action;
948 scmp_filter_ctx *seccomp;
949 Iterator i;
950 void *id;
951 int r;
952
953 assert(c);
954
955 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
956
957 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
958 if (!seccomp)
959 return -ENOMEM;
960
961 if (c->syscall_archs) {
962
963 SET_FOREACH(id, c->syscall_archs, i) {
964 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
965 if (r == -EEXIST)
966 continue;
967 if (r < 0)
968 goto finish;
969 }
970
971 } else {
972 r = seccomp_add_secondary_archs(seccomp);
973 if (r < 0)
974 goto finish;
975 }
976
977 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
978 SET_FOREACH(id, c->syscall_filter, i) {
979 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
980 if (r < 0)
981 goto finish;
982 }
983
984 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
985 if (r < 0)
986 goto finish;
987
988 r = seccomp_load(seccomp);
989
990 finish:
991 seccomp_release(seccomp);
992 return r;
993 }
994
995 static int apply_address_families(const ExecContext *c) {
996 scmp_filter_ctx *seccomp;
997 Iterator i;
998 int r;
999
1000 assert(c);
1001
1002 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1003 if (!seccomp)
1004 return -ENOMEM;
1005
1006 r = seccomp_add_secondary_archs(seccomp);
1007 if (r < 0)
1008 goto finish;
1009
1010 if (c->address_families_whitelist) {
1011 int af, first = 0, last = 0;
1012 void *afp;
1013
1014 /* If this is a whitelist, we first block the address
1015 * families that are out of range and then everything
1016 * that is not in the set. First, we find the lowest
1017 * and highest address family in the set. */
1018
1019 SET_FOREACH(afp, c->address_families, i) {
1020 af = PTR_TO_INT(afp);
1021
1022 if (af <= 0 || af >= af_max())
1023 continue;
1024
1025 if (first == 0 || af < first)
1026 first = af;
1027
1028 if (last == 0 || af > last)
1029 last = af;
1030 }
1031
1032 assert((first == 0) == (last == 0));
1033
1034 if (first == 0) {
1035
1036 /* No entries in the valid range, block everything */
1037 r = seccomp_rule_add(
1038 seccomp,
1039 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1040 SCMP_SYS(socket),
1041 0);
1042 if (r < 0)
1043 goto finish;
1044
1045 } else {
1046
1047 /* Block everything below the first entry */
1048 r = seccomp_rule_add(
1049 seccomp,
1050 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1051 SCMP_SYS(socket),
1052 1,
1053 SCMP_A0(SCMP_CMP_LT, first));
1054 if (r < 0)
1055 goto finish;
1056
1057 /* Block everything above the last entry */
1058 r = seccomp_rule_add(
1059 seccomp,
1060 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1061 SCMP_SYS(socket),
1062 1,
1063 SCMP_A0(SCMP_CMP_GT, last));
1064 if (r < 0)
1065 goto finish;
1066
1067 /* Block everything between the first and last
1068 * entry */
1069 for (af = 1; af < af_max(); af++) {
1070
1071 if (set_contains(c->address_families, INT_TO_PTR(af)))
1072 continue;
1073
1074 r = seccomp_rule_add(
1075 seccomp,
1076 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1077 SCMP_SYS(socket),
1078 1,
1079 SCMP_A0(SCMP_CMP_EQ, af));
1080 if (r < 0)
1081 goto finish;
1082 }
1083 }
1084
1085 } else {
1086 void *af;
1087
1088 /* If this is a blacklist, then generate one rule for
1089 * each address family that are then combined in OR
1090 * checks. */
1091
1092 SET_FOREACH(af, c->address_families, i) {
1093
1094 r = seccomp_rule_add(
1095 seccomp,
1096 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097 SCMP_SYS(socket),
1098 1,
1099 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1100 if (r < 0)
1101 goto finish;
1102 }
1103 }
1104
1105 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1106 if (r < 0)
1107 goto finish;
1108
1109 r = seccomp_load(seccomp);
1110
1111 finish:
1112 seccomp_release(seccomp);
1113 return r;
1114 }
1115
1116 #endif
1117
1118 static void do_idle_pipe_dance(int idle_pipe[4]) {
1119 assert(idle_pipe);
1120
1121
1122 safe_close(idle_pipe[1]);
1123 safe_close(idle_pipe[2]);
1124
1125 if (idle_pipe[0] >= 0) {
1126 int r;
1127
1128 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1129
1130 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1131 /* Signal systemd that we are bored and want to continue. */
1132 write(idle_pipe[3], "x", 1);
1133
1134 /* Wait for systemd to react to the signal above. */
1135 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1136 }
1137
1138 safe_close(idle_pipe[0]);
1139
1140 }
1141
1142 safe_close(idle_pipe[3]);
1143 }
1144
1145 static int build_environment(
1146 const ExecContext *c,
1147 unsigned n_fds,
1148 usec_t watchdog_usec,
1149 const char *home,
1150 const char *username,
1151 const char *shell,
1152 char ***ret) {
1153
1154 _cleanup_strv_free_ char **our_env = NULL;
1155 unsigned n_env = 0;
1156 char *x;
1157
1158 assert(c);
1159 assert(ret);
1160
1161 our_env = new0(char*, 10);
1162 if (!our_env)
1163 return -ENOMEM;
1164
1165 if (n_fds > 0) {
1166 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1167 return -ENOMEM;
1168 our_env[n_env++] = x;
1169
1170 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1171 return -ENOMEM;
1172 our_env[n_env++] = x;
1173 }
1174
1175 if (watchdog_usec > 0) {
1176 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1177 return -ENOMEM;
1178 our_env[n_env++] = x;
1179
1180 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1181 return -ENOMEM;
1182 our_env[n_env++] = x;
1183 }
1184
1185 if (home) {
1186 x = strappend("HOME=", home);
1187 if (!x)
1188 return -ENOMEM;
1189 our_env[n_env++] = x;
1190 }
1191
1192 if (username) {
1193 x = strappend("LOGNAME=", username);
1194 if (!x)
1195 return -ENOMEM;
1196 our_env[n_env++] = x;
1197
1198 x = strappend("USER=", username);
1199 if (!x)
1200 return -ENOMEM;
1201 our_env[n_env++] = x;
1202 }
1203
1204 if (shell) {
1205 x = strappend("SHELL=", shell);
1206 if (!x)
1207 return -ENOMEM;
1208 our_env[n_env++] = x;
1209 }
1210
1211 if (is_terminal_input(c->std_input) ||
1212 c->std_output == EXEC_OUTPUT_TTY ||
1213 c->std_error == EXEC_OUTPUT_TTY ||
1214 c->tty_path) {
1215
1216 x = strdup(default_term_for_tty(tty_path(c)));
1217 if (!x)
1218 return -ENOMEM;
1219 our_env[n_env++] = x;
1220 }
1221
1222 our_env[n_env++] = NULL;
1223 assert(n_env <= 10);
1224
1225 *ret = our_env;
1226 our_env = NULL;
1227
1228 return 0;
1229 }
1230
1231 static int exec_child(ExecCommand *command,
1232 const ExecContext *context,
1233 const ExecParameters *params,
1234 ExecRuntime *runtime,
1235 char **argv,
1236 int socket_fd,
1237 int *fds, unsigned n_fds,
1238 char **files_env,
1239 int *error) {
1240
1241 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1242 _cleanup_free_ char *mac_selinux_context_net = NULL;
1243 const char *username = NULL, *home = NULL, *shell = NULL;
1244 unsigned n_dont_close = 0;
1245 int dont_close[n_fds + 4];
1246 uid_t uid = UID_INVALID;
1247 gid_t gid = GID_INVALID;
1248 int i, err;
1249
1250 assert(command);
1251 assert(context);
1252 assert(params);
1253 assert(error);
1254
1255 rename_process_from_path(command->path);
1256
1257 /* We reset exactly these signals, since they are the
1258 * only ones we set to SIG_IGN in the main daemon. All
1259 * others we leave untouched because we set them to
1260 * SIG_DFL or a valid handler initially, both of which
1261 * will be demoted to SIG_DFL. */
1262 default_signals(SIGNALS_CRASH_HANDLER,
1263 SIGNALS_IGNORE, -1);
1264
1265 if (context->ignore_sigpipe)
1266 ignore_signals(SIGPIPE, -1);
1267
1268 err = reset_signal_mask();
1269 if (err < 0) {
1270 *error = EXIT_SIGNAL_MASK;
1271 return err;
1272 }
1273
1274 if (params->idle_pipe)
1275 do_idle_pipe_dance(params->idle_pipe);
1276
1277 /* Close sockets very early to make sure we don't
1278 * block init reexecution because it cannot bind its
1279 * sockets */
1280 log_forget_fds();
1281
1282 if (socket_fd >= 0)
1283 dont_close[n_dont_close++] = socket_fd;
1284 if (n_fds > 0) {
1285 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1286 n_dont_close += n_fds;
1287 }
1288 if (params->bus_endpoint_fd >= 0)
1289 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1290 if (runtime) {
1291 if (runtime->netns_storage_socket[0] >= 0)
1292 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1293 if (runtime->netns_storage_socket[1] >= 0)
1294 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1295 }
1296
1297 err = close_all_fds(dont_close, n_dont_close);
1298 if (err < 0) {
1299 *error = EXIT_FDS;
1300 return err;
1301 }
1302
1303 if (!context->same_pgrp)
1304 if (setsid() < 0) {
1305 *error = EXIT_SETSID;
1306 return -errno;
1307 }
1308
1309 exec_context_tty_reset(context);
1310
1311 if (params->confirm_spawn) {
1312 char response;
1313
1314 err = ask_for_confirmation(&response, argv);
1315 if (err == -ETIMEDOUT)
1316 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1317 else if (err < 0)
1318 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1319 else if (response == 's') {
1320 write_confirm_message("Skipping execution.\n");
1321 *error = EXIT_CONFIRM;
1322 return -ECANCELED;
1323 } else if (response == 'n') {
1324 write_confirm_message("Failing execution.\n");
1325 *error = 0;
1326 return 0;
1327 }
1328 }
1329
1330 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1331 * must be sure to drop O_NONBLOCK */
1332 if (socket_fd >= 0)
1333 fd_nonblock(socket_fd, false);
1334
1335 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1336 if (err < 0) {
1337 *error = EXIT_STDIN;
1338 return err;
1339 }
1340
1341 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1342 if (err < 0) {
1343 *error = EXIT_STDOUT;
1344 return err;
1345 }
1346
1347 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1348 if (err < 0) {
1349 *error = EXIT_STDERR;
1350 return err;
1351 }
1352
1353 if (params->cgroup_path) {
1354 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1355 if (err < 0) {
1356 *error = EXIT_CGROUP;
1357 return err;
1358 }
1359 }
1360
1361 if (context->oom_score_adjust_set) {
1362 char t[16];
1363
1364 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1365 char_array_0(t);
1366
1367 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1368 *error = EXIT_OOM_ADJUST;
1369 return -errno;
1370 }
1371 }
1372
1373 if (context->nice_set)
1374 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1375 *error = EXIT_NICE;
1376 return -errno;
1377 }
1378
1379 if (context->cpu_sched_set) {
1380 struct sched_param param = {
1381 .sched_priority = context->cpu_sched_priority,
1382 };
1383
1384 err = sched_setscheduler(0,
1385 context->cpu_sched_policy |
1386 (context->cpu_sched_reset_on_fork ?
1387 SCHED_RESET_ON_FORK : 0),
1388 &param);
1389 if (err < 0) {
1390 *error = EXIT_SETSCHEDULER;
1391 return -errno;
1392 }
1393 }
1394
1395 if (context->cpuset)
1396 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1397 *error = EXIT_CPUAFFINITY;
1398 return -errno;
1399 }
1400
1401 if (context->ioprio_set)
1402 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1403 *error = EXIT_IOPRIO;
1404 return -errno;
1405 }
1406
1407 if (context->timer_slack_nsec != NSEC_INFINITY)
1408 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1409 *error = EXIT_TIMERSLACK;
1410 return -errno;
1411 }
1412
1413 if (context->personality != 0xffffffffUL)
1414 if (personality(context->personality) < 0) {
1415 *error = EXIT_PERSONALITY;
1416 return -errno;
1417 }
1418
1419 if (context->utmp_id)
1420 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1421
1422 if (context->user) {
1423 username = context->user;
1424 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1425 if (err < 0) {
1426 *error = EXIT_USER;
1427 return err;
1428 }
1429
1430 if (is_terminal_input(context->std_input)) {
1431 err = chown_terminal(STDIN_FILENO, uid);
1432 if (err < 0) {
1433 *error = EXIT_STDIN;
1434 return err;
1435 }
1436 }
1437 }
1438
1439 #ifdef ENABLE_KDBUS
1440 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1441 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1442
1443 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1444 if (err < 0) {
1445 *error = EXIT_BUS_ENDPOINT;
1446 return err;
1447 }
1448 }
1449 #endif
1450
1451 /* If delegation is enabled we'll pass ownership of the cgroup
1452 * (but only in systemd's own controller hierarchy!) to the
1453 * user of the new process. */
1454 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1455 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1456 if (err < 0) {
1457 *error = EXIT_CGROUP;
1458 return err;
1459 }
1460
1461
1462 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1463 if (err < 0) {
1464 *error = EXIT_CGROUP;
1465 return err;
1466 }
1467 }
1468
1469 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1470 char **rt;
1471
1472 STRV_FOREACH(rt, context->runtime_directory) {
1473 _cleanup_free_ char *p;
1474
1475 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1476 if (!p) {
1477 *error = EXIT_RUNTIME_DIRECTORY;
1478 return -ENOMEM;
1479 }
1480
1481 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1482 if (err < 0) {
1483 *error = EXIT_RUNTIME_DIRECTORY;
1484 return err;
1485 }
1486 }
1487 }
1488
1489 if (params->apply_permissions) {
1490 err = enforce_groups(context, username, gid);
1491 if (err < 0) {
1492 *error = EXIT_GROUP;
1493 return err;
1494 }
1495 }
1496
1497 umask(context->umask);
1498
1499 #ifdef HAVE_PAM
1500 if (params->apply_permissions && context->pam_name && username) {
1501 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1502 if (err < 0) {
1503 *error = EXIT_PAM;
1504 return err;
1505 }
1506 }
1507 #endif
1508
1509 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1510 err = setup_netns(runtime->netns_storage_socket);
1511 if (err < 0) {
1512 *error = EXIT_NETWORK;
1513 return err;
1514 }
1515 }
1516
1517 if (!strv_isempty(context->read_write_dirs) ||
1518 !strv_isempty(context->read_only_dirs) ||
1519 !strv_isempty(context->inaccessible_dirs) ||
1520 context->mount_flags != 0 ||
1521 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1522 params->bus_endpoint_path ||
1523 context->private_devices ||
1524 context->protect_system != PROTECT_SYSTEM_NO ||
1525 context->protect_home != PROTECT_HOME_NO) {
1526
1527 char *tmp = NULL, *var = NULL;
1528
1529 /* The runtime struct only contains the parent
1530 * of the private /tmp, which is
1531 * non-accessible to world users. Inside of it
1532 * there's a /tmp that is sticky, and that's
1533 * the one we want to use here. */
1534
1535 if (context->private_tmp && runtime) {
1536 if (runtime->tmp_dir)
1537 tmp = strappenda(runtime->tmp_dir, "/tmp");
1538 if (runtime->var_tmp_dir)
1539 var = strappenda(runtime->var_tmp_dir, "/tmp");
1540 }
1541
1542 err = setup_namespace(
1543 context->read_write_dirs,
1544 context->read_only_dirs,
1545 context->inaccessible_dirs,
1546 tmp,
1547 var,
1548 params->bus_endpoint_path,
1549 context->private_devices,
1550 context->protect_home,
1551 context->protect_system,
1552 context->mount_flags);
1553
1554 if (err == -EPERM)
1555 log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1556 else if (err < 0) {
1557 *error = EXIT_NAMESPACE;
1558 return err;
1559 }
1560 }
1561
1562 if (params->apply_chroot) {
1563 if (context->root_directory)
1564 if (chroot(context->root_directory) < 0) {
1565 *error = EXIT_CHROOT;
1566 return -errno;
1567 }
1568
1569 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1570 *error = EXIT_CHDIR;
1571 return -errno;
1572 }
1573 } else {
1574 _cleanup_free_ char *d = NULL;
1575
1576 if (asprintf(&d, "%s/%s",
1577 context->root_directory ? context->root_directory : "",
1578 context->working_directory ? context->working_directory : "") < 0) {
1579 *error = EXIT_MEMORY;
1580 return -ENOMEM;
1581 }
1582
1583 if (chdir(d) < 0) {
1584 *error = EXIT_CHDIR;
1585 return -errno;
1586 }
1587 }
1588
1589 #ifdef HAVE_SELINUX
1590 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1591 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1592 if (err < 0) {
1593 *error = EXIT_SELINUX_CONTEXT;
1594 return err;
1595 }
1596 }
1597 #endif
1598
1599 /* We repeat the fd closing here, to make sure that
1600 * nothing is leaked from the PAM modules. Note that
1601 * we are more aggressive this time since socket_fd
1602 * and the netns fds we don't need anymore. The custom
1603 * endpoint fd was needed to upload the policy and can
1604 * now be closed as well. */
1605 err = close_all_fds(fds, n_fds);
1606 if (err >= 0)
1607 err = shift_fds(fds, n_fds);
1608 if (err >= 0)
1609 err = flags_fds(fds, n_fds, context->non_blocking);
1610 if (err < 0) {
1611 *error = EXIT_FDS;
1612 return err;
1613 }
1614
1615 if (params->apply_permissions) {
1616
1617 for (i = 0; i < _RLIMIT_MAX; i++) {
1618 if (!context->rlimit[i])
1619 continue;
1620
1621 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1622 *error = EXIT_LIMITS;
1623 return -errno;
1624 }
1625 }
1626
1627 if (context->capability_bounding_set_drop) {
1628 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1629 if (err < 0) {
1630 *error = EXIT_CAPABILITIES;
1631 return err;
1632 }
1633 }
1634
1635 #ifdef HAVE_SMACK
1636 if (context->smack_process_label) {
1637 err = mac_smack_apply_pid(0, context->smack_process_label);
1638 if (err < 0) {
1639 *error = EXIT_SMACK_PROCESS_LABEL;
1640 return err;
1641 }
1642 }
1643 #endif
1644
1645 if (context->user) {
1646 err = enforce_user(context, uid);
1647 if (err < 0) {
1648 *error = EXIT_USER;
1649 return err;
1650 }
1651 }
1652
1653 /* PR_GET_SECUREBITS is not privileged, while
1654 * PR_SET_SECUREBITS is. So to suppress
1655 * potential EPERMs we'll try not to call
1656 * PR_SET_SECUREBITS unless necessary. */
1657 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1658 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1659 *error = EXIT_SECUREBITS;
1660 return -errno;
1661 }
1662
1663 if (context->capabilities)
1664 if (cap_set_proc(context->capabilities) < 0) {
1665 *error = EXIT_CAPABILITIES;
1666 return -errno;
1667 }
1668
1669 if (context->no_new_privileges)
1670 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1671 *error = EXIT_NO_NEW_PRIVILEGES;
1672 return -errno;
1673 }
1674
1675 #ifdef HAVE_SECCOMP
1676 if (context->address_families_whitelist ||
1677 !set_isempty(context->address_families)) {
1678 err = apply_address_families(context);
1679 if (err < 0) {
1680 *error = EXIT_ADDRESS_FAMILIES;
1681 return err;
1682 }
1683 }
1684
1685 if (context->syscall_whitelist ||
1686 !set_isempty(context->syscall_filter) ||
1687 !set_isempty(context->syscall_archs)) {
1688 err = apply_seccomp(context);
1689 if (err < 0) {
1690 *error = EXIT_SECCOMP;
1691 return err;
1692 }
1693 }
1694 #endif
1695
1696 #ifdef HAVE_SELINUX
1697 if (mac_selinux_use()) {
1698 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1699
1700 if (exec_context) {
1701 err = setexeccon(exec_context);
1702 if (err < 0) {
1703 *error = EXIT_SELINUX_CONTEXT;
1704 return err;
1705 }
1706 }
1707 }
1708 #endif
1709
1710 #ifdef HAVE_APPARMOR
1711 if (context->apparmor_profile && mac_apparmor_use()) {
1712 err = aa_change_onexec(context->apparmor_profile);
1713 if (err < 0 && !context->apparmor_profile_ignore) {
1714 *error = EXIT_APPARMOR_PROFILE;
1715 return -errno;
1716 }
1717 }
1718 #endif
1719 }
1720
1721 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1722 if (err < 0) {
1723 *error = EXIT_MEMORY;
1724 return err;
1725 }
1726
1727 final_env = strv_env_merge(5,
1728 params->environment,
1729 our_env,
1730 context->environment,
1731 files_env,
1732 pam_env,
1733 NULL);
1734 if (!final_env) {
1735 *error = EXIT_MEMORY;
1736 return -ENOMEM;
1737 }
1738
1739 final_argv = replace_env_argv(argv, final_env);
1740 if (!final_argv) {
1741 *error = EXIT_MEMORY;
1742 return -ENOMEM;
1743 }
1744
1745 final_env = strv_env_clean(final_env);
1746
1747 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1748 _cleanup_free_ char *line;
1749
1750 line = exec_command_line(final_argv);
1751 if (line) {
1752 log_open();
1753 log_unit_struct(params->unit_id,
1754 LOG_DEBUG,
1755 "EXECUTABLE=%s", command->path,
1756 LOG_MESSAGE("Executing: %s", line),
1757 NULL);
1758 log_close();
1759 }
1760 }
1761 execve(command->path, final_argv, final_env);
1762 *error = EXIT_EXEC;
1763 return -errno;
1764 }
1765
1766 int exec_spawn(ExecCommand *command,
1767 const ExecContext *context,
1768 const ExecParameters *params,
1769 ExecRuntime *runtime,
1770 pid_t *ret) {
1771
1772 _cleanup_strv_free_ char **files_env = NULL;
1773 int *fds = NULL; unsigned n_fds = 0;
1774 char *line, **argv;
1775 int socket_fd;
1776 pid_t pid;
1777 int err;
1778
1779 assert(command);
1780 assert(context);
1781 assert(ret);
1782 assert(params);
1783 assert(params->fds || params->n_fds <= 0);
1784
1785 if (context->std_input == EXEC_INPUT_SOCKET ||
1786 context->std_output == EXEC_OUTPUT_SOCKET ||
1787 context->std_error == EXEC_OUTPUT_SOCKET) {
1788
1789 if (params->n_fds != 1)
1790 return -EINVAL;
1791
1792 socket_fd = params->fds[0];
1793 } else {
1794 socket_fd = -1;
1795 fds = params->fds;
1796 n_fds = params->n_fds;
1797 }
1798
1799 err = exec_context_load_environment(context, params->unit_id, &files_env);
1800 if (err < 0) {
1801 log_unit_struct(params->unit_id,
1802 LOG_ERR,
1803 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1804 LOG_ERRNO(-err),
1805 NULL);
1806 return err;
1807 }
1808
1809 argv = params->argv ?: command->argv;
1810
1811 line = exec_command_line(argv);
1812 if (!line)
1813 return log_oom();
1814
1815 log_unit_struct(params->unit_id,
1816 LOG_DEBUG,
1817 "EXECUTABLE=%s", command->path,
1818 LOG_MESSAGE("About to execute: %s", line),
1819 NULL);
1820 free(line);
1821
1822 pid = fork();
1823 if (pid < 0)
1824 return -errno;
1825
1826 if (pid == 0) {
1827 int r;
1828
1829 err = exec_child(command,
1830 context,
1831 params,
1832 runtime,
1833 argv,
1834 socket_fd,
1835 fds, n_fds,
1836 files_env,
1837 &r);
1838 if (r != 0) {
1839 log_open();
1840 log_struct(LOG_ERR,
1841 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842 "EXECUTABLE=%s", command->path,
1843 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1844 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845 command->path, strerror(-err)),
1846 LOG_ERRNO(-err),
1847 NULL);
1848 log_close();
1849 }
1850
1851 _exit(r);
1852 }
1853
1854 log_unit_struct(params->unit_id,
1855 LOG_DEBUG,
1856 LOG_MESSAGE("Forked %s as "PID_FMT,
1857 command->path, pid),
1858 NULL);
1859
1860 /* We add the new process to the cgroup both in the child (so
1861 * that we can be sure that no user code is ever executed
1862 * outside of the cgroup) and in the parent (so that we can be
1863 * sure that when we kill the cgroup the process will be
1864 * killed too). */
1865 if (params->cgroup_path)
1866 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1867
1868 exec_status_start(&command->exec_status, pid);
1869
1870 *ret = pid;
1871 return 0;
1872 }
1873
1874 void exec_context_init(ExecContext *c) {
1875 assert(c);
1876
1877 c->umask = 0022;
1878 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879 c->cpu_sched_policy = SCHED_OTHER;
1880 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881 c->syslog_level_prefix = true;
1882 c->ignore_sigpipe = true;
1883 c->timer_slack_nsec = NSEC_INFINITY;
1884 c->personality = 0xffffffffUL;
1885 c->runtime_directory_mode = 0755;
1886 }
1887
1888 void exec_context_done(ExecContext *c) {
1889 unsigned l;
1890
1891 assert(c);
1892
1893 strv_free(c->environment);
1894 c->environment = NULL;
1895
1896 strv_free(c->environment_files);
1897 c->environment_files = NULL;
1898
1899 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1900 free(c->rlimit[l]);
1901 c->rlimit[l] = NULL;
1902 }
1903
1904 free(c->working_directory);
1905 c->working_directory = NULL;
1906 free(c->root_directory);
1907 c->root_directory = NULL;
1908
1909 free(c->tty_path);
1910 c->tty_path = NULL;
1911
1912 free(c->syslog_identifier);
1913 c->syslog_identifier = NULL;
1914
1915 free(c->user);
1916 c->user = NULL;
1917
1918 free(c->group);
1919 c->group = NULL;
1920
1921 strv_free(c->supplementary_groups);
1922 c->supplementary_groups = NULL;
1923
1924 free(c->pam_name);
1925 c->pam_name = NULL;
1926
1927 if (c->capabilities) {
1928 cap_free(c->capabilities);
1929 c->capabilities = NULL;
1930 }
1931
1932 strv_free(c->read_only_dirs);
1933 c->read_only_dirs = NULL;
1934
1935 strv_free(c->read_write_dirs);
1936 c->read_write_dirs = NULL;
1937
1938 strv_free(c->inaccessible_dirs);
1939 c->inaccessible_dirs = NULL;
1940
1941 if (c->cpuset)
1942 CPU_FREE(c->cpuset);
1943
1944 free(c->utmp_id);
1945 c->utmp_id = NULL;
1946
1947 free(c->selinux_context);
1948 c->selinux_context = NULL;
1949
1950 free(c->apparmor_profile);
1951 c->apparmor_profile = NULL;
1952
1953 set_free(c->syscall_filter);
1954 c->syscall_filter = NULL;
1955
1956 set_free(c->syscall_archs);
1957 c->syscall_archs = NULL;
1958
1959 set_free(c->address_families);
1960 c->address_families = NULL;
1961
1962 strv_free(c->runtime_directory);
1963 c->runtime_directory = NULL;
1964
1965 bus_endpoint_free(c->bus_endpoint);
1966 c->bus_endpoint = NULL;
1967 }
1968
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1970 char **i;
1971
1972 assert(c);
1973
1974 if (!runtime_prefix)
1975 return 0;
1976
1977 STRV_FOREACH(i, c->runtime_directory) {
1978 _cleanup_free_ char *p;
1979
1980 p = strjoin(runtime_prefix, "/", *i, NULL);
1981 if (!p)
1982 return -ENOMEM;
1983
1984 /* We execute this synchronously, since we need to be
1985 * sure this is gone when we start the service
1986 * next. */
1987 rm_rf_dangerous(p, false, true, false);
1988 }
1989
1990 return 0;
1991 }
1992
1993 void exec_command_done(ExecCommand *c) {
1994 assert(c);
1995
1996 free(c->path);
1997 c->path = NULL;
1998
1999 strv_free(c->argv);
2000 c->argv = NULL;
2001 }
2002
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2004 unsigned i;
2005
2006 for (i = 0; i < n; i++)
2007 exec_command_done(c+i);
2008 }
2009
2010 void exec_command_free_list(ExecCommand *c) {
2011 ExecCommand *i;
2012
2013 while ((i = c)) {
2014 LIST_REMOVE(command, c, i);
2015 exec_command_done(i);
2016 free(i);
2017 }
2018 }
2019
2020 void exec_command_free_array(ExecCommand **c, unsigned n) {
2021 unsigned i;
2022
2023 for (i = 0; i < n; i++) {
2024 exec_command_free_list(c[i]);
2025 c[i] = NULL;
2026 }
2027 }
2028
2029 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2030 char **i, **r = NULL;
2031
2032 assert(c);
2033 assert(l);
2034
2035 STRV_FOREACH(i, c->environment_files) {
2036 char *fn;
2037 int k;
2038 bool ignore = false;
2039 char **p;
2040 _cleanup_globfree_ glob_t pglob = {};
2041 int count, n;
2042
2043 fn = *i;
2044
2045 if (fn[0] == '-') {
2046 ignore = true;
2047 fn ++;
2048 }
2049
2050 if (!path_is_absolute(fn)) {
2051 if (ignore)
2052 continue;
2053
2054 strv_free(r);
2055 return -EINVAL;
2056 }
2057
2058 /* Filename supports globbing, take all matching files */
2059 errno = 0;
2060 if (glob(fn, 0, NULL, &pglob) != 0) {
2061 if (ignore)
2062 continue;
2063
2064 strv_free(r);
2065 return errno ? -errno : -EINVAL;
2066 }
2067 count = pglob.gl_pathc;
2068 if (count == 0) {
2069 if (ignore)
2070 continue;
2071
2072 strv_free(r);
2073 return -EINVAL;
2074 }
2075 for (n = 0; n < count; n++) {
2076 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2077 if (k < 0) {
2078 if (ignore)
2079 continue;
2080
2081 strv_free(r);
2082 return k;
2083 }
2084 /* Log invalid environment variables with filename */
2085 if (p)
2086 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2087
2088 if (r == NULL)
2089 r = p;
2090 else {
2091 char **m;
2092
2093 m = strv_env_merge(2, r, p);
2094 strv_free(r);
2095 strv_free(p);
2096 if (!m)
2097 return -ENOMEM;
2098
2099 r = m;
2100 }
2101 }
2102 }
2103
2104 *l = r;
2105
2106 return 0;
2107 }
2108
2109 static bool tty_may_match_dev_console(const char *tty) {
2110 _cleanup_free_ char *active = NULL;
2111 char *console;
2112
2113 if (startswith(tty, "/dev/"))
2114 tty += 5;
2115
2116 /* trivial identity? */
2117 if (streq(tty, "console"))
2118 return true;
2119
2120 console = resolve_dev_console(&active);
2121 /* if we could not resolve, assume it may */
2122 if (!console)
2123 return true;
2124
2125 /* "tty0" means the active VC, so it may be the same sometimes */
2126 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2127 }
2128
2129 bool exec_context_may_touch_console(ExecContext *ec) {
2130 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2131 is_terminal_input(ec->std_input) ||
2132 is_terminal_output(ec->std_output) ||
2133 is_terminal_output(ec->std_error)) &&
2134 tty_may_match_dev_console(tty_path(ec));
2135 }
2136
2137 static void strv_fprintf(FILE *f, char **l) {
2138 char **g;
2139
2140 assert(f);
2141
2142 STRV_FOREACH(g, l)
2143 fprintf(f, " %s", *g);
2144 }
2145
2146 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2147 char **e;
2148 unsigned i;
2149
2150 assert(c);
2151 assert(f);
2152
2153 prefix = strempty(prefix);
2154
2155 fprintf(f,
2156 "%sUMask: %04o\n"
2157 "%sWorkingDirectory: %s\n"
2158 "%sRootDirectory: %s\n"
2159 "%sNonBlocking: %s\n"
2160 "%sPrivateTmp: %s\n"
2161 "%sPrivateNetwork: %s\n"
2162 "%sPrivateDevices: %s\n"
2163 "%sProtectHome: %s\n"
2164 "%sProtectSystem: %s\n"
2165 "%sIgnoreSIGPIPE: %s\n",
2166 prefix, c->umask,
2167 prefix, c->working_directory ? c->working_directory : "/",
2168 prefix, c->root_directory ? c->root_directory : "/",
2169 prefix, yes_no(c->non_blocking),
2170 prefix, yes_no(c->private_tmp),
2171 prefix, yes_no(c->private_network),
2172 prefix, yes_no(c->private_devices),
2173 prefix, protect_home_to_string(c->protect_home),
2174 prefix, protect_system_to_string(c->protect_system),
2175 prefix, yes_no(c->ignore_sigpipe));
2176
2177 STRV_FOREACH(e, c->environment)
2178 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2179
2180 STRV_FOREACH(e, c->environment_files)
2181 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2182
2183 if (c->nice_set)
2184 fprintf(f,
2185 "%sNice: %i\n",
2186 prefix, c->nice);
2187
2188 if (c->oom_score_adjust_set)
2189 fprintf(f,
2190 "%sOOMScoreAdjust: %i\n",
2191 prefix, c->oom_score_adjust);
2192
2193 for (i = 0; i < RLIM_NLIMITS; i++)
2194 if (c->rlimit[i])
2195 fprintf(f, "%s%s: "RLIM_FMT"\n",
2196 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2197
2198 if (c->ioprio_set) {
2199 _cleanup_free_ char *class_str = NULL;
2200
2201 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2202 fprintf(f,
2203 "%sIOSchedulingClass: %s\n"
2204 "%sIOPriority: %i\n",
2205 prefix, strna(class_str),
2206 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2207 }
2208
2209 if (c->cpu_sched_set) {
2210 _cleanup_free_ char *policy_str = NULL;
2211
2212 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2213 fprintf(f,
2214 "%sCPUSchedulingPolicy: %s\n"
2215 "%sCPUSchedulingPriority: %i\n"
2216 "%sCPUSchedulingResetOnFork: %s\n",
2217 prefix, strna(policy_str),
2218 prefix, c->cpu_sched_priority,
2219 prefix, yes_no(c->cpu_sched_reset_on_fork));
2220 }
2221
2222 if (c->cpuset) {
2223 fprintf(f, "%sCPUAffinity:", prefix);
2224 for (i = 0; i < c->cpuset_ncpus; i++)
2225 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2226 fprintf(f, " %u", i);
2227 fputs("\n", f);
2228 }
2229
2230 if (c->timer_slack_nsec != NSEC_INFINITY)
2231 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2232
2233 fprintf(f,
2234 "%sStandardInput: %s\n"
2235 "%sStandardOutput: %s\n"
2236 "%sStandardError: %s\n",
2237 prefix, exec_input_to_string(c->std_input),
2238 prefix, exec_output_to_string(c->std_output),
2239 prefix, exec_output_to_string(c->std_error));
2240
2241 if (c->tty_path)
2242 fprintf(f,
2243 "%sTTYPath: %s\n"
2244 "%sTTYReset: %s\n"
2245 "%sTTYVHangup: %s\n"
2246 "%sTTYVTDisallocate: %s\n",
2247 prefix, c->tty_path,
2248 prefix, yes_no(c->tty_reset),
2249 prefix, yes_no(c->tty_vhangup),
2250 prefix, yes_no(c->tty_vt_disallocate));
2251
2252 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2253 c->std_output == EXEC_OUTPUT_KMSG ||
2254 c->std_output == EXEC_OUTPUT_JOURNAL ||
2255 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2256 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2257 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2258 c->std_error == EXEC_OUTPUT_SYSLOG ||
2259 c->std_error == EXEC_OUTPUT_KMSG ||
2260 c->std_error == EXEC_OUTPUT_JOURNAL ||
2261 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2262 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2263 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2264
2265 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2266
2267 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2268 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2269
2270 fprintf(f,
2271 "%sSyslogFacility: %s\n"
2272 "%sSyslogLevel: %s\n",
2273 prefix, strna(fac_str),
2274 prefix, strna(lvl_str));
2275 }
2276
2277 if (c->capabilities) {
2278 _cleanup_cap_free_charp_ char *t;
2279
2280 t = cap_to_text(c->capabilities, NULL);
2281 if (t)
2282 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2283 }
2284
2285 if (c->secure_bits)
2286 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2287 prefix,
2288 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2289 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2290 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2291 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2292 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2293 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2294
2295 if (c->capability_bounding_set_drop) {
2296 unsigned long l;
2297 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2298
2299 for (l = 0; l <= cap_last_cap(); l++)
2300 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2301 fprintf(f, " %s", strna(capability_to_name(l)));
2302
2303 fputs("\n", f);
2304 }
2305
2306 if (c->user)
2307 fprintf(f, "%sUser: %s\n", prefix, c->user);
2308 if (c->group)
2309 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2310
2311 if (strv_length(c->supplementary_groups) > 0) {
2312 fprintf(f, "%sSupplementaryGroups:", prefix);
2313 strv_fprintf(f, c->supplementary_groups);
2314 fputs("\n", f);
2315 }
2316
2317 if (c->pam_name)
2318 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2319
2320 if (strv_length(c->read_write_dirs) > 0) {
2321 fprintf(f, "%sReadWriteDirs:", prefix);
2322 strv_fprintf(f, c->read_write_dirs);
2323 fputs("\n", f);
2324 }
2325
2326 if (strv_length(c->read_only_dirs) > 0) {
2327 fprintf(f, "%sReadOnlyDirs:", prefix);
2328 strv_fprintf(f, c->read_only_dirs);
2329 fputs("\n", f);
2330 }
2331
2332 if (strv_length(c->inaccessible_dirs) > 0) {
2333 fprintf(f, "%sInaccessibleDirs:", prefix);
2334 strv_fprintf(f, c->inaccessible_dirs);
2335 fputs("\n", f);
2336 }
2337
2338 if (c->utmp_id)
2339 fprintf(f,
2340 "%sUtmpIdentifier: %s\n",
2341 prefix, c->utmp_id);
2342
2343 if (c->selinux_context)
2344 fprintf(f,
2345 "%sSELinuxContext: %s%s\n",
2346 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2347
2348 if (c->personality != 0xffffffffUL)
2349 fprintf(f,
2350 "%sPersonality: %s\n",
2351 prefix, strna(personality_to_string(c->personality)));
2352
2353 if (c->syscall_filter) {
2354 #ifdef HAVE_SECCOMP
2355 Iterator j;
2356 void *id;
2357 bool first = true;
2358 #endif
2359
2360 fprintf(f,
2361 "%sSystemCallFilter: ",
2362 prefix);
2363
2364 if (!c->syscall_whitelist)
2365 fputc('~', f);
2366
2367 #ifdef HAVE_SECCOMP
2368 SET_FOREACH(id, c->syscall_filter, j) {
2369 _cleanup_free_ char *name = NULL;
2370
2371 if (first)
2372 first = false;
2373 else
2374 fputc(' ', f);
2375
2376 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2377 fputs(strna(name), f);
2378 }
2379 #endif
2380
2381 fputc('\n', f);
2382 }
2383
2384 if (c->syscall_archs) {
2385 #ifdef HAVE_SECCOMP
2386 Iterator j;
2387 void *id;
2388 #endif
2389
2390 fprintf(f,
2391 "%sSystemCallArchitectures:",
2392 prefix);
2393
2394 #ifdef HAVE_SECCOMP
2395 SET_FOREACH(id, c->syscall_archs, j)
2396 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2397 #endif
2398 fputc('\n', f);
2399 }
2400
2401 if (c->syscall_errno != 0)
2402 fprintf(f,
2403 "%sSystemCallErrorNumber: %s\n",
2404 prefix, strna(errno_to_name(c->syscall_errno)));
2405
2406 if (c->apparmor_profile)
2407 fprintf(f,
2408 "%sAppArmorProfile: %s%s\n",
2409 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2410 }
2411
2412 bool exec_context_maintains_privileges(ExecContext *c) {
2413 assert(c);
2414
2415 /* Returns true if the process forked off would run run under
2416 * an unchanged UID or as root. */
2417
2418 if (!c->user)
2419 return true;
2420
2421 if (streq(c->user, "root") || streq(c->user, "0"))
2422 return true;
2423
2424 return false;
2425 }
2426
2427 void exec_status_start(ExecStatus *s, pid_t pid) {
2428 assert(s);
2429
2430 zero(*s);
2431 s->pid = pid;
2432 dual_timestamp_get(&s->start_timestamp);
2433 }
2434
2435 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2436 assert(s);
2437
2438 if (s->pid && s->pid != pid)
2439 zero(*s);
2440
2441 s->pid = pid;
2442 dual_timestamp_get(&s->exit_timestamp);
2443
2444 s->code = code;
2445 s->status = status;
2446
2447 if (context) {
2448 if (context->utmp_id)
2449 utmp_put_dead_process(context->utmp_id, pid, code, status);
2450
2451 exec_context_tty_reset(context);
2452 }
2453 }
2454
2455 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2456 char buf[FORMAT_TIMESTAMP_MAX];
2457
2458 assert(s);
2459 assert(f);
2460
2461 if (s->pid <= 0)
2462 return;
2463
2464 prefix = strempty(prefix);
2465
2466 fprintf(f,
2467 "%sPID: "PID_FMT"\n",
2468 prefix, s->pid);
2469
2470 if (s->start_timestamp.realtime > 0)
2471 fprintf(f,
2472 "%sStart Timestamp: %s\n",
2473 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2474
2475 if (s->exit_timestamp.realtime > 0)
2476 fprintf(f,
2477 "%sExit Timestamp: %s\n"
2478 "%sExit Code: %s\n"
2479 "%sExit Status: %i\n",
2480 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2481 prefix, sigchld_code_to_string(s->code),
2482 prefix, s->status);
2483 }
2484
2485 char *exec_command_line(char **argv) {
2486 size_t k;
2487 char *n, *p, **a;
2488 bool first = true;
2489
2490 assert(argv);
2491
2492 k = 1;
2493 STRV_FOREACH(a, argv)
2494 k += strlen(*a)+3;
2495
2496 if (!(n = new(char, k)))
2497 return NULL;
2498
2499 p = n;
2500 STRV_FOREACH(a, argv) {
2501
2502 if (!first)
2503 *(p++) = ' ';
2504 else
2505 first = false;
2506
2507 if (strpbrk(*a, WHITESPACE)) {
2508 *(p++) = '\'';
2509 p = stpcpy(p, *a);
2510 *(p++) = '\'';
2511 } else
2512 p = stpcpy(p, *a);
2513
2514 }
2515
2516 *p = 0;
2517
2518 /* FIXME: this doesn't really handle arguments that have
2519 * spaces and ticks in them */
2520
2521 return n;
2522 }
2523
2524 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2525 _cleanup_free_ char *cmd = NULL;
2526 const char *prefix2;
2527
2528 assert(c);
2529 assert(f);
2530
2531 prefix = strempty(prefix);
2532 prefix2 = strappenda(prefix, "\t");
2533
2534 cmd = exec_command_line(c->argv);
2535 fprintf(f,
2536 "%sCommand Line: %s\n",
2537 prefix, cmd ? cmd : strerror(ENOMEM));
2538
2539 exec_status_dump(&c->exec_status, f, prefix2);
2540 }
2541
2542 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2543 assert(f);
2544
2545 prefix = strempty(prefix);
2546
2547 LIST_FOREACH(command, c, c)
2548 exec_command_dump(c, f, prefix);
2549 }
2550
2551 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2552 ExecCommand *end;
2553
2554 assert(l);
2555 assert(e);
2556
2557 if (*l) {
2558 /* It's kind of important, that we keep the order here */
2559 LIST_FIND_TAIL(command, *l, end);
2560 LIST_INSERT_AFTER(command, *l, end, e);
2561 } else
2562 *l = e;
2563 }
2564
2565 int exec_command_set(ExecCommand *c, const char *path, ...) {
2566 va_list ap;
2567 char **l, *p;
2568
2569 assert(c);
2570 assert(path);
2571
2572 va_start(ap, path);
2573 l = strv_new_ap(path, ap);
2574 va_end(ap);
2575
2576 if (!l)
2577 return -ENOMEM;
2578
2579 p = strdup(path);
2580 if (!p) {
2581 strv_free(l);
2582 return -ENOMEM;
2583 }
2584
2585 free(c->path);
2586 c->path = p;
2587
2588 strv_free(c->argv);
2589 c->argv = l;
2590
2591 return 0;
2592 }
2593
2594 int exec_command_append(ExecCommand *c, const char *path, ...) {
2595 _cleanup_strv_free_ char **l = NULL;
2596 va_list ap;
2597 int r;
2598
2599 assert(c);
2600 assert(path);
2601
2602 va_start(ap, path);
2603 l = strv_new_ap(path, ap);
2604 va_end(ap);
2605
2606 if (!l)
2607 return -ENOMEM;
2608
2609 r = strv_extend_strv(&c->argv, l);
2610 if (r < 0)
2611 return r;
2612
2613 return 0;
2614 }
2615
2616
2617 static int exec_runtime_allocate(ExecRuntime **rt) {
2618
2619 if (*rt)
2620 return 0;
2621
2622 *rt = new0(ExecRuntime, 1);
2623 if (!*rt)
2624 return -ENOMEM;
2625
2626 (*rt)->n_ref = 1;
2627 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2628
2629 return 0;
2630 }
2631
2632 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2633 int r;
2634
2635 assert(rt);
2636 assert(c);
2637 assert(id);
2638
2639 if (*rt)
2640 return 1;
2641
2642 if (!c->private_network && !c->private_tmp)
2643 return 0;
2644
2645 r = exec_runtime_allocate(rt);
2646 if (r < 0)
2647 return r;
2648
2649 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2650 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2651 return -errno;
2652 }
2653
2654 if (c->private_tmp && !(*rt)->tmp_dir) {
2655 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2656 if (r < 0)
2657 return r;
2658 }
2659
2660 return 1;
2661 }
2662
2663 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2664 assert(r);
2665 assert(r->n_ref > 0);
2666
2667 r->n_ref++;
2668 return r;
2669 }
2670
2671 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2672
2673 if (!r)
2674 return NULL;
2675
2676 assert(r->n_ref > 0);
2677
2678 r->n_ref--;
2679 if (r->n_ref <= 0) {
2680 free(r->tmp_dir);
2681 free(r->var_tmp_dir);
2682 safe_close_pair(r->netns_storage_socket);
2683 free(r);
2684 }
2685
2686 return NULL;
2687 }
2688
2689 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2690 assert(u);
2691 assert(f);
2692 assert(fds);
2693
2694 if (!rt)
2695 return 0;
2696
2697 if (rt->tmp_dir)
2698 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2699
2700 if (rt->var_tmp_dir)
2701 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2702
2703 if (rt->netns_storage_socket[0] >= 0) {
2704 int copy;
2705
2706 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2707 if (copy < 0)
2708 return copy;
2709
2710 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2711 }
2712
2713 if (rt->netns_storage_socket[1] >= 0) {
2714 int copy;
2715
2716 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2717 if (copy < 0)
2718 return copy;
2719
2720 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2721 }
2722
2723 return 0;
2724 }
2725
2726 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2727 int r;
2728
2729 assert(rt);
2730 assert(key);
2731 assert(value);
2732
2733 if (streq(key, "tmp-dir")) {
2734 char *copy;
2735
2736 r = exec_runtime_allocate(rt);
2737 if (r < 0)
2738 return r;
2739
2740 copy = strdup(value);
2741 if (!copy)
2742 return log_oom();
2743
2744 free((*rt)->tmp_dir);
2745 (*rt)->tmp_dir = copy;
2746
2747 } else if (streq(key, "var-tmp-dir")) {
2748 char *copy;
2749
2750 r = exec_runtime_allocate(rt);
2751 if (r < 0)
2752 return r;
2753
2754 copy = strdup(value);
2755 if (!copy)
2756 return log_oom();
2757
2758 free((*rt)->var_tmp_dir);
2759 (*rt)->var_tmp_dir = copy;
2760
2761 } else if (streq(key, "netns-socket-0")) {
2762 int fd;
2763
2764 r = exec_runtime_allocate(rt);
2765 if (r < 0)
2766 return r;
2767
2768 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2769 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2770 else {
2771 safe_close((*rt)->netns_storage_socket[0]);
2772 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2773 }
2774 } else if (streq(key, "netns-socket-1")) {
2775 int fd;
2776
2777 r = exec_runtime_allocate(rt);
2778 if (r < 0)
2779 return r;
2780
2781 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2782 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2783 else {
2784 safe_close((*rt)->netns_storage_socket[1]);
2785 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2786 }
2787 } else
2788 return 0;
2789
2790 return 1;
2791 }
2792
/* Thread entry point that removes the directory tree named by p. Takes
 * ownership of p, which must be a heap-allocated path string, and frees
 * it once the removal attempt is done. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2799
2800 void exec_runtime_destroy(ExecRuntime *rt) {
2801 int r;
2802
2803 if (!rt)
2804 return;
2805
2806 /* If there are multiple users of this, let's leave the stuff around */
2807 if (rt->n_ref > 1)
2808 return;
2809
2810 if (rt->tmp_dir) {
2811 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2812
2813 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2814 if (r < 0) {
2815 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2816 free(rt->tmp_dir);
2817 }
2818
2819 rt->tmp_dir = NULL;
2820 }
2821
2822 if (rt->var_tmp_dir) {
2823 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2824
2825 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2826 if (r < 0) {
2827 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2828 free(rt->var_tmp_dir);
2829 }
2830
2831 rt->var_tmp_dir = NULL;
2832 }
2833
2834 safe_close_pair(rt->netns_storage_socket);
2835 }
2836
2837 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2838 [EXEC_INPUT_NULL] = "null",
2839 [EXEC_INPUT_TTY] = "tty",
2840 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2841 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2842 [EXEC_INPUT_SOCKET] = "socket"
2843 };
2844
2845 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2846
2847 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2848 [EXEC_OUTPUT_INHERIT] = "inherit",
2849 [EXEC_OUTPUT_NULL] = "null",
2850 [EXEC_OUTPUT_TTY] = "tty",
2851 [EXEC_OUTPUT_SYSLOG] = "syslog",
2852 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2853 [EXEC_OUTPUT_KMSG] = "kmsg",
2854 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2855 [EXEC_OUTPUT_JOURNAL] = "journal",
2856 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2857 [EXEC_OUTPUT_SOCKET] = "socket"
2858 };
2859
2860 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);