]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
util: rework rm_rf() logic
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "rm-rf.h"
53 #include "execute.h"
54 #include "strv.h"
55 #include "macro.h"
56 #include "capability.h"
57 #include "util.h"
58 #include "log.h"
59 #include "sd-messages.h"
60 #include "ioprio.h"
61 #include "securebits.h"
62 #include "namespace.h"
63 #include "exit-status.h"
64 #include "missing.h"
65 #include "utmp-wtmp.h"
66 #include "def.h"
67 #include "path-util.h"
68 #include "env-util.h"
69 #include "fileio.h"
70 #include "unit.h"
71 #include "async.h"
72 #include "selinux-util.h"
73 #include "errno-list.h"
74 #include "af-list.h"
75 #include "mkdir.h"
76 #include "smack-util.h"
77 #include "bus-endpoint.h"
78 #include "cap-list.h"
79
80 #ifdef HAVE_APPARMOR
81 #include "apparmor-util.h"
82 #endif
83
84 #ifdef HAVE_SECCOMP
85 #include "seccomp-util.h"
86 #endif
87
88 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
89 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
90
91 /* This assumes there is a 'tty' group */
92 #define TTY_MODE 0620
93
94 #define SNDBUF_SIZE (8*1024*1024)
95
/* Rearranges the passed file descriptors so that fds[i] ends up being fd
 * number i+3. The array is rewritten in place with the new fd numbers.
 * Returns 0 on success, or -errno if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Note: the caller's array is modified! */

        assert(fds);

        while (first >= 0) {
                int retry = -1, k;

                for (k = first; k < (int) n_fds; k++) {
                        int target = k + 3, copy;

                        /* Nothing to do if this one already sits where it belongs */
                        if (fds[k] == target)
                                continue;

                        copy = fcntl(fds[k], F_DUPFD, target);
                        if (copy < 0)
                                return -errno;

                        safe_close(fds[k]);
                        fds[k] = copy;

                        /* The slot we wanted was still taken, so we got a
                         * higher fd. Remember the first such index and
                         * make another pass starting from it. */
                        if (copy != target && retry < 0)
                                retry = k;
                }

                /* A negative value means the last pass placed everything */
                first = retry;
        }

        return 0;
}
139
/* Sets or clears O_NONBLOCK (according to "nonblock") on each of the n_fds
 * descriptors and always clears FD_CLOEXEC on them. Returns 0 on success or
 * the first negative value returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: after all
                 * these fds are meant to be passed to our children */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
166
167 _pure_ static const char *tty_path(const ExecContext *context) {
168 assert(context);
169
170 if (context->tty_path)
171 return context->tty_path;
172
173 return "/dev/console";
174 }
175
176 static void exec_context_tty_reset(const ExecContext *context) {
177 assert(context);
178
179 if (context->tty_vhangup)
180 terminal_vhangup(tty_path(context));
181
182 if (context->tty_reset)
183 reset_terminal(tty_path(context));
184
185 if (context->tty_vt_disallocate && context->tty_path)
186 vt_disallocate(context->tty_path);
187 }
188
189 static bool is_terminal_output(ExecOutput o) {
190 return
191 o == EXEC_OUTPUT_TTY ||
192 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
193 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
194 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
195 }
196
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes it
 * available as fd number nfd. Returns nfd on success, -errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Lucky: we already got the number we wanted */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
214
215 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
216 union sockaddr_union sa = {
217 .un.sun_family = AF_UNIX,
218 .un.sun_path = "/run/systemd/journal/stdout",
219 };
220 uid_t olduid = UID_INVALID;
221 gid_t oldgid = GID_INVALID;
222 int r;
223
224 if (gid != GID_INVALID) {
225 oldgid = getgid();
226
227 r = setegid(gid);
228 if (r < 0)
229 return -errno;
230 }
231
232 if (uid != UID_INVALID) {
233 olduid = getuid();
234
235 r = seteuid(uid);
236 if (r < 0) {
237 r = -errno;
238 goto restore_gid;
239 }
240 }
241
242 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
243 if (r < 0)
244 r = -errno;
245
246 /* If we fail to restore the uid or gid, things will likely
247 fail later on. This should only happen if an LSM interferes. */
248
249 if (uid != UID_INVALID)
250 (void) seteuid(olduid);
251
252 restore_gid:
253 if (gid != GID_INVALID)
254 (void) setegid(oldgid);
255
256 return r;
257 }
258
259 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
260 int fd, r;
261
262 assert(context);
263 assert(output < _EXEC_OUTPUT_MAX);
264 assert(ident);
265 assert(nfd >= 0);
266
267 fd = socket(AF_UNIX, SOCK_STREAM, 0);
268 if (fd < 0)
269 return -errno;
270
271 r = connect_journal_socket(fd, uid, gid);
272 if (r < 0)
273 return r;
274
275 if (shutdown(fd, SHUT_RD) < 0) {
276 safe_close(fd);
277 return -errno;
278 }
279
280 fd_inc_sndbuf(fd, SNDBUF_SIZE);
281
282 dprintf(fd,
283 "%s\n"
284 "%s\n"
285 "%i\n"
286 "%i\n"
287 "%i\n"
288 "%i\n"
289 "%i\n",
290 context->syslog_identifier ? context->syslog_identifier : ident,
291 unit_id,
292 context->syslog_priority,
293 !!context->syslog_level_prefix,
294 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
295 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
296 is_terminal_output(output));
297
298 if (fd != nfd) {
299 r = dup2(fd, nfd) < 0 ? -errno : nfd;
300 safe_close(fd);
301 } else
302 r = nfd;
303
304 return r;
305 }
/* Opens the terminal at "path" via open_terminal(), passing "mode" with
 * O_NOCTTY or'ed in, and makes it available as fd number nfd. Returns nfd
 * on success, a negative errno-style value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        /* Already at the requested number? Then there's nothing to move. */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
323
324 static bool is_terminal_input(ExecInput i) {
325 return
326 i == EXEC_INPUT_TTY ||
327 i == EXEC_INPUT_TTY_FORCE ||
328 i == EXEC_INPUT_TTY_FAIL;
329 }
330
331 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
332
333 if (is_terminal_input(std_input) && !apply_tty_stdin)
334 return EXEC_INPUT_NULL;
335
336 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
337 return EXEC_INPUT_NULL;
338
339 return std_input;
340 }
341
342 static int fixup_output(ExecOutput std_output, int socket_fd) {
343
344 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
345 return EXEC_OUTPUT_INHERIT;
346
347 return std_output;
348 }
349
350 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
351 ExecInput i;
352
353 assert(context);
354
355 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
356
357 switch (i) {
358
359 case EXEC_INPUT_NULL:
360 return open_null_as(O_RDONLY, STDIN_FILENO);
361
362 case EXEC_INPUT_TTY:
363 case EXEC_INPUT_TTY_FORCE:
364 case EXEC_INPUT_TTY_FAIL: {
365 int fd, r;
366
367 fd = acquire_terminal(tty_path(context),
368 i == EXEC_INPUT_TTY_FAIL,
369 i == EXEC_INPUT_TTY_FORCE,
370 false,
371 USEC_INFINITY);
372 if (fd < 0)
373 return fd;
374
375 if (fd != STDIN_FILENO) {
376 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
377 safe_close(fd);
378 } else
379 r = STDIN_FILENO;
380
381 return r;
382 }
383
384 case EXEC_INPUT_SOCKET:
385 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
386
387 default:
388 assert_not_reached("Unknown input type");
389 }
390 }
391
392 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
393 ExecOutput o;
394 ExecInput i;
395 int r;
396
397 assert(context);
398 assert(ident);
399
400 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
401 o = fixup_output(context->std_output, socket_fd);
402
403 if (fileno == STDERR_FILENO) {
404 ExecOutput e;
405 e = fixup_output(context->std_error, socket_fd);
406
407 /* This expects the input and output are already set up */
408
409 /* Don't change the stderr file descriptor if we inherit all
410 * the way and are not on a tty */
411 if (e == EXEC_OUTPUT_INHERIT &&
412 o == EXEC_OUTPUT_INHERIT &&
413 i == EXEC_INPUT_NULL &&
414 !is_terminal_input(context->std_input) &&
415 getppid () != 1)
416 return fileno;
417
418 /* Duplicate from stdout if possible */
419 if (e == o || e == EXEC_OUTPUT_INHERIT)
420 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
421
422 o = e;
423
424 } else if (o == EXEC_OUTPUT_INHERIT) {
425 /* If input got downgraded, inherit the original value */
426 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
427 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
428
429 /* If the input is connected to anything that's not a /dev/null, inherit that... */
430 if (i != EXEC_INPUT_NULL)
431 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
432
433 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
434 if (getppid() != 1)
435 return fileno;
436
437 /* We need to open /dev/null here anew, to get the right access mode. */
438 return open_null_as(O_WRONLY, fileno);
439 }
440
441 switch (o) {
442
443 case EXEC_OUTPUT_NULL:
444 return open_null_as(O_WRONLY, fileno);
445
446 case EXEC_OUTPUT_TTY:
447 if (is_terminal_input(i))
448 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
449
450 /* We don't reset the terminal if this is just about output */
451 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
452
453 case EXEC_OUTPUT_SYSLOG:
454 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
455 case EXEC_OUTPUT_KMSG:
456 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
457 case EXEC_OUTPUT_JOURNAL:
458 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
459 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
460 if (r < 0) {
461 log_unit_struct(unit_id,
462 LOG_ERR,
463 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
464 fileno == STDOUT_FILENO ? "stdout" : "stderr",
465 unit_id, strerror(-r)),
466 LOG_ERRNO(-r),
467 NULL);
468 r = open_null_as(O_WRONLY, fileno);
469 }
470 return r;
471
472 case EXEC_OUTPUT_SOCKET:
473 assert(socket_fd >= 0);
474 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
475
476 default:
477 assert_not_reached("Unknown error type");
478 }
479 }
480
481 static int chown_terminal(int fd, uid_t uid) {
482 struct stat st;
483
484 assert(fd >= 0);
485
486 /* This might fail. What matters are the results. */
487 (void) fchown(fd, uid, -1);
488 (void) fchmod(fd, TTY_MODE);
489
490 if (fstat(fd, &st) < 0)
491 return -errno;
492
493 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
494 return -EPERM;
495
496 return 0;
497 }
498
499 static int setup_confirm_stdio(int *_saved_stdin,
500 int *_saved_stdout) {
501 int fd = -1, saved_stdin, saved_stdout = -1, r;
502
503 assert(_saved_stdin);
504 assert(_saved_stdout);
505
506 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
507 if (saved_stdin < 0)
508 return -errno;
509
510 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
511 if (saved_stdout < 0) {
512 r = errno;
513 goto fail;
514 }
515
516 fd = acquire_terminal(
517 "/dev/console",
518 false,
519 false,
520 false,
521 DEFAULT_CONFIRM_USEC);
522 if (fd < 0) {
523 r = fd;
524 goto fail;
525 }
526
527 r = chown_terminal(fd, getuid());
528 if (r < 0)
529 goto fail;
530
531 if (dup2(fd, STDIN_FILENO) < 0) {
532 r = -errno;
533 goto fail;
534 }
535
536 if (dup2(fd, STDOUT_FILENO) < 0) {
537 r = -errno;
538 goto fail;
539 }
540
541 if (fd >= 2)
542 safe_close(fd);
543
544 *_saved_stdin = saved_stdin;
545 *_saved_stdout = saved_stdout;
546
547 return 0;
548
549 fail:
550 safe_close(saved_stdout);
551 safe_close(saved_stdin);
552 safe_close(fd);
553
554 return r;
555 }
556
557 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
558 _cleanup_close_ int fd = -1;
559 va_list ap;
560
561 assert(format);
562
563 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
564 if (fd < 0)
565 return fd;
566
567 va_start(ap, format);
568 vdprintf(fd, format, ap);
569 va_end(ap);
570
571 return 0;
572 }
573
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved stdin/stdout back in place (where valid) and closes the saved
 * copies. Returns 0 on success, or -errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        /* The copies are closed in any case, even if restoring failed */
        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
597
598 static int ask_for_confirmation(char *response, char **argv) {
599 int saved_stdout = -1, saved_stdin = -1, r;
600 _cleanup_free_ char *line = NULL;
601
602 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
603 if (r < 0)
604 return r;
605
606 line = exec_command_line(argv);
607 if (!line)
608 return -ENOMEM;
609
610 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
611
612 restore_confirm_stdio(&saved_stdin, &saved_stdout);
613
614 return r;
615 }
616
617 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
618 bool keep_groups = false;
619 int r;
620
621 assert(context);
622
623 /* Lookup and set GID and supplementary group list. Here too
624 * we avoid NSS lookups for gid=0. */
625
626 if (context->group || username) {
627
628 if (context->group) {
629 const char *g = context->group;
630
631 if ((r = get_group_creds(&g, &gid)) < 0)
632 return r;
633 }
634
635 /* First step, initialize groups from /etc/groups */
636 if (username && gid != 0) {
637 if (initgroups(username, gid) < 0)
638 return -errno;
639
640 keep_groups = true;
641 }
642
643 /* Second step, set our gids */
644 if (setresgid(gid, gid, gid) < 0)
645 return -errno;
646 }
647
648 if (context->supplementary_groups) {
649 int ngroups_max, k;
650 gid_t *gids;
651 char **i;
652
653 /* Final step, initialize any manually set supplementary groups */
654 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
655
656 if (!(gids = new(gid_t, ngroups_max)))
657 return -ENOMEM;
658
659 if (keep_groups) {
660 if ((k = getgroups(ngroups_max, gids)) < 0) {
661 free(gids);
662 return -errno;
663 }
664 } else
665 k = 0;
666
667 STRV_FOREACH(i, context->supplementary_groups) {
668 const char *g;
669
670 if (k >= ngroups_max) {
671 free(gids);
672 return -E2BIG;
673 }
674
675 g = *i;
676 r = get_group_creds(&g, gids+k);
677 if (r < 0) {
678 free(gids);
679 return r;
680 }
681
682 k++;
683 }
684
685 if (setgroups(k, gids) < 0) {
686 free(gids);
687 return -errno;
688 }
689
690 free(gids);
691 }
692
693 return 0;
694 }
695
696 static int enforce_user(const ExecContext *context, uid_t uid) {
697 assert(context);
698
699 /* Sets (but doesn't lookup) the uid and make sure we keep the
700 * capabilities while doing so. */
701
702 if (context->capabilities) {
703 _cleanup_cap_free_ cap_t d = NULL;
704 static const cap_value_t bits[] = {
705 CAP_SETUID, /* Necessary so that we can run setresuid() below */
706 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
707 };
708
709 /* First step: If we need to keep capabilities but
710 * drop privileges we need to make sure we keep our
711 * caps, while we drop privileges. */
712 if (uid != 0) {
713 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
714
715 if (prctl(PR_GET_SECUREBITS) != sb)
716 if (prctl(PR_SET_SECUREBITS, sb) < 0)
717 return -errno;
718 }
719
720 /* Second step: set the capabilities. This will reduce
721 * the capabilities to the minimum we need. */
722
723 d = cap_dup(context->capabilities);
724 if (!d)
725 return -errno;
726
727 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
728 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
729 return -errno;
730
731 if (cap_set_proc(d) < 0)
732 return -errno;
733 }
734
735 /* Third step: actually set the uids */
736 if (setresuid(uid, uid, uid) < 0)
737 return -errno;
738
739 /* At this point we should have all necessary capabilities but
740 are otherwise a normal user. However, the caps might got
741 corrupted due to the setresuid() so we need clean them up
742 later. This is done outside of this call. */
743
744 return 0;
745 }
746
747 #ifdef HAVE_PAM
748
749 static int null_conv(
750 int num_msg,
751 const struct pam_message **msg,
752 struct pam_response **resp,
753 void *appdata_ptr) {
754
755 /* We don't support conversations */
756
757 return PAM_CONV_ERR;
758 }
759
760 static int setup_pam(
761 const char *name,
762 const char *user,
763 uid_t uid,
764 const char *tty,
765 char ***pam_env,
766 int fds[], unsigned n_fds) {
767
768 static const struct pam_conv conv = {
769 .conv = null_conv,
770 .appdata_ptr = NULL
771 };
772
773 pam_handle_t *handle = NULL;
774 sigset_t ss, old_ss;
775 int pam_code = PAM_SUCCESS;
776 int err;
777 char **e = NULL;
778 bool close_session = false;
779 pid_t pam_pid = 0, parent_pid;
780 int flags = 0;
781
782 assert(name);
783 assert(user);
784 assert(pam_env);
785
786 /* We set up PAM in the parent process, then fork. The child
787 * will then stay around until killed via PR_GET_PDEATHSIG or
788 * systemd via the cgroup logic. It will then remove the PAM
789 * session again. The parent process will exec() the actual
790 * daemon. We do things this way to ensure that the main PID
791 * of the daemon is the one we initially fork()ed. */
792
793 if (log_get_max_level() < LOG_DEBUG)
794 flags |= PAM_SILENT;
795
796 pam_code = pam_start(name, user, &conv, &handle);
797 if (pam_code != PAM_SUCCESS) {
798 handle = NULL;
799 goto fail;
800 }
801
802 if (tty) {
803 pam_code = pam_set_item(handle, PAM_TTY, tty);
804 if (pam_code != PAM_SUCCESS)
805 goto fail;
806 }
807
808 pam_code = pam_acct_mgmt(handle, flags);
809 if (pam_code != PAM_SUCCESS)
810 goto fail;
811
812 pam_code = pam_open_session(handle, flags);
813 if (pam_code != PAM_SUCCESS)
814 goto fail;
815
816 close_session = true;
817
818 e = pam_getenvlist(handle);
819 if (!e) {
820 pam_code = PAM_BUF_ERR;
821 goto fail;
822 }
823
824 /* Block SIGTERM, so that we know that it won't get lost in
825 * the child */
826 if (sigemptyset(&ss) < 0 ||
827 sigaddset(&ss, SIGTERM) < 0 ||
828 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
829 goto fail;
830
831 parent_pid = getpid();
832
833 pam_pid = fork();
834 if (pam_pid < 0)
835 goto fail;
836
837 if (pam_pid == 0) {
838 int sig;
839 int r = EXIT_PAM;
840
841 /* The child's job is to reset the PAM session on
842 * termination */
843
844 /* This string must fit in 10 chars (i.e. the length
845 * of "/sbin/init"), to look pretty in /bin/ps */
846 rename_process("(sd-pam)");
847
848 /* Make sure we don't keep open the passed fds in this
849 child. We assume that otherwise only those fds are
850 open here that have been opened by PAM. */
851 close_many(fds, n_fds);
852
853 /* Drop privileges - we don't need any to pam_close_session
854 * and this will make PR_SET_PDEATHSIG work in most cases.
855 * If this fails, ignore the error - but expect sd-pam threads
856 * to fail to exit normally */
857 if (setresuid(uid, uid, uid) < 0)
858 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
859
860 /* Wait until our parent died. This will only work if
861 * the above setresuid() succeeds, otherwise the kernel
862 * will not allow unprivileged parents kill their privileged
863 * children this way. We rely on the control groups kill logic
864 * to do the rest for us. */
865 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
866 goto child_finish;
867
868 /* Check if our parent process might already have
869 * died? */
870 if (getppid() == parent_pid) {
871 for (;;) {
872 if (sigwait(&ss, &sig) < 0) {
873 if (errno == EINTR)
874 continue;
875
876 goto child_finish;
877 }
878
879 assert(sig == SIGTERM);
880 break;
881 }
882 }
883
884 /* If our parent died we'll end the session */
885 if (getppid() != parent_pid) {
886 pam_code = pam_close_session(handle, flags);
887 if (pam_code != PAM_SUCCESS)
888 goto child_finish;
889 }
890
891 r = 0;
892
893 child_finish:
894 pam_end(handle, pam_code | flags);
895 _exit(r);
896 }
897
898 /* If the child was forked off successfully it will do all the
899 * cleanups, so forget about the handle here. */
900 handle = NULL;
901
902 /* Unblock SIGTERM again in the parent */
903 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
904 goto fail;
905
906 /* We close the log explicitly here, since the PAM modules
907 * might have opened it, but we don't want this fd around. */
908 closelog();
909
910 *pam_env = e;
911 e = NULL;
912
913 return 0;
914
915 fail:
916 if (pam_code != PAM_SUCCESS) {
917 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
918 err = -EPERM; /* PAM errors do not map to errno */
919 } else {
920 log_error_errno(errno, "PAM failed: %m");
921 err = -errno;
922 }
923
924 if (handle) {
925 if (close_session)
926 pam_code = pam_close_session(handle, flags);
927
928 pam_end(handle, pam_code | flags);
929 }
930
931 strv_free(e);
932
933 closelog();
934
935 if (pam_pid > 1) {
936 kill(pam_pid, SIGTERM);
937 kill(pam_pid, SIGCONT);
938 }
939
940 return err;
941 }
942 #endif
943
/* Renames the current process to "(NAME)", where NAME is made up of the last
 * (up to) 8 characters of the basename of path. If the basename is empty,
 * "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t n;

        /* The resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the end of the name is usually the more
                 * interesting part, since the beginning might just be
                 * "systemd-" */
                base += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, n);
        /* Appends the closing parenthesis and the terminating NUL byte */
        memcpy(process_name + 1 + n, ")", 2);

        rename_process(process_name);
}
974
975 #ifdef HAVE_SECCOMP
976
977 static int apply_seccomp(const ExecContext *c) {
978 uint32_t negative_action, action;
979 scmp_filter_ctx *seccomp;
980 Iterator i;
981 void *id;
982 int r;
983
984 assert(c);
985
986 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
987
988 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
989 if (!seccomp)
990 return -ENOMEM;
991
992 if (c->syscall_archs) {
993
994 SET_FOREACH(id, c->syscall_archs, i) {
995 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
996 if (r == -EEXIST)
997 continue;
998 if (r < 0)
999 goto finish;
1000 }
1001
1002 } else {
1003 r = seccomp_add_secondary_archs(seccomp);
1004 if (r < 0)
1005 goto finish;
1006 }
1007
1008 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1009 SET_FOREACH(id, c->syscall_filter, i) {
1010 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1011 if (r < 0)
1012 goto finish;
1013 }
1014
1015 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1016 if (r < 0)
1017 goto finish;
1018
1019 r = seccomp_load(seccomp);
1020
1021 finish:
1022 seccomp_release(seccomp);
1023 return r;
1024 }
1025
1026 static int apply_address_families(const ExecContext *c) {
1027 scmp_filter_ctx *seccomp;
1028 Iterator i;
1029 int r;
1030
1031 assert(c);
1032
1033 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1034 if (!seccomp)
1035 return -ENOMEM;
1036
1037 r = seccomp_add_secondary_archs(seccomp);
1038 if (r < 0)
1039 goto finish;
1040
1041 if (c->address_families_whitelist) {
1042 int af, first = 0, last = 0;
1043 void *afp;
1044
1045 /* If this is a whitelist, we first block the address
1046 * families that are out of range and then everything
1047 * that is not in the set. First, we find the lowest
1048 * and highest address family in the set. */
1049
1050 SET_FOREACH(afp, c->address_families, i) {
1051 af = PTR_TO_INT(afp);
1052
1053 if (af <= 0 || af >= af_max())
1054 continue;
1055
1056 if (first == 0 || af < first)
1057 first = af;
1058
1059 if (last == 0 || af > last)
1060 last = af;
1061 }
1062
1063 assert((first == 0) == (last == 0));
1064
1065 if (first == 0) {
1066
1067 /* No entries in the valid range, block everything */
1068 r = seccomp_rule_add(
1069 seccomp,
1070 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1071 SCMP_SYS(socket),
1072 0);
1073 if (r < 0)
1074 goto finish;
1075
1076 } else {
1077
1078 /* Block everything below the first entry */
1079 r = seccomp_rule_add(
1080 seccomp,
1081 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1082 SCMP_SYS(socket),
1083 1,
1084 SCMP_A0(SCMP_CMP_LT, first));
1085 if (r < 0)
1086 goto finish;
1087
1088 /* Block everything above the last entry */
1089 r = seccomp_rule_add(
1090 seccomp,
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092 SCMP_SYS(socket),
1093 1,
1094 SCMP_A0(SCMP_CMP_GT, last));
1095 if (r < 0)
1096 goto finish;
1097
1098 /* Block everything between the first and last
1099 * entry */
1100 for (af = 1; af < af_max(); af++) {
1101
1102 if (set_contains(c->address_families, INT_TO_PTR(af)))
1103 continue;
1104
1105 r = seccomp_rule_add(
1106 seccomp,
1107 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1108 SCMP_SYS(socket),
1109 1,
1110 SCMP_A0(SCMP_CMP_EQ, af));
1111 if (r < 0)
1112 goto finish;
1113 }
1114 }
1115
1116 } else {
1117 void *af;
1118
1119 /* If this is a blacklist, then generate one rule for
1120 * each address family that are then combined in OR
1121 * checks. */
1122
1123 SET_FOREACH(af, c->address_families, i) {
1124
1125 r = seccomp_rule_add(
1126 seccomp,
1127 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1128 SCMP_SYS(socket),
1129 1,
1130 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1131 if (r < 0)
1132 goto finish;
1133 }
1134 }
1135
1136 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1137 if (r < 0)
1138 goto finish;
1139
1140 r = seccomp_load(seccomp);
1141
1142 finish:
1143 seccomp_release(seccomp);
1144 return r;
1145 }
1146
1147 #endif
1148
1149 static void do_idle_pipe_dance(int idle_pipe[4]) {
1150 assert(idle_pipe);
1151
1152
1153 safe_close(idle_pipe[1]);
1154 safe_close(idle_pipe[2]);
1155
1156 if (idle_pipe[0] >= 0) {
1157 int r;
1158
1159 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1160
1161 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1162 /* Signal systemd that we are bored and want to continue. */
1163 r = write(idle_pipe[3], "x", 1);
1164 if (r > 0)
1165 /* Wait for systemd to react to the signal above. */
1166 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1167 }
1168
1169 safe_close(idle_pipe[0]);
1170
1171 }
1172
1173 safe_close(idle_pipe[3]);
1174 }
1175
1176 static int build_environment(
1177 const ExecContext *c,
1178 unsigned n_fds,
1179 usec_t watchdog_usec,
1180 const char *home,
1181 const char *username,
1182 const char *shell,
1183 char ***ret) {
1184
1185 _cleanup_strv_free_ char **our_env = NULL;
1186 unsigned n_env = 0;
1187 char *x;
1188
1189 assert(c);
1190 assert(ret);
1191
1192 our_env = new0(char*, 10);
1193 if (!our_env)
1194 return -ENOMEM;
1195
1196 if (n_fds > 0) {
1197 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1198 return -ENOMEM;
1199 our_env[n_env++] = x;
1200
1201 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1202 return -ENOMEM;
1203 our_env[n_env++] = x;
1204 }
1205
1206 if (watchdog_usec > 0) {
1207 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1208 return -ENOMEM;
1209 our_env[n_env++] = x;
1210
1211 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1212 return -ENOMEM;
1213 our_env[n_env++] = x;
1214 }
1215
1216 if (home) {
1217 x = strappend("HOME=", home);
1218 if (!x)
1219 return -ENOMEM;
1220 our_env[n_env++] = x;
1221 }
1222
1223 if (username) {
1224 x = strappend("LOGNAME=", username);
1225 if (!x)
1226 return -ENOMEM;
1227 our_env[n_env++] = x;
1228
1229 x = strappend("USER=", username);
1230 if (!x)
1231 return -ENOMEM;
1232 our_env[n_env++] = x;
1233 }
1234
1235 if (shell) {
1236 x = strappend("SHELL=", shell);
1237 if (!x)
1238 return -ENOMEM;
1239 our_env[n_env++] = x;
1240 }
1241
1242 if (is_terminal_input(c->std_input) ||
1243 c->std_output == EXEC_OUTPUT_TTY ||
1244 c->std_error == EXEC_OUTPUT_TTY ||
1245 c->tty_path) {
1246
1247 x = strdup(default_term_for_tty(tty_path(c)));
1248 if (!x)
1249 return -ENOMEM;
1250 our_env[n_env++] = x;
1251 }
1252
1253 our_env[n_env++] = NULL;
1254 assert(n_env <= 10);
1255
1256 *ret = our_env;
1257 our_env = NULL;
1258
1259 return 0;
1260 }
1261
1262 static int exec_child(
1263 ExecCommand *command,
1264 const ExecContext *context,
1265 const ExecParameters *params,
1266 ExecRuntime *runtime,
1267 char **argv,
1268 int socket_fd,
1269 int *fds, unsigned n_fds,
1270 char **files_env,
1271 int *exit_status) {
1272
1273 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1274 _cleanup_free_ char *mac_selinux_context_net = NULL;
1275 const char *username = NULL, *home = NULL, *shell = NULL;
1276 unsigned n_dont_close = 0;
1277 int dont_close[n_fds + 4];
1278 uid_t uid = UID_INVALID;
1279 gid_t gid = GID_INVALID;
1280 int i, r;
1281
1282 assert(command);
1283 assert(context);
1284 assert(params);
1285 assert(exit_status);
1286
1287 rename_process_from_path(command->path);
1288
1289 /* We reset exactly these signals, since they are the
1290 * only ones we set to SIG_IGN in the main daemon. All
1291 * others we leave untouched because we set them to
1292 * SIG_DFL or a valid handler initially, both of which
1293 * will be demoted to SIG_DFL. */
1294 default_signals(SIGNALS_CRASH_HANDLER,
1295 SIGNALS_IGNORE, -1);
1296
1297 if (context->ignore_sigpipe)
1298 ignore_signals(SIGPIPE, -1);
1299
1300 r = reset_signal_mask();
1301 if (r < 0) {
1302 *exit_status = EXIT_SIGNAL_MASK;
1303 return r;
1304 }
1305
1306 if (params->idle_pipe)
1307 do_idle_pipe_dance(params->idle_pipe);
1308
1309 /* Close sockets very early to make sure we don't
1310 * block init reexecution because it cannot bind its
1311 * sockets */
1312
1313 log_forget_fds();
1314
1315 if (socket_fd >= 0)
1316 dont_close[n_dont_close++] = socket_fd;
1317 if (n_fds > 0) {
1318 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1319 n_dont_close += n_fds;
1320 }
1321 if (params->bus_endpoint_fd >= 0)
1322 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1323 if (runtime) {
1324 if (runtime->netns_storage_socket[0] >= 0)
1325 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1326 if (runtime->netns_storage_socket[1] >= 0)
1327 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1328 }
1329
1330 r = close_all_fds(dont_close, n_dont_close);
1331 if (r < 0) {
1332 *exit_status = EXIT_FDS;
1333 return r;
1334 }
1335
1336 if (!context->same_pgrp)
1337 if (setsid() < 0) {
1338 *exit_status = EXIT_SETSID;
1339 return -errno;
1340 }
1341
1342 exec_context_tty_reset(context);
1343
1344 if (params->confirm_spawn) {
1345 char response;
1346
1347 r = ask_for_confirmation(&response, argv);
1348 if (r == -ETIMEDOUT)
1349 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1350 else if (r < 0)
1351 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1352 else if (response == 's') {
1353 write_confirm_message("Skipping execution.\n");
1354 *exit_status = EXIT_CONFIRM;
1355 return -ECANCELED;
1356 } else if (response == 'n') {
1357 write_confirm_message("Failing execution.\n");
1358 *exit_status = 0;
1359 return 0;
1360 }
1361 }
1362
1363 if (context->user) {
1364 username = context->user;
1365 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1366 if (r < 0) {
1367 *exit_status = EXIT_USER;
1368 return r;
1369 }
1370 }
1371
1372 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1373 * must sure to drop O_NONBLOCK */
1374 if (socket_fd >= 0)
1375 fd_nonblock(socket_fd, false);
1376
1377 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1378 if (r < 0) {
1379 *exit_status = EXIT_STDIN;
1380 return r;
1381 }
1382
1383 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1384 if (r < 0) {
1385 *exit_status = EXIT_STDOUT;
1386 return r;
1387 }
1388
1389 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1390 if (r < 0) {
1391 *exit_status = EXIT_STDERR;
1392 return r;
1393 }
1394
1395 if (params->cgroup_path) {
1396 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1397 if (r < 0) {
1398 *exit_status = EXIT_CGROUP;
1399 return r;
1400 }
1401 }
1402
1403 if (context->oom_score_adjust_set) {
1404 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1405
1406 /* When we can't make this change due to EPERM, then
1407 * let's silently skip over it. User namespaces
1408 * prohibit write access to this file, and we
1409 * shouldn't trip up over that. */
1410
1411 sprintf(t, "%i", context->oom_score_adjust);
1412 r = write_string_file("/proc/self/oom_score_adj", t);
1413 if (r == -EPERM || r == -EACCES) {
1414 log_open();
1415 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1416 log_close();
1417 } else if (r < 0) {
1418 *exit_status = EXIT_OOM_ADJUST;
1419 return -errno;
1420 }
1421 }
1422
1423 if (context->nice_set)
1424 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1425 *exit_status = EXIT_NICE;
1426 return -errno;
1427 }
1428
1429 if (context->cpu_sched_set) {
1430 struct sched_param param = {
1431 .sched_priority = context->cpu_sched_priority,
1432 };
1433
1434 r = sched_setscheduler(0,
1435 context->cpu_sched_policy |
1436 (context->cpu_sched_reset_on_fork ?
1437 SCHED_RESET_ON_FORK : 0),
1438 &param);
1439 if (r < 0) {
1440 *exit_status = EXIT_SETSCHEDULER;
1441 return -errno;
1442 }
1443 }
1444
1445 if (context->cpuset)
1446 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1447 *exit_status = EXIT_CPUAFFINITY;
1448 return -errno;
1449 }
1450
1451 if (context->ioprio_set)
1452 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1453 *exit_status = EXIT_IOPRIO;
1454 return -errno;
1455 }
1456
1457 if (context->timer_slack_nsec != NSEC_INFINITY)
1458 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1459 *exit_status = EXIT_TIMERSLACK;
1460 return -errno;
1461 }
1462
1463 if (context->personality != 0xffffffffUL)
1464 if (personality(context->personality) < 0) {
1465 *exit_status = EXIT_PERSONALITY;
1466 return -errno;
1467 }
1468
1469 if (context->utmp_id)
1470 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1471
1472 if (context->user && is_terminal_input(context->std_input)) {
1473 r = chown_terminal(STDIN_FILENO, uid);
1474 if (r < 0) {
1475 *exit_status = EXIT_STDIN;
1476 return r;
1477 }
1478 }
1479
1480 #ifdef ENABLE_KDBUS
1481 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1482 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1483
1484 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1485 if (r < 0) {
1486 *exit_status = EXIT_BUS_ENDPOINT;
1487 return r;
1488 }
1489 }
1490 #endif
1491
1492 /* If delegation is enabled we'll pass ownership of the cgroup
1493 * (but only in systemd's own controller hierarchy!) to the
1494 * user of the new process. */
1495 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1496 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1497 if (r < 0) {
1498 *exit_status = EXIT_CGROUP;
1499 return r;
1500 }
1501
1502
1503 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1504 if (r < 0) {
1505 *exit_status = EXIT_CGROUP;
1506 return r;
1507 }
1508 }
1509
1510 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1511 char **rt;
1512
1513 STRV_FOREACH(rt, context->runtime_directory) {
1514 _cleanup_free_ char *p;
1515
1516 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1517 if (!p) {
1518 *exit_status = EXIT_RUNTIME_DIRECTORY;
1519 return -ENOMEM;
1520 }
1521
1522 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1523 if (r < 0) {
1524 *exit_status = EXIT_RUNTIME_DIRECTORY;
1525 return r;
1526 }
1527 }
1528 }
1529
1530 if (params->apply_permissions) {
1531 r = enforce_groups(context, username, gid);
1532 if (r < 0) {
1533 *exit_status = EXIT_GROUP;
1534 return r;
1535 }
1536 }
1537
1538 umask(context->umask);
1539
1540 #ifdef HAVE_PAM
1541 if (params->apply_permissions && context->pam_name && username) {
1542 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1543 if (r < 0) {
1544 *exit_status = EXIT_PAM;
1545 return r;
1546 }
1547 }
1548 #endif
1549
1550 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1551 r = setup_netns(runtime->netns_storage_socket);
1552 if (r < 0) {
1553 *exit_status = EXIT_NETWORK;
1554 return r;
1555 }
1556 }
1557
1558 if (!strv_isempty(context->read_write_dirs) ||
1559 !strv_isempty(context->read_only_dirs) ||
1560 !strv_isempty(context->inaccessible_dirs) ||
1561 context->mount_flags != 0 ||
1562 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1563 params->bus_endpoint_path ||
1564 context->private_devices ||
1565 context->protect_system != PROTECT_SYSTEM_NO ||
1566 context->protect_home != PROTECT_HOME_NO) {
1567
1568 char *tmp = NULL, *var = NULL;
1569
1570 /* The runtime struct only contains the parent
1571 * of the private /tmp, which is
1572 * non-accessible to world users. Inside of it
1573 * there's a /tmp that is sticky, and that's
1574 * the one we want to use here. */
1575
1576 if (context->private_tmp && runtime) {
1577 if (runtime->tmp_dir)
1578 tmp = strjoina(runtime->tmp_dir, "/tmp");
1579 if (runtime->var_tmp_dir)
1580 var = strjoina(runtime->var_tmp_dir, "/tmp");
1581 }
1582
1583 r = setup_namespace(
1584 context->read_write_dirs,
1585 context->read_only_dirs,
1586 context->inaccessible_dirs,
1587 tmp,
1588 var,
1589 params->bus_endpoint_path,
1590 context->private_devices,
1591 context->protect_home,
1592 context->protect_system,
1593 context->mount_flags);
1594
1595 /* If we couldn't set up the namespace this is
1596 * probably due to a missing capability. In this case,
1597 * silently proceeed. */
1598 if (r == -EPERM || r == -EACCES) {
1599 log_open();
1600 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1601 log_close();
1602 } else if (r < 0) {
1603 *exit_status = EXIT_NAMESPACE;
1604 return r;
1605 }
1606 }
1607
1608 if (params->apply_chroot) {
1609 if (context->root_directory)
1610 if (chroot(context->root_directory) < 0) {
1611 *exit_status = EXIT_CHROOT;
1612 return -errno;
1613 }
1614
1615 if (chdir(context->working_directory ?: "/") < 0 &&
1616 !context->working_directory_missing_ok) {
1617 *exit_status = EXIT_CHDIR;
1618 return -errno;
1619 }
1620 } else {
1621 _cleanup_free_ char *d = NULL;
1622
1623 if (asprintf(&d, "%s/%s",
1624 context->root_directory ?: "",
1625 context->working_directory ?: "") < 0) {
1626 *exit_status = EXIT_MEMORY;
1627 return -ENOMEM;
1628 }
1629
1630 if (chdir(d) < 0 &&
1631 !context->working_directory_missing_ok) {
1632 *exit_status = EXIT_CHDIR;
1633 return -errno;
1634 }
1635 }
1636
1637 #ifdef HAVE_SELINUX
1638 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1639 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1640 if (r < 0) {
1641 *exit_status = EXIT_SELINUX_CONTEXT;
1642 return r;
1643 }
1644 }
1645 #endif
1646
1647 /* We repeat the fd closing here, to make sure that
1648 * nothing is leaked from the PAM modules. Note that
1649 * we are more aggressive this time since socket_fd
1650 * and the netns fds we don't need anymore. The custom
1651 * endpoint fd was needed to upload the policy and can
1652 * now be closed as well. */
1653 r = close_all_fds(fds, n_fds);
1654 if (r >= 0)
1655 r = shift_fds(fds, n_fds);
1656 if (r >= 0)
1657 r = flags_fds(fds, n_fds, context->non_blocking);
1658 if (r < 0) {
1659 *exit_status = EXIT_FDS;
1660 return r;
1661 }
1662
1663 if (params->apply_permissions) {
1664
1665 for (i = 0; i < _RLIMIT_MAX; i++) {
1666 if (!context->rlimit[i])
1667 continue;
1668
1669 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1670 *exit_status = EXIT_LIMITS;
1671 return -errno;
1672 }
1673 }
1674
1675 if (context->capability_bounding_set_drop) {
1676 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1677 if (r < 0) {
1678 *exit_status = EXIT_CAPABILITIES;
1679 return r;
1680 }
1681 }
1682
1683 #ifdef HAVE_SMACK
1684 if (context->smack_process_label) {
1685 r = mac_smack_apply_pid(0, context->smack_process_label);
1686 if (r < 0) {
1687 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1688 return r;
1689 }
1690 }
1691 #endif
1692
1693 if (context->user) {
1694 r = enforce_user(context, uid);
1695 if (r < 0) {
1696 *exit_status = EXIT_USER;
1697 return r;
1698 }
1699 }
1700
1701 /* PR_GET_SECUREBITS is not privileged, while
1702 * PR_SET_SECUREBITS is. So to suppress
1703 * potential EPERMs we'll try not to call
1704 * PR_SET_SECUREBITS unless necessary. */
1705 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1706 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1707 *exit_status = EXIT_SECUREBITS;
1708 return -errno;
1709 }
1710
1711 if (context->capabilities)
1712 if (cap_set_proc(context->capabilities) < 0) {
1713 *exit_status = EXIT_CAPABILITIES;
1714 return -errno;
1715 }
1716
1717 if (context->no_new_privileges)
1718 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1719 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1720 return -errno;
1721 }
1722
1723 #ifdef HAVE_SECCOMP
1724 if (context->address_families_whitelist ||
1725 !set_isempty(context->address_families)) {
1726 r = apply_address_families(context);
1727 if (r < 0) {
1728 *exit_status = EXIT_ADDRESS_FAMILIES;
1729 return r;
1730 }
1731 }
1732
1733 if (context->syscall_whitelist ||
1734 !set_isempty(context->syscall_filter) ||
1735 !set_isempty(context->syscall_archs)) {
1736 r = apply_seccomp(context);
1737 if (r < 0) {
1738 *exit_status = EXIT_SECCOMP;
1739 return r;
1740 }
1741 }
1742 #endif
1743
1744 #ifdef HAVE_SELINUX
1745 if (mac_selinux_use()) {
1746 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1747
1748 if (exec_context) {
1749 r = setexeccon(exec_context);
1750 if (r < 0) {
1751 *exit_status = EXIT_SELINUX_CONTEXT;
1752 return r;
1753 }
1754 }
1755 }
1756 #endif
1757
1758 #ifdef HAVE_APPARMOR
1759 if (context->apparmor_profile && mac_apparmor_use()) {
1760 r = aa_change_onexec(context->apparmor_profile);
1761 if (r < 0 && !context->apparmor_profile_ignore) {
1762 *exit_status = EXIT_APPARMOR_PROFILE;
1763 return -errno;
1764 }
1765 }
1766 #endif
1767 }
1768
1769 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1770 if (r < 0) {
1771 *exit_status = EXIT_MEMORY;
1772 return r;
1773 }
1774
1775 final_env = strv_env_merge(5,
1776 params->environment,
1777 our_env,
1778 context->environment,
1779 files_env,
1780 pam_env,
1781 NULL);
1782 if (!final_env) {
1783 *exit_status = EXIT_MEMORY;
1784 return -ENOMEM;
1785 }
1786
1787 final_argv = replace_env_argv(argv, final_env);
1788 if (!final_argv) {
1789 *exit_status = EXIT_MEMORY;
1790 return -ENOMEM;
1791 }
1792
1793 final_env = strv_env_clean(final_env);
1794
1795 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1796 _cleanup_free_ char *line;
1797
1798 line = exec_command_line(final_argv);
1799 if (line) {
1800 log_open();
1801 log_unit_struct(params->unit_id,
1802 LOG_DEBUG,
1803 "EXECUTABLE=%s", command->path,
1804 LOG_MESSAGE("Executing: %s", line),
1805 NULL);
1806 log_close();
1807 }
1808 }
1809 execve(command->path, final_argv, final_env);
1810 *exit_status = EXIT_EXEC;
1811 return -errno;
1812 }
1813
1814 int exec_spawn(ExecCommand *command,
1815 const ExecContext *context,
1816 const ExecParameters *params,
1817 ExecRuntime *runtime,
1818 pid_t *ret) {
1819
1820 _cleanup_strv_free_ char **files_env = NULL;
1821 int *fds = NULL; unsigned n_fds = 0;
1822 _cleanup_free_ char *line = NULL;
1823 int socket_fd, r;
1824 char **argv;
1825 pid_t pid;
1826
1827 assert(command);
1828 assert(context);
1829 assert(ret);
1830 assert(params);
1831 assert(params->fds || params->n_fds <= 0);
1832
1833 if (context->std_input == EXEC_INPUT_SOCKET ||
1834 context->std_output == EXEC_OUTPUT_SOCKET ||
1835 context->std_error == EXEC_OUTPUT_SOCKET) {
1836
1837 if (params->n_fds != 1) {
1838 log_unit_error(params->unit_id, "Got more than one socket.");
1839 return -EINVAL;
1840 }
1841
1842 socket_fd = params->fds[0];
1843 } else {
1844 socket_fd = -1;
1845 fds = params->fds;
1846 n_fds = params->n_fds;
1847 }
1848
1849 r = exec_context_load_environment(context, params->unit_id, &files_env);
1850 if (r < 0)
1851 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1852
1853 argv = params->argv ?: command->argv;
1854 line = exec_command_line(argv);
1855 if (!line)
1856 return log_oom();
1857
1858 log_unit_struct(params->unit_id,
1859 LOG_DEBUG,
1860 "EXECUTABLE=%s", command->path,
1861 LOG_MESSAGE("About to execute: %s", line),
1862 NULL);
1863 pid = fork();
1864 if (pid < 0)
1865 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1866
1867 if (pid == 0) {
1868 int exit_status;
1869
1870 r = exec_child(command,
1871 context,
1872 params,
1873 runtime,
1874 argv,
1875 socket_fd,
1876 fds, n_fds,
1877 files_env,
1878 &exit_status);
1879 if (r < 0) {
1880 log_open();
1881 log_unit_struct(params->unit_id,
1882 LOG_ERR,
1883 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1884 "EXECUTABLE=%s", command->path,
1885 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1886 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1887 command->path, strerror(-r)),
1888 LOG_ERRNO(r),
1889 NULL);
1890 }
1891
1892 _exit(exit_status);
1893 }
1894
1895 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1896
1897 /* We add the new process to the cgroup both in the child (so
1898 * that we can be sure that no user code is ever executed
1899 * outside of the cgroup) and in the parent (so that we can be
1900 * sure that when we kill the cgroup the process will be
1901 * killed too). */
1902 if (params->cgroup_path)
1903 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1904
1905 exec_status_start(&command->exec_status, pid);
1906
1907 *ret = pid;
1908 return 0;
1909 }
1910
1911 void exec_context_init(ExecContext *c) {
1912 assert(c);
1913
1914 c->umask = 0022;
1915 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1916 c->cpu_sched_policy = SCHED_OTHER;
1917 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1918 c->syslog_level_prefix = true;
1919 c->ignore_sigpipe = true;
1920 c->timer_slack_nsec = NSEC_INFINITY;
1921 c->personality = 0xffffffffUL;
1922 c->runtime_directory_mode = 0755;
1923 }
1924
1925 void exec_context_done(ExecContext *c) {
1926 unsigned l;
1927
1928 assert(c);
1929
1930 strv_free(c->environment);
1931 c->environment = NULL;
1932
1933 strv_free(c->environment_files);
1934 c->environment_files = NULL;
1935
1936 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1937 free(c->rlimit[l]);
1938 c->rlimit[l] = NULL;
1939 }
1940
1941 free(c->working_directory);
1942 c->working_directory = NULL;
1943 free(c->root_directory);
1944 c->root_directory = NULL;
1945
1946 free(c->tty_path);
1947 c->tty_path = NULL;
1948
1949 free(c->syslog_identifier);
1950 c->syslog_identifier = NULL;
1951
1952 free(c->user);
1953 c->user = NULL;
1954
1955 free(c->group);
1956 c->group = NULL;
1957
1958 strv_free(c->supplementary_groups);
1959 c->supplementary_groups = NULL;
1960
1961 free(c->pam_name);
1962 c->pam_name = NULL;
1963
1964 if (c->capabilities) {
1965 cap_free(c->capabilities);
1966 c->capabilities = NULL;
1967 }
1968
1969 strv_free(c->read_only_dirs);
1970 c->read_only_dirs = NULL;
1971
1972 strv_free(c->read_write_dirs);
1973 c->read_write_dirs = NULL;
1974
1975 strv_free(c->inaccessible_dirs);
1976 c->inaccessible_dirs = NULL;
1977
1978 if (c->cpuset)
1979 CPU_FREE(c->cpuset);
1980
1981 free(c->utmp_id);
1982 c->utmp_id = NULL;
1983
1984 free(c->selinux_context);
1985 c->selinux_context = NULL;
1986
1987 free(c->apparmor_profile);
1988 c->apparmor_profile = NULL;
1989
1990 set_free(c->syscall_filter);
1991 c->syscall_filter = NULL;
1992
1993 set_free(c->syscall_archs);
1994 c->syscall_archs = NULL;
1995
1996 set_free(c->address_families);
1997 c->address_families = NULL;
1998
1999 strv_free(c->runtime_directory);
2000 c->runtime_directory = NULL;
2001
2002 bus_endpoint_free(c->bus_endpoint);
2003 c->bus_endpoint = NULL;
2004 }
2005
2006 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2007 char **i;
2008
2009 assert(c);
2010
2011 if (!runtime_prefix)
2012 return 0;
2013
2014 STRV_FOREACH(i, c->runtime_directory) {
2015 _cleanup_free_ char *p;
2016
2017 p = strjoin(runtime_prefix, "/", *i, NULL);
2018 if (!p)
2019 return -ENOMEM;
2020
2021 /* We execute this synchronously, since we need to be
2022 * sure this is gone when we start the service
2023 * next. */
2024 (void) rm_rf(p, REMOVE_ROOT);
2025 }
2026
2027 return 0;
2028 }
2029
2030 void exec_command_done(ExecCommand *c) {
2031 assert(c);
2032
2033 free(c->path);
2034 c->path = NULL;
2035
2036 strv_free(c->argv);
2037 c->argv = NULL;
2038 }
2039
2040 void exec_command_done_array(ExecCommand *c, unsigned n) {
2041 unsigned i;
2042
2043 for (i = 0; i < n; i++)
2044 exec_command_done(c+i);
2045 }
2046
2047 ExecCommand* exec_command_free_list(ExecCommand *c) {
2048 ExecCommand *i;
2049
2050 while ((i = c)) {
2051 LIST_REMOVE(command, c, i);
2052 exec_command_done(i);
2053 free(i);
2054 }
2055
2056 return NULL;
2057 }
2058
2059 void exec_command_free_array(ExecCommand **c, unsigned n) {
2060 unsigned i;
2061
2062 for (i = 0; i < n; i++)
2063 c[i] = exec_command_free_list(c[i]);
2064 }
2065
/* Context handed to invalid_env() as userdata while cleaning up the
 * assignments loaded from one environment file. */
typedef struct InvalidEnvInfo {
        const char *unit_id; /* unit the message is logged for */
        const char *path;    /* environment file the assignment came from */
} InvalidEnvInfo;
2070
2071 static void invalid_env(const char *p, void *userdata) {
2072 InvalidEnvInfo *info = userdata;
2073
2074 log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2075 }
2076
2077 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2078 char **i, **r = NULL;
2079
2080 assert(c);
2081 assert(l);
2082
2083 STRV_FOREACH(i, c->environment_files) {
2084 char *fn;
2085 int k;
2086 bool ignore = false;
2087 char **p;
2088 _cleanup_globfree_ glob_t pglob = {};
2089 int count, n;
2090
2091 fn = *i;
2092
2093 if (fn[0] == '-') {
2094 ignore = true;
2095 fn ++;
2096 }
2097
2098 if (!path_is_absolute(fn)) {
2099 if (ignore)
2100 continue;
2101
2102 strv_free(r);
2103 return -EINVAL;
2104 }
2105
2106 /* Filename supports globbing, take all matching files */
2107 errno = 0;
2108 if (glob(fn, 0, NULL, &pglob) != 0) {
2109 if (ignore)
2110 continue;
2111
2112 strv_free(r);
2113 return errno ? -errno : -EINVAL;
2114 }
2115 count = pglob.gl_pathc;
2116 if (count == 0) {
2117 if (ignore)
2118 continue;
2119
2120 strv_free(r);
2121 return -EINVAL;
2122 }
2123 for (n = 0; n < count; n++) {
2124 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2125 if (k < 0) {
2126 if (ignore)
2127 continue;
2128
2129 strv_free(r);
2130 return k;
2131 }
2132 /* Log invalid environment variables with filename */
2133 if (p) {
2134 InvalidEnvInfo info = {
2135 .unit_id = unit_id,
2136 .path = pglob.gl_pathv[n]
2137 };
2138
2139 p = strv_env_clean_with_callback(p, invalid_env, &info);
2140 }
2141
2142 if (r == NULL)
2143 r = p;
2144 else {
2145 char **m;
2146
2147 m = strv_env_merge(2, r, p);
2148 strv_free(r);
2149 strv_free(p);
2150 if (!m)
2151 return -ENOMEM;
2152
2153 r = m;
2154 }
2155 }
2156 }
2157
2158 *l = r;
2159
2160 return 0;
2161 }
2162
2163 static bool tty_may_match_dev_console(const char *tty) {
2164 _cleanup_free_ char *active = NULL;
2165 char *console;
2166
2167 if (startswith(tty, "/dev/"))
2168 tty += 5;
2169
2170 /* trivial identity? */
2171 if (streq(tty, "console"))
2172 return true;
2173
2174 console = resolve_dev_console(&active);
2175 /* if we could not resolve, assume it may */
2176 if (!console)
2177 return true;
2178
2179 /* "tty0" means the active VC, so it may be the same sometimes */
2180 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2181 }
2182
2183 bool exec_context_may_touch_console(ExecContext *ec) {
2184 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2185 is_terminal_input(ec->std_input) ||
2186 is_terminal_output(ec->std_output) ||
2187 is_terminal_output(ec->std_error)) &&
2188 tty_may_match_dev_console(tty_path(ec));
2189 }
2190
2191 static void strv_fprintf(FILE *f, char **l) {
2192 char **g;
2193
2194 assert(f);
2195
2196 STRV_FOREACH(g, l)
2197 fprintf(f, " %s", *g);
2198 }
2199
2200 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2201 char **e;
2202 unsigned i;
2203
2204 assert(c);
2205 assert(f);
2206
2207 prefix = strempty(prefix);
2208
2209 fprintf(f,
2210 "%sUMask: %04o\n"
2211 "%sWorkingDirectory: %s\n"
2212 "%sRootDirectory: %s\n"
2213 "%sNonBlocking: %s\n"
2214 "%sPrivateTmp: %s\n"
2215 "%sPrivateNetwork: %s\n"
2216 "%sPrivateDevices: %s\n"
2217 "%sProtectHome: %s\n"
2218 "%sProtectSystem: %s\n"
2219 "%sIgnoreSIGPIPE: %s\n",
2220 prefix, c->umask,
2221 prefix, c->working_directory ? c->working_directory : "/",
2222 prefix, c->root_directory ? c->root_directory : "/",
2223 prefix, yes_no(c->non_blocking),
2224 prefix, yes_no(c->private_tmp),
2225 prefix, yes_no(c->private_network),
2226 prefix, yes_no(c->private_devices),
2227 prefix, protect_home_to_string(c->protect_home),
2228 prefix, protect_system_to_string(c->protect_system),
2229 prefix, yes_no(c->ignore_sigpipe));
2230
2231 STRV_FOREACH(e, c->environment)
2232 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2233
2234 STRV_FOREACH(e, c->environment_files)
2235 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2236
2237 if (c->nice_set)
2238 fprintf(f,
2239 "%sNice: %i\n",
2240 prefix, c->nice);
2241
2242 if (c->oom_score_adjust_set)
2243 fprintf(f,
2244 "%sOOMScoreAdjust: %i\n",
2245 prefix, c->oom_score_adjust);
2246
2247 for (i = 0; i < RLIM_NLIMITS; i++)
2248 if (c->rlimit[i])
2249 fprintf(f, "%s%s: "RLIM_FMT"\n",
2250 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2251
2252 if (c->ioprio_set) {
2253 _cleanup_free_ char *class_str = NULL;
2254
2255 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2256 fprintf(f,
2257 "%sIOSchedulingClass: %s\n"
2258 "%sIOPriority: %i\n",
2259 prefix, strna(class_str),
2260 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2261 }
2262
2263 if (c->cpu_sched_set) {
2264 _cleanup_free_ char *policy_str = NULL;
2265
2266 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2267 fprintf(f,
2268 "%sCPUSchedulingPolicy: %s\n"
2269 "%sCPUSchedulingPriority: %i\n"
2270 "%sCPUSchedulingResetOnFork: %s\n",
2271 prefix, strna(policy_str),
2272 prefix, c->cpu_sched_priority,
2273 prefix, yes_no(c->cpu_sched_reset_on_fork));
2274 }
2275
2276 if (c->cpuset) {
2277 fprintf(f, "%sCPUAffinity:", prefix);
2278 for (i = 0; i < c->cpuset_ncpus; i++)
2279 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2280 fprintf(f, " %u", i);
2281 fputs("\n", f);
2282 }
2283
2284 if (c->timer_slack_nsec != NSEC_INFINITY)
2285 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2286
2287 fprintf(f,
2288 "%sStandardInput: %s\n"
2289 "%sStandardOutput: %s\n"
2290 "%sStandardError: %s\n",
2291 prefix, exec_input_to_string(c->std_input),
2292 prefix, exec_output_to_string(c->std_output),
2293 prefix, exec_output_to_string(c->std_error));
2294
2295 if (c->tty_path)
2296 fprintf(f,
2297 "%sTTYPath: %s\n"
2298 "%sTTYReset: %s\n"
2299 "%sTTYVHangup: %s\n"
2300 "%sTTYVTDisallocate: %s\n",
2301 prefix, c->tty_path,
2302 prefix, yes_no(c->tty_reset),
2303 prefix, yes_no(c->tty_vhangup),
2304 prefix, yes_no(c->tty_vt_disallocate));
2305
2306 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2307 c->std_output == EXEC_OUTPUT_KMSG ||
2308 c->std_output == EXEC_OUTPUT_JOURNAL ||
2309 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2310 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2311 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2312 c->std_error == EXEC_OUTPUT_SYSLOG ||
2313 c->std_error == EXEC_OUTPUT_KMSG ||
2314 c->std_error == EXEC_OUTPUT_JOURNAL ||
2315 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2316 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2317 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2318
2319 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2320
2321 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2322 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2323
2324 fprintf(f,
2325 "%sSyslogFacility: %s\n"
2326 "%sSyslogLevel: %s\n",
2327 prefix, strna(fac_str),
2328 prefix, strna(lvl_str));
2329 }
2330
2331 if (c->capabilities) {
2332 _cleanup_cap_free_charp_ char *t;
2333
2334 t = cap_to_text(c->capabilities, NULL);
2335 if (t)
2336 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2337 }
2338
2339 if (c->secure_bits)
2340 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2341 prefix,
2342 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2343 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2344 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2345 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2346 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2347 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2348
2349 if (c->capability_bounding_set_drop) {
2350 unsigned long l;
2351 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2352
2353 for (l = 0; l <= cap_last_cap(); l++)
2354 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2355 fprintf(f, " %s", strna(capability_to_name(l)));
2356
2357 fputs("\n", f);
2358 }
2359
2360 if (c->user)
2361 fprintf(f, "%sUser: %s\n", prefix, c->user);
2362 if (c->group)
2363 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2364
2365 if (strv_length(c->supplementary_groups) > 0) {
2366 fprintf(f, "%sSupplementaryGroups:", prefix);
2367 strv_fprintf(f, c->supplementary_groups);
2368 fputs("\n", f);
2369 }
2370
2371 if (c->pam_name)
2372 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2373
2374 if (strv_length(c->read_write_dirs) > 0) {
2375 fprintf(f, "%sReadWriteDirs:", prefix);
2376 strv_fprintf(f, c->read_write_dirs);
2377 fputs("\n", f);
2378 }
2379
2380 if (strv_length(c->read_only_dirs) > 0) {
2381 fprintf(f, "%sReadOnlyDirs:", prefix);
2382 strv_fprintf(f, c->read_only_dirs);
2383 fputs("\n", f);
2384 }
2385
2386 if (strv_length(c->inaccessible_dirs) > 0) {
2387 fprintf(f, "%sInaccessibleDirs:", prefix);
2388 strv_fprintf(f, c->inaccessible_dirs);
2389 fputs("\n", f);
2390 }
2391
2392 if (c->utmp_id)
2393 fprintf(f,
2394 "%sUtmpIdentifier: %s\n",
2395 prefix, c->utmp_id);
2396
2397 if (c->selinux_context)
2398 fprintf(f,
2399 "%sSELinuxContext: %s%s\n",
2400 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2401
2402 if (c->personality != 0xffffffffUL)
2403 fprintf(f,
2404 "%sPersonality: %s\n",
2405 prefix, strna(personality_to_string(c->personality)));
2406
2407 if (c->syscall_filter) {
2408 #ifdef HAVE_SECCOMP
2409 Iterator j;
2410 void *id;
2411 bool first = true;
2412 #endif
2413
2414 fprintf(f,
2415 "%sSystemCallFilter: ",
2416 prefix);
2417
2418 if (!c->syscall_whitelist)
2419 fputc('~', f);
2420
2421 #ifdef HAVE_SECCOMP
2422 SET_FOREACH(id, c->syscall_filter, j) {
2423 _cleanup_free_ char *name = NULL;
2424
2425 if (first)
2426 first = false;
2427 else
2428 fputc(' ', f);
2429
2430 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2431 fputs(strna(name), f);
2432 }
2433 #endif
2434
2435 fputc('\n', f);
2436 }
2437
2438 if (c->syscall_archs) {
2439 #ifdef HAVE_SECCOMP
2440 Iterator j;
2441 void *id;
2442 #endif
2443
2444 fprintf(f,
2445 "%sSystemCallArchitectures:",
2446 prefix);
2447
2448 #ifdef HAVE_SECCOMP
2449 SET_FOREACH(id, c->syscall_archs, j)
2450 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2451 #endif
2452 fputc('\n', f);
2453 }
2454
2455 if (c->syscall_errno != 0)
2456 fprintf(f,
2457 "%sSystemCallErrorNumber: %s\n",
2458 prefix, strna(errno_to_name(c->syscall_errno)));
2459
2460 if (c->apparmor_profile)
2461 fprintf(f,
2462 "%sAppArmorProfile: %s%s\n",
2463 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2464 }
2465
2466 bool exec_context_maintains_privileges(ExecContext *c) {
2467 assert(c);
2468
2469 /* Returns true if the process forked off would run run under
2470 * an unchanged UID or as root. */
2471
2472 if (!c->user)
2473 return true;
2474
2475 if (streq(c->user, "root") || streq(c->user, "0"))
2476 return true;
2477
2478 return false;
2479 }
2480
2481 void exec_status_start(ExecStatus *s, pid_t pid) {
2482 assert(s);
2483
2484 zero(*s);
2485 s->pid = pid;
2486 dual_timestamp_get(&s->start_timestamp);
2487 }
2488
2489 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2490 assert(s);
2491
2492 if (s->pid && s->pid != pid)
2493 zero(*s);
2494
2495 s->pid = pid;
2496 dual_timestamp_get(&s->exit_timestamp);
2497
2498 s->code = code;
2499 s->status = status;
2500
2501 if (context) {
2502 if (context->utmp_id)
2503 utmp_put_dead_process(context->utmp_id, pid, code, status);
2504
2505 exec_context_tty_reset(context);
2506 }
2507 }
2508
2509 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2510 char buf[FORMAT_TIMESTAMP_MAX];
2511
2512 assert(s);
2513 assert(f);
2514
2515 if (s->pid <= 0)
2516 return;
2517
2518 prefix = strempty(prefix);
2519
2520 fprintf(f,
2521 "%sPID: "PID_FMT"\n",
2522 prefix, s->pid);
2523
2524 if (s->start_timestamp.realtime > 0)
2525 fprintf(f,
2526 "%sStart Timestamp: %s\n",
2527 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2528
2529 if (s->exit_timestamp.realtime > 0)
2530 fprintf(f,
2531 "%sExit Timestamp: %s\n"
2532 "%sExit Code: %s\n"
2533 "%sExit Status: %i\n",
2534 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2535 prefix, sigchld_code_to_string(s->code),
2536 prefix, s->status);
2537 }
2538
2539 char *exec_command_line(char **argv) {
2540 size_t k;
2541 char *n, *p, **a;
2542 bool first = true;
2543
2544 assert(argv);
2545
2546 k = 1;
2547 STRV_FOREACH(a, argv)
2548 k += strlen(*a)+3;
2549
2550 if (!(n = new(char, k)))
2551 return NULL;
2552
2553 p = n;
2554 STRV_FOREACH(a, argv) {
2555
2556 if (!first)
2557 *(p++) = ' ';
2558 else
2559 first = false;
2560
2561 if (strpbrk(*a, WHITESPACE)) {
2562 *(p++) = '\'';
2563 p = stpcpy(p, *a);
2564 *(p++) = '\'';
2565 } else
2566 p = stpcpy(p, *a);
2567
2568 }
2569
2570 *p = 0;
2571
2572 /* FIXME: this doesn't really handle arguments that have
2573 * spaces and ticks in them */
2574
2575 return n;
2576 }
2577
2578 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2579 _cleanup_free_ char *cmd = NULL;
2580 const char *prefix2;
2581
2582 assert(c);
2583 assert(f);
2584
2585 prefix = strempty(prefix);
2586 prefix2 = strjoina(prefix, "\t");
2587
2588 cmd = exec_command_line(c->argv);
2589 fprintf(f,
2590 "%sCommand Line: %s\n",
2591 prefix, cmd ? cmd : strerror(ENOMEM));
2592
2593 exec_status_dump(&c->exec_status, f, prefix2);
2594 }
2595
2596 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2597 assert(f);
2598
2599 prefix = strempty(prefix);
2600
2601 LIST_FOREACH(command, c, c)
2602 exec_command_dump(c, f, prefix);
2603 }
2604
2605 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2606 ExecCommand *end;
2607
2608 assert(l);
2609 assert(e);
2610
2611 if (*l) {
2612 /* It's kind of important, that we keep the order here */
2613 LIST_FIND_TAIL(command, *l, end);
2614 LIST_INSERT_AFTER(command, *l, end, e);
2615 } else
2616 *l = e;
2617 }
2618
2619 int exec_command_set(ExecCommand *c, const char *path, ...) {
2620 va_list ap;
2621 char **l, *p;
2622
2623 assert(c);
2624 assert(path);
2625
2626 va_start(ap, path);
2627 l = strv_new_ap(path, ap);
2628 va_end(ap);
2629
2630 if (!l)
2631 return -ENOMEM;
2632
2633 p = strdup(path);
2634 if (!p) {
2635 strv_free(l);
2636 return -ENOMEM;
2637 }
2638
2639 free(c->path);
2640 c->path = p;
2641
2642 strv_free(c->argv);
2643 c->argv = l;
2644
2645 return 0;
2646 }
2647
2648 int exec_command_append(ExecCommand *c, const char *path, ...) {
2649 _cleanup_strv_free_ char **l = NULL;
2650 va_list ap;
2651 int r;
2652
2653 assert(c);
2654 assert(path);
2655
2656 va_start(ap, path);
2657 l = strv_new_ap(path, ap);
2658 va_end(ap);
2659
2660 if (!l)
2661 return -ENOMEM;
2662
2663 r = strv_extend_strv(&c->argv, l);
2664 if (r < 0)
2665 return r;
2666
2667 return 0;
2668 }
2669
2670
2671 static int exec_runtime_allocate(ExecRuntime **rt) {
2672
2673 if (*rt)
2674 return 0;
2675
2676 *rt = new0(ExecRuntime, 1);
2677 if (!*rt)
2678 return -ENOMEM;
2679
2680 (*rt)->n_ref = 1;
2681 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2682
2683 return 0;
2684 }
2685
2686 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2687 int r;
2688
2689 assert(rt);
2690 assert(c);
2691 assert(id);
2692
2693 if (*rt)
2694 return 1;
2695
2696 if (!c->private_network && !c->private_tmp)
2697 return 0;
2698
2699 r = exec_runtime_allocate(rt);
2700 if (r < 0)
2701 return r;
2702
2703 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2704 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2705 return -errno;
2706 }
2707
2708 if (c->private_tmp && !(*rt)->tmp_dir) {
2709 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2710 if (r < 0)
2711 return r;
2712 }
2713
2714 return 1;
2715 }
2716
2717 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2718 assert(r);
2719 assert(r->n_ref > 0);
2720
2721 r->n_ref++;
2722 return r;
2723 }
2724
2725 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2726
2727 if (!r)
2728 return NULL;
2729
2730 assert(r->n_ref > 0);
2731
2732 r->n_ref--;
2733 if (r->n_ref <= 0) {
2734 free(r->tmp_dir);
2735 free(r->var_tmp_dir);
2736 safe_close_pair(r->netns_storage_socket);
2737 free(r);
2738 }
2739
2740 return NULL;
2741 }
2742
2743 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2744 assert(u);
2745 assert(f);
2746 assert(fds);
2747
2748 if (!rt)
2749 return 0;
2750
2751 if (rt->tmp_dir)
2752 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2753
2754 if (rt->var_tmp_dir)
2755 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2756
2757 if (rt->netns_storage_socket[0] >= 0) {
2758 int copy;
2759
2760 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2761 if (copy < 0)
2762 return copy;
2763
2764 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2765 }
2766
2767 if (rt->netns_storage_socket[1] >= 0) {
2768 int copy;
2769
2770 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2771 if (copy < 0)
2772 return copy;
2773
2774 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2775 }
2776
2777 return 0;
2778 }
2779
2780 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2781 int r;
2782
2783 assert(rt);
2784 assert(key);
2785 assert(value);
2786
2787 if (streq(key, "tmp-dir")) {
2788 char *copy;
2789
2790 r = exec_runtime_allocate(rt);
2791 if (r < 0)
2792 return r;
2793
2794 copy = strdup(value);
2795 if (!copy)
2796 return log_oom();
2797
2798 free((*rt)->tmp_dir);
2799 (*rt)->tmp_dir = copy;
2800
2801 } else if (streq(key, "var-tmp-dir")) {
2802 char *copy;
2803
2804 r = exec_runtime_allocate(rt);
2805 if (r < 0)
2806 return r;
2807
2808 copy = strdup(value);
2809 if (!copy)
2810 return log_oom();
2811
2812 free((*rt)->var_tmp_dir);
2813 (*rt)->var_tmp_dir = copy;
2814
2815 } else if (streq(key, "netns-socket-0")) {
2816 int fd;
2817
2818 r = exec_runtime_allocate(rt);
2819 if (r < 0)
2820 return r;
2821
2822 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2823 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2824 else {
2825 safe_close((*rt)->netns_storage_socket[0]);
2826 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2827 }
2828 } else if (streq(key, "netns-socket-1")) {
2829 int fd;
2830
2831 r = exec_runtime_allocate(rt);
2832 if (r < 0)
2833 return r;
2834
2835 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2836 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2837 else {
2838 safe_close((*rt)->netns_storage_socket[1]);
2839 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2840 }
2841 } else
2842 return 0;
2843
2844 return 1;
2845 }
2846
2847 static void *remove_tmpdir_thread(void *p) {
2848 _cleanup_free_ char *path = p;
2849
2850 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2851 return NULL;
2852 }
2853
2854 void exec_runtime_destroy(ExecRuntime *rt) {
2855 int r;
2856
2857 if (!rt)
2858 return;
2859
2860 /* If there are multiple users of this, let's leave the stuff around */
2861 if (rt->n_ref > 1)
2862 return;
2863
2864 if (rt->tmp_dir) {
2865 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2866
2867 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2868 if (r < 0) {
2869 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2870 free(rt->tmp_dir);
2871 }
2872
2873 rt->tmp_dir = NULL;
2874 }
2875
2876 if (rt->var_tmp_dir) {
2877 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2878
2879 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2880 if (r < 0) {
2881 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2882 free(rt->var_tmp_dir);
2883 }
2884
2885 rt->var_tmp_dir = NULL;
2886 }
2887
2888 safe_close_pair(rt->netns_storage_socket);
2889 }
2890
2891 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2892 [EXEC_INPUT_NULL] = "null",
2893 [EXEC_INPUT_TTY] = "tty",
2894 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2895 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2896 [EXEC_INPUT_SOCKET] = "socket"
2897 };
2898
2899 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2900
2901 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2902 [EXEC_OUTPUT_INHERIT] = "inherit",
2903 [EXEC_OUTPUT_NULL] = "null",
2904 [EXEC_OUTPUT_TTY] = "tty",
2905 [EXEC_OUTPUT_SYSLOG] = "syslog",
2906 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2907 [EXEC_OUTPUT_KMSG] = "kmsg",
2908 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2909 [EXEC_OUTPUT_JOURNAL] = "journal",
2910 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2911 [EXEC_OUTPUT_SOCKET] = "socket"
2912 };
2913
2914 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);