]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/execute.c
core: Fix EACCES check for OOM adjustments
[thirdparty/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Relocates the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. they form a contiguous range right after
 * stdin/stdout/stderr. The array is updated in place with the new fd
 * numbers. Returns 0 on success, or a negative errno if duplicating an
 * fd failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first_pending = 0;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        while (first_pending >= 0) {
                int retry_at = -1, idx;

                for (idx = first_pending; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied (F_DUPFD
                         * handed us a higher number). Remember the first
                         * such index, so that we make another pass
                         * starting from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first_pending = retry_at;
        }

        return 0;
}
146
/* Sets or drops O_NONBLOCK (according to 'nonblock') on all passed fds
 * and unconditionally drops FD_CLOEXEC from them. Returns 0 on success,
 * or the negative error of the first helper call that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int k;

                k = fd_nonblock(*p, nonblock);
                if (k < 0)
                        return k;

                /* FD_CLOEXEC is always turned off, since the whole
                 * point of these fds is to hand them to our children. */
                k = fd_cloexec(*p, false);
                if (k < 0)
                        return k;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175 assert(context);
176
177 if (context->tty_path)
178 return context->tty_path;
179
180 return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184 assert(context);
185
186 if (context->tty_vhangup)
187 terminal_vhangup(tty_path(context));
188
189 if (context->tty_reset)
190 reset_terminal(tty_path(context));
191
192 if (context->tty_vt_disallocate && context->tty_path)
193 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197 return
198 o == EXEC_OUTPUT_TTY ||
199 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the specified access flags and makes it
 * available as fd number 'nfd'. Returns nfd on success, a negative
 * errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, k;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We got lucky and already landed on the right number */
        if (null_fd == nfd)
                return nfd;

        k = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return k;
}
221
222 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
223 union sockaddr_union sa = {
224 .un.sun_family = AF_UNIX,
225 .un.sun_path = "/run/systemd/journal/stdout",
226 };
227 uid_t olduid = UID_INVALID;
228 gid_t oldgid = GID_INVALID;
229 int r;
230
231 if (gid != GID_INVALID) {
232 oldgid = getgid();
233
234 r = setegid(gid);
235 if (r < 0)
236 return -errno;
237 }
238
239 if (uid != UID_INVALID) {
240 olduid = getuid();
241
242 r = seteuid(uid);
243 if (r < 0) {
244 r = -errno;
245 goto restore_gid;
246 }
247 }
248
249 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
250 if (r < 0)
251 r = -errno;
252
253 /* If we fail to restore the uid or gid, things will likely
254 fail later on. This should only happen if an LSM interferes. */
255
256 if (uid != UID_INVALID)
257 (void) seteuid(olduid);
258
259 restore_gid:
260 if (gid != GID_INVALID)
261 (void) setegid(oldgid);
262
263 return r;
264 }
265
266 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
267 int fd, r;
268
269 assert(context);
270 assert(output < _EXEC_OUTPUT_MAX);
271 assert(ident);
272 assert(nfd >= 0);
273
274 fd = socket(AF_UNIX, SOCK_STREAM, 0);
275 if (fd < 0)
276 return -errno;
277
278 r = connect_journal_socket(fd, uid, gid);
279 if (r < 0)
280 return r;
281
282 if (shutdown(fd, SHUT_RD) < 0) {
283 safe_close(fd);
284 return -errno;
285 }
286
287 fd_inc_sndbuf(fd, SNDBUF_SIZE);
288
289 dprintf(fd,
290 "%s\n"
291 "%s\n"
292 "%i\n"
293 "%i\n"
294 "%i\n"
295 "%i\n"
296 "%i\n",
297 context->syslog_identifier ? context->syslog_identifier : ident,
298 unit_id,
299 context->syslog_priority,
300 !!context->syslog_level_prefix,
301 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
302 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
303 is_terminal_output(output));
304
305 if (fd != nfd) {
306 r = dup2(fd, nfd) < 0 ? -errno : nfd;
307 safe_close(fd);
308 } else
309 r = nfd;
310
311 return r;
312 }
/* Opens the terminal at 'path' with the specified open flags (plus
 * O_NOCTTY) and makes it available as fd number 'nfd'. Returns nfd on
 * success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, k;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the right number? Then there's nothing to move. */
        if (tty_fd == nfd)
                return nfd;

        k = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return k;
}
330
331 static bool is_terminal_input(ExecInput i) {
332 return
333 i == EXEC_INPUT_TTY ||
334 i == EXEC_INPUT_TTY_FORCE ||
335 i == EXEC_INPUT_TTY_FAIL;
336 }
337
338 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
339
340 if (is_terminal_input(std_input) && !apply_tty_stdin)
341 return EXEC_INPUT_NULL;
342
343 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
344 return EXEC_INPUT_NULL;
345
346 return std_input;
347 }
348
349 static int fixup_output(ExecOutput std_output, int socket_fd) {
350
351 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
352 return EXEC_OUTPUT_INHERIT;
353
354 return std_output;
355 }
356
357 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
358 ExecInput i;
359
360 assert(context);
361
362 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
363
364 switch (i) {
365
366 case EXEC_INPUT_NULL:
367 return open_null_as(O_RDONLY, STDIN_FILENO);
368
369 case EXEC_INPUT_TTY:
370 case EXEC_INPUT_TTY_FORCE:
371 case EXEC_INPUT_TTY_FAIL: {
372 int fd, r;
373
374 fd = acquire_terminal(tty_path(context),
375 i == EXEC_INPUT_TTY_FAIL,
376 i == EXEC_INPUT_TTY_FORCE,
377 false,
378 USEC_INFINITY);
379 if (fd < 0)
380 return fd;
381
382 if (fd != STDIN_FILENO) {
383 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
384 safe_close(fd);
385 } else
386 r = STDIN_FILENO;
387
388 return r;
389 }
390
391 case EXEC_INPUT_SOCKET:
392 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
393
394 default:
395 assert_not_reached("Unknown input type");
396 }
397 }
398
399 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
400 ExecOutput o;
401 ExecInput i;
402 int r;
403
404 assert(context);
405 assert(ident);
406
407 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408 o = fixup_output(context->std_output, socket_fd);
409
410 if (fileno == STDERR_FILENO) {
411 ExecOutput e;
412 e = fixup_output(context->std_error, socket_fd);
413
414 /* This expects the input and output are already set up */
415
416 /* Don't change the stderr file descriptor if we inherit all
417 * the way and are not on a tty */
418 if (e == EXEC_OUTPUT_INHERIT &&
419 o == EXEC_OUTPUT_INHERIT &&
420 i == EXEC_INPUT_NULL &&
421 !is_terminal_input(context->std_input) &&
422 getppid () != 1)
423 return fileno;
424
425 /* Duplicate from stdout if possible */
426 if (e == o || e == EXEC_OUTPUT_INHERIT)
427 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
428
429 o = e;
430
431 } else if (o == EXEC_OUTPUT_INHERIT) {
432 /* If input got downgraded, inherit the original value */
433 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
435
436 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437 if (i != EXEC_INPUT_NULL)
438 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
439
440 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441 if (getppid() != 1)
442 return fileno;
443
444 /* We need to open /dev/null here anew, to get the right access mode. */
445 return open_null_as(O_WRONLY, fileno);
446 }
447
448 switch (o) {
449
450 case EXEC_OUTPUT_NULL:
451 return open_null_as(O_WRONLY, fileno);
452
453 case EXEC_OUTPUT_TTY:
454 if (is_terminal_input(i))
455 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
456
457 /* We don't reset the terminal if this is just about output */
458 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
459
460 case EXEC_OUTPUT_SYSLOG:
461 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462 case EXEC_OUTPUT_KMSG:
463 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464 case EXEC_OUTPUT_JOURNAL:
465 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
467 if (r < 0) {
468 log_unit_struct(unit_id,
469 LOG_ERR,
470 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
471 fileno == STDOUT_FILENO ? "stdout" : "stderr",
472 unit_id, strerror(-r)),
473 LOG_ERRNO(-r),
474 NULL);
475 r = open_null_as(O_WRONLY, fileno);
476 }
477 return r;
478
479 case EXEC_OUTPUT_SOCKET:
480 assert(socket_fd >= 0);
481 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482
483 default:
484 assert_not_reached("Unknown error type");
485 }
486 }
487
488 static int chown_terminal(int fd, uid_t uid) {
489 struct stat st;
490
491 assert(fd >= 0);
492
493 /* This might fail. What matters are the results. */
494 (void) fchown(fd, uid, -1);
495 (void) fchmod(fd, TTY_MODE);
496
497 if (fstat(fd, &st) < 0)
498 return -errno;
499
500 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
501 return -EPERM;
502
503 return 0;
504 }
505
506 static int setup_confirm_stdio(int *_saved_stdin,
507 int *_saved_stdout) {
508 int fd = -1, saved_stdin, saved_stdout = -1, r;
509
510 assert(_saved_stdin);
511 assert(_saved_stdout);
512
513 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
514 if (saved_stdin < 0)
515 return -errno;
516
517 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518 if (saved_stdout < 0) {
519 r = errno;
520 goto fail;
521 }
522
523 fd = acquire_terminal(
524 "/dev/console",
525 false,
526 false,
527 false,
528 DEFAULT_CONFIRM_USEC);
529 if (fd < 0) {
530 r = fd;
531 goto fail;
532 }
533
534 r = chown_terminal(fd, getuid());
535 if (r < 0)
536 goto fail;
537
538 if (dup2(fd, STDIN_FILENO) < 0) {
539 r = -errno;
540 goto fail;
541 }
542
543 if (dup2(fd, STDOUT_FILENO) < 0) {
544 r = -errno;
545 goto fail;
546 }
547
548 if (fd >= 2)
549 safe_close(fd);
550
551 *_saved_stdin = saved_stdin;
552 *_saved_stdout = saved_stdout;
553
554 return 0;
555
556 fail:
557 safe_close(saved_stdout);
558 safe_close(saved_stdin);
559 safe_close(fd);
560
561 return r;
562 }
563
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565 _cleanup_close_ int fd = -1;
566 va_list ap;
567
568 assert(format);
569
570 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
571 if (fd < 0)
572 return fd;
573
574 va_start(ap, format);
575 vdprintf(fd, format, ap);
576 va_end(ap);
577
578 return 0;
579 }
580
/* Undoes setup_confirm_stdio(): releases the terminal, moves the saved
 * fds back to stdin/stdout (where valid) and closes the saved copies.
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
604
605 static int ask_for_confirmation(char *response, char **argv) {
606 int saved_stdout = -1, saved_stdin = -1, r;
607 _cleanup_free_ char *line = NULL;
608
609 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
610 if (r < 0)
611 return r;
612
613 line = exec_command_line(argv);
614 if (!line)
615 return -ENOMEM;
616
617 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
618
619 restore_confirm_stdio(&saved_stdin, &saved_stdout);
620
621 return r;
622 }
623
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625 bool keep_groups = false;
626 int r;
627
628 assert(context);
629
630 /* Lookup and set GID and supplementary group list. Here too
631 * we avoid NSS lookups for gid=0. */
632
633 if (context->group || username) {
634
635 if (context->group) {
636 const char *g = context->group;
637
638 if ((r = get_group_creds(&g, &gid)) < 0)
639 return r;
640 }
641
642 /* First step, initialize groups from /etc/groups */
643 if (username && gid != 0) {
644 if (initgroups(username, gid) < 0)
645 return -errno;
646
647 keep_groups = true;
648 }
649
650 /* Second step, set our gids */
651 if (setresgid(gid, gid, gid) < 0)
652 return -errno;
653 }
654
655 if (context->supplementary_groups) {
656 int ngroups_max, k;
657 gid_t *gids;
658 char **i;
659
660 /* Final step, initialize any manually set supplementary groups */
661 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
662
663 if (!(gids = new(gid_t, ngroups_max)))
664 return -ENOMEM;
665
666 if (keep_groups) {
667 if ((k = getgroups(ngroups_max, gids)) < 0) {
668 free(gids);
669 return -errno;
670 }
671 } else
672 k = 0;
673
674 STRV_FOREACH(i, context->supplementary_groups) {
675 const char *g;
676
677 if (k >= ngroups_max) {
678 free(gids);
679 return -E2BIG;
680 }
681
682 g = *i;
683 r = get_group_creds(&g, gids+k);
684 if (r < 0) {
685 free(gids);
686 return r;
687 }
688
689 k++;
690 }
691
692 if (setgroups(k, gids) < 0) {
693 free(gids);
694 return -errno;
695 }
696
697 free(gids);
698 }
699
700 return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704 assert(context);
705
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
708
709 if (context->capabilities) {
710 _cleanup_cap_free_ cap_t d = NULL;
711 static const cap_value_t bits[] = {
712 CAP_SETUID, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
714 };
715
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
719 if (uid != 0) {
720 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722 if (prctl(PR_GET_SECUREBITS) != sb)
723 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724 return -errno;
725 }
726
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
729
730 d = cap_dup(context->capabilities);
731 if (!d)
732 return -errno;
733
734 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736 return -errno;
737
738 if (cap_set_proc(d) < 0)
739 return -errno;
740 }
741
742 /* Third step: actually set the uids */
743 if (setresuid(uid, uid, uid) < 0)
744 return -errno;
745
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might got
748 corrupted due to the setresuid() so we need clean them up
749 later. This is done outside of this call. */
750
751 return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757 int num_msg,
758 const struct pam_message **msg,
759 struct pam_response **resp,
760 void *appdata_ptr) {
761
762 /* We don't support conversations */
763
764 return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768 const char *name,
769 const char *user,
770 uid_t uid,
771 const char *tty,
772 char ***pam_env,
773 int fds[], unsigned n_fds) {
774
775 static const struct pam_conv conv = {
776 .conv = null_conv,
777 .appdata_ptr = NULL
778 };
779
780 pam_handle_t *handle = NULL;
781 sigset_t ss, old_ss;
782 int pam_code = PAM_SUCCESS;
783 int err;
784 char **e = NULL;
785 bool close_session = false;
786 pid_t pam_pid = 0, parent_pid;
787 int flags = 0;
788
789 assert(name);
790 assert(user);
791 assert(pam_env);
792
793 /* We set up PAM in the parent process, then fork. The child
794 * will then stay around until killed via PR_GET_PDEATHSIG or
795 * systemd via the cgroup logic. It will then remove the PAM
796 * session again. The parent process will exec() the actual
797 * daemon. We do things this way to ensure that the main PID
798 * of the daemon is the one we initially fork()ed. */
799
800 if (log_get_max_level() < LOG_DEBUG)
801 flags |= PAM_SILENT;
802
803 pam_code = pam_start(name, user, &conv, &handle);
804 if (pam_code != PAM_SUCCESS) {
805 handle = NULL;
806 goto fail;
807 }
808
809 if (tty) {
810 pam_code = pam_set_item(handle, PAM_TTY, tty);
811 if (pam_code != PAM_SUCCESS)
812 goto fail;
813 }
814
815 pam_code = pam_acct_mgmt(handle, flags);
816 if (pam_code != PAM_SUCCESS)
817 goto fail;
818
819 pam_code = pam_open_session(handle, flags);
820 if (pam_code != PAM_SUCCESS)
821 goto fail;
822
823 close_session = true;
824
825 e = pam_getenvlist(handle);
826 if (!e) {
827 pam_code = PAM_BUF_ERR;
828 goto fail;
829 }
830
831 /* Block SIGTERM, so that we know that it won't get lost in
832 * the child */
833 if (sigemptyset(&ss) < 0 ||
834 sigaddset(&ss, SIGTERM) < 0 ||
835 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
836 goto fail;
837
838 parent_pid = getpid();
839
840 pam_pid = fork();
841 if (pam_pid < 0)
842 goto fail;
843
844 if (pam_pid == 0) {
845 int sig;
846 int r = EXIT_PAM;
847
848 /* The child's job is to reset the PAM session on
849 * termination */
850
851 /* This string must fit in 10 chars (i.e. the length
852 * of "/sbin/init"), to look pretty in /bin/ps */
853 rename_process("(sd-pam)");
854
855 /* Make sure we don't keep open the passed fds in this
856 child. We assume that otherwise only those fds are
857 open here that have been opened by PAM. */
858 close_many(fds, n_fds);
859
860 /* Drop privileges - we don't need any to pam_close_session
861 * and this will make PR_SET_PDEATHSIG work in most cases.
862 * If this fails, ignore the error - but expect sd-pam threads
863 * to fail to exit normally */
864 if (setresuid(uid, uid, uid) < 0)
865 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
866
867 /* Wait until our parent died. This will only work if
868 * the above setresuid() succeeds, otherwise the kernel
869 * will not allow unprivileged parents kill their privileged
870 * children this way. We rely on the control groups kill logic
871 * to do the rest for us. */
872 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873 goto child_finish;
874
875 /* Check if our parent process might already have
876 * died? */
877 if (getppid() == parent_pid) {
878 for (;;) {
879 if (sigwait(&ss, &sig) < 0) {
880 if (errno == EINTR)
881 continue;
882
883 goto child_finish;
884 }
885
886 assert(sig == SIGTERM);
887 break;
888 }
889 }
890
891 /* If our parent died we'll end the session */
892 if (getppid() != parent_pid) {
893 pam_code = pam_close_session(handle, flags);
894 if (pam_code != PAM_SUCCESS)
895 goto child_finish;
896 }
897
898 r = 0;
899
900 child_finish:
901 pam_end(handle, pam_code | flags);
902 _exit(r);
903 }
904
905 /* If the child was forked off successfully it will do all the
906 * cleanups, so forget about the handle here. */
907 handle = NULL;
908
909 /* Unblock SIGTERM again in the parent */
910 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
911 goto fail;
912
913 /* We close the log explicitly here, since the PAM modules
914 * might have opened it, but we don't want this fd around. */
915 closelog();
916
917 *pam_env = e;
918 e = NULL;
919
920 return 0;
921
922 fail:
923 if (pam_code != PAM_SUCCESS) {
924 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
925 err = -EPERM; /* PAM errors do not map to errno */
926 } else {
927 log_error_errno(errno, "PAM failed: %m");
928 err = -errno;
929 }
930
931 if (handle) {
932 if (close_session)
933 pam_code = pam_close_session(handle, flags);
934
935 pam_end(handle, pam_code | flags);
936 }
937
938 strv_free(e);
939
940 closelog();
941
942 if (pam_pid > 1) {
943 kill(pam_pid, SIGTERM);
944 kill(pam_pid, SIGCONT);
945 }
946
947 return err;
948 }
949 #endif
950
/* Renames the process to the base name of 'path', wrapped in
 * parentheses. Only the last 8 characters of the base name are used if
 * it is longer than that; "(...)" is used if it is empty. */
static void rename_process_from_path(const char *path) {
        char process_name[11], *w = process_name;
        const char *base;
        size_t len;

        /* The result has to fit into 10 characters (the length of
         * "/sbin/init"), so that it looks pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting
                 * part, given that the beginning might just be
                 * "systemd-" */
                base += len - 8;
                len = 8;
        }

        *w++ = '(';
        memcpy(w, base, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(process_name);
}
981
982 #ifdef HAVE_SECCOMP
983
984 static int apply_seccomp(const ExecContext *c) {
985 uint32_t negative_action, action;
986 scmp_filter_ctx *seccomp;
987 Iterator i;
988 void *id;
989 int r;
990
991 assert(c);
992
993 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
994
995 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
996 if (!seccomp)
997 return -ENOMEM;
998
999 if (c->syscall_archs) {
1000
1001 SET_FOREACH(id, c->syscall_archs, i) {
1002 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1003 if (r == -EEXIST)
1004 continue;
1005 if (r < 0)
1006 goto finish;
1007 }
1008
1009 } else {
1010 r = seccomp_add_secondary_archs(seccomp);
1011 if (r < 0)
1012 goto finish;
1013 }
1014
1015 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1016 SET_FOREACH(id, c->syscall_filter, i) {
1017 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1018 if (r < 0)
1019 goto finish;
1020 }
1021
1022 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1023 if (r < 0)
1024 goto finish;
1025
1026 r = seccomp_load(seccomp);
1027
1028 finish:
1029 seccomp_release(seccomp);
1030 return r;
1031 }
1032
1033 static int apply_address_families(const ExecContext *c) {
1034 scmp_filter_ctx *seccomp;
1035 Iterator i;
1036 int r;
1037
1038 assert(c);
1039
1040 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1041 if (!seccomp)
1042 return -ENOMEM;
1043
1044 r = seccomp_add_secondary_archs(seccomp);
1045 if (r < 0)
1046 goto finish;
1047
1048 if (c->address_families_whitelist) {
1049 int af, first = 0, last = 0;
1050 void *afp;
1051
1052 /* If this is a whitelist, we first block the address
1053 * families that are out of range and then everything
1054 * that is not in the set. First, we find the lowest
1055 * and highest address family in the set. */
1056
1057 SET_FOREACH(afp, c->address_families, i) {
1058 af = PTR_TO_INT(afp);
1059
1060 if (af <= 0 || af >= af_max())
1061 continue;
1062
1063 if (first == 0 || af < first)
1064 first = af;
1065
1066 if (last == 0 || af > last)
1067 last = af;
1068 }
1069
1070 assert((first == 0) == (last == 0));
1071
1072 if (first == 0) {
1073
1074 /* No entries in the valid range, block everything */
1075 r = seccomp_rule_add(
1076 seccomp,
1077 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1078 SCMP_SYS(socket),
1079 0);
1080 if (r < 0)
1081 goto finish;
1082
1083 } else {
1084
1085 /* Block everything below the first entry */
1086 r = seccomp_rule_add(
1087 seccomp,
1088 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1089 SCMP_SYS(socket),
1090 1,
1091 SCMP_A0(SCMP_CMP_LT, first));
1092 if (r < 0)
1093 goto finish;
1094
1095 /* Block everything above the last entry */
1096 r = seccomp_rule_add(
1097 seccomp,
1098 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1099 SCMP_SYS(socket),
1100 1,
1101 SCMP_A0(SCMP_CMP_GT, last));
1102 if (r < 0)
1103 goto finish;
1104
1105 /* Block everything between the first and last
1106 * entry */
1107 for (af = 1; af < af_max(); af++) {
1108
1109 if (set_contains(c->address_families, INT_TO_PTR(af)))
1110 continue;
1111
1112 r = seccomp_rule_add(
1113 seccomp,
1114 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1115 SCMP_SYS(socket),
1116 1,
1117 SCMP_A0(SCMP_CMP_EQ, af));
1118 if (r < 0)
1119 goto finish;
1120 }
1121 }
1122
1123 } else {
1124 void *af;
1125
1126 /* If this is a blacklist, then generate one rule for
1127 * each address family that are then combined in OR
1128 * checks. */
1129
1130 SET_FOREACH(af, c->address_families, i) {
1131
1132 r = seccomp_rule_add(
1133 seccomp,
1134 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1135 SCMP_SYS(socket),
1136 1,
1137 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1138 if (r < 0)
1139 goto finish;
1140 }
1141 }
1142
1143 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1144 if (r < 0)
1145 goto finish;
1146
1147 r = seccomp_load(seccomp);
1148
1149 finish:
1150 seccomp_release(seccomp);
1151 return r;
1152 }
1153
1154 #endif
1155
1156 static void do_idle_pipe_dance(int idle_pipe[4]) {
1157 assert(idle_pipe);
1158
1159
1160 safe_close(idle_pipe[1]);
1161 safe_close(idle_pipe[2]);
1162
1163 if (idle_pipe[0] >= 0) {
1164 int r;
1165
1166 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1167
1168 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1169 /* Signal systemd that we are bored and want to continue. */
1170 write(idle_pipe[3], "x", 1);
1171
1172 /* Wait for systemd to react to the signal above. */
1173 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1174 }
1175
1176 safe_close(idle_pipe[0]);
1177
1178 }
1179
1180 safe_close(idle_pipe[3]);
1181 }
1182
1183 static int build_environment(
1184 const ExecContext *c,
1185 unsigned n_fds,
1186 usec_t watchdog_usec,
1187 const char *home,
1188 const char *username,
1189 const char *shell,
1190 char ***ret) {
1191
1192 _cleanup_strv_free_ char **our_env = NULL;
1193 unsigned n_env = 0;
1194 char *x;
1195
1196 assert(c);
1197 assert(ret);
1198
1199 our_env = new0(char*, 10);
1200 if (!our_env)
1201 return -ENOMEM;
1202
1203 if (n_fds > 0) {
1204 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1205 return -ENOMEM;
1206 our_env[n_env++] = x;
1207
1208 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1209 return -ENOMEM;
1210 our_env[n_env++] = x;
1211 }
1212
1213 if (watchdog_usec > 0) {
1214 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1215 return -ENOMEM;
1216 our_env[n_env++] = x;
1217
1218 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1219 return -ENOMEM;
1220 our_env[n_env++] = x;
1221 }
1222
1223 if (home) {
1224 x = strappend("HOME=", home);
1225 if (!x)
1226 return -ENOMEM;
1227 our_env[n_env++] = x;
1228 }
1229
1230 if (username) {
1231 x = strappend("LOGNAME=", username);
1232 if (!x)
1233 return -ENOMEM;
1234 our_env[n_env++] = x;
1235
1236 x = strappend("USER=", username);
1237 if (!x)
1238 return -ENOMEM;
1239 our_env[n_env++] = x;
1240 }
1241
1242 if (shell) {
1243 x = strappend("SHELL=", shell);
1244 if (!x)
1245 return -ENOMEM;
1246 our_env[n_env++] = x;
1247 }
1248
1249 if (is_terminal_input(c->std_input) ||
1250 c->std_output == EXEC_OUTPUT_TTY ||
1251 c->std_error == EXEC_OUTPUT_TTY ||
1252 c->tty_path) {
1253
1254 x = strdup(default_term_for_tty(tty_path(c)));
1255 if (!x)
1256 return -ENOMEM;
1257 our_env[n_env++] = x;
1258 }
1259
1260 our_env[n_env++] = NULL;
1261 assert(n_env <= 10);
1262
1263 *ret = our_env;
1264 our_env = NULL;
1265
1266 return 0;
1267 }
1268
1269 static int exec_child(
1270 ExecCommand *command,
1271 const ExecContext *context,
1272 const ExecParameters *params,
1273 ExecRuntime *runtime,
1274 char **argv,
1275 int socket_fd,
1276 int *fds, unsigned n_fds,
1277 char **files_env,
1278 int *exit_status) {
1279
1280 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1281 _cleanup_free_ char *mac_selinux_context_net = NULL;
1282 const char *username = NULL, *home = NULL, *shell = NULL;
1283 unsigned n_dont_close = 0;
1284 int dont_close[n_fds + 4];
1285 uid_t uid = UID_INVALID;
1286 gid_t gid = GID_INVALID;
1287 int i, r;
1288
1289 assert(command);
1290 assert(context);
1291 assert(params);
1292 assert(exit_status);
1293
1294 rename_process_from_path(command->path);
1295
1296 /* We reset exactly these signals, since they are the
1297 * only ones we set to SIG_IGN in the main daemon. All
1298 * others we leave untouched because we set them to
1299 * SIG_DFL or a valid handler initially, both of which
1300 * will be demoted to SIG_DFL. */
1301 default_signals(SIGNALS_CRASH_HANDLER,
1302 SIGNALS_IGNORE, -1);
1303
1304 if (context->ignore_sigpipe)
1305 ignore_signals(SIGPIPE, -1);
1306
1307 r = reset_signal_mask();
1308 if (r < 0) {
1309 *exit_status = EXIT_SIGNAL_MASK;
1310 return r;
1311 }
1312
1313 if (params->idle_pipe)
1314 do_idle_pipe_dance(params->idle_pipe);
1315
1316 /* Close sockets very early to make sure we don't
1317 * block init reexecution because it cannot bind its
1318 * sockets */
1319
1320 log_forget_fds();
1321
1322 if (socket_fd >= 0)
1323 dont_close[n_dont_close++] = socket_fd;
1324 if (n_fds > 0) {
1325 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1326 n_dont_close += n_fds;
1327 }
1328 if (params->bus_endpoint_fd >= 0)
1329 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1330 if (runtime) {
1331 if (runtime->netns_storage_socket[0] >= 0)
1332 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1333 if (runtime->netns_storage_socket[1] >= 0)
1334 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1335 }
1336
1337 r = close_all_fds(dont_close, n_dont_close);
1338 if (r < 0) {
1339 *exit_status = EXIT_FDS;
1340 return r;
1341 }
1342
1343 if (!context->same_pgrp)
1344 if (setsid() < 0) {
1345 *exit_status = EXIT_SETSID;
1346 return -errno;
1347 }
1348
1349 exec_context_tty_reset(context);
1350
1351 if (params->confirm_spawn) {
1352 char response;
1353
1354 r = ask_for_confirmation(&response, argv);
1355 if (r == -ETIMEDOUT)
1356 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1357 else if (r < 0)
1358 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1359 else if (response == 's') {
1360 write_confirm_message("Skipping execution.\n");
1361 *exit_status = EXIT_CONFIRM;
1362 return -ECANCELED;
1363 } else if (response == 'n') {
1364 write_confirm_message("Failing execution.\n");
1365 *exit_status = 0;
1366 return 0;
1367 }
1368 }
1369
1370 if (context->user) {
1371 username = context->user;
1372 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1373 if (r < 0) {
1374 *exit_status = EXIT_USER;
1375 return r;
1376 }
1377 }
1378
1379 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1380 * must sure to drop O_NONBLOCK */
1381 if (socket_fd >= 0)
1382 fd_nonblock(socket_fd, false);
1383
1384 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1385 if (r < 0) {
1386 *exit_status = EXIT_STDIN;
1387 return r;
1388 }
1389
1390 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1391 if (r < 0) {
1392 *exit_status = EXIT_STDOUT;
1393 return r;
1394 }
1395
1396 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1397 if (r < 0) {
1398 *exit_status = EXIT_STDERR;
1399 return r;
1400 }
1401
1402 if (params->cgroup_path) {
1403 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1404 if (r < 0) {
1405 *exit_status = EXIT_CGROUP;
1406 return r;
1407 }
1408 }
1409
1410 if (context->oom_score_adjust_set) {
1411 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1412
1413 /* When we can't make this change due to EPERM, then
1414 * let's silently skip over it. User namespaces
1415 * prohibit write access to this file, and we
1416 * shouldn't trip up over that. */
1417
1418 sprintf(t, "%i", context->oom_score_adjust);
1419 r = write_string_file("/proc/self/oom_score_adj", t);
1420 if (r == -EPERM || r == -EACCES) {
1421 log_open();
1422 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1423 log_close();
1424 } else if (r < 0) {
1425 *exit_status = EXIT_OOM_ADJUST;
1426 return -errno;
1427 }
1428 }
1429
1430 if (context->nice_set)
1431 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1432 *exit_status = EXIT_NICE;
1433 return -errno;
1434 }
1435
1436 if (context->cpu_sched_set) {
1437 struct sched_param param = {
1438 .sched_priority = context->cpu_sched_priority,
1439 };
1440
1441 r = sched_setscheduler(0,
1442 context->cpu_sched_policy |
1443 (context->cpu_sched_reset_on_fork ?
1444 SCHED_RESET_ON_FORK : 0),
1445 &param);
1446 if (r < 0) {
1447 *exit_status = EXIT_SETSCHEDULER;
1448 return -errno;
1449 }
1450 }
1451
1452 if (context->cpuset)
1453 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1454 *exit_status = EXIT_CPUAFFINITY;
1455 return -errno;
1456 }
1457
1458 if (context->ioprio_set)
1459 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1460 *exit_status = EXIT_IOPRIO;
1461 return -errno;
1462 }
1463
1464 if (context->timer_slack_nsec != NSEC_INFINITY)
1465 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1466 *exit_status = EXIT_TIMERSLACK;
1467 return -errno;
1468 }
1469
1470 if (context->personality != 0xffffffffUL)
1471 if (personality(context->personality) < 0) {
1472 *exit_status = EXIT_PERSONALITY;
1473 return -errno;
1474 }
1475
1476 if (context->utmp_id)
1477 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1478
1479 if (context->user && is_terminal_input(context->std_input)) {
1480 r = chown_terminal(STDIN_FILENO, uid);
1481 if (r < 0) {
1482 *exit_status = EXIT_STDIN;
1483 return r;
1484 }
1485 }
1486
1487 #ifdef ENABLE_KDBUS
1488 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1489 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1490
1491 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1492 if (r < 0) {
1493 *exit_status = EXIT_BUS_ENDPOINT;
1494 return r;
1495 }
1496 }
1497 #endif
1498
1499 /* If delegation is enabled we'll pass ownership of the cgroup
1500 * (but only in systemd's own controller hierarchy!) to the
1501 * user of the new process. */
1502 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1503 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1504 if (r < 0) {
1505 *exit_status = EXIT_CGROUP;
1506 return r;
1507 }
1508
1509
1510 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1511 if (r < 0) {
1512 *exit_status = EXIT_CGROUP;
1513 return r;
1514 }
1515 }
1516
1517 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1518 char **rt;
1519
1520 STRV_FOREACH(rt, context->runtime_directory) {
1521 _cleanup_free_ char *p;
1522
1523 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1524 if (!p) {
1525 *exit_status = EXIT_RUNTIME_DIRECTORY;
1526 return -ENOMEM;
1527 }
1528
1529 r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1530 if (r < 0) {
1531 *exit_status = EXIT_RUNTIME_DIRECTORY;
1532 return r;
1533 }
1534 }
1535 }
1536
1537 if (params->apply_permissions) {
1538 r = enforce_groups(context, username, gid);
1539 if (r < 0) {
1540 *exit_status = EXIT_GROUP;
1541 return r;
1542 }
1543 }
1544
1545 umask(context->umask);
1546
1547 #ifdef HAVE_PAM
1548 if (params->apply_permissions && context->pam_name && username) {
1549 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1550 if (r < 0) {
1551 *exit_status = EXIT_PAM;
1552 return r;
1553 }
1554 }
1555 #endif
1556
1557 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1558 r = setup_netns(runtime->netns_storage_socket);
1559 if (r < 0) {
1560 *exit_status = EXIT_NETWORK;
1561 return r;
1562 }
1563 }
1564
1565 if (!strv_isempty(context->read_write_dirs) ||
1566 !strv_isempty(context->read_only_dirs) ||
1567 !strv_isempty(context->inaccessible_dirs) ||
1568 context->mount_flags != 0 ||
1569 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1570 params->bus_endpoint_path ||
1571 context->private_devices ||
1572 context->protect_system != PROTECT_SYSTEM_NO ||
1573 context->protect_home != PROTECT_HOME_NO) {
1574
1575 char *tmp = NULL, *var = NULL;
1576
1577 /* The runtime struct only contains the parent
1578 * of the private /tmp, which is
1579 * non-accessible to world users. Inside of it
1580 * there's a /tmp that is sticky, and that's
1581 * the one we want to use here. */
1582
1583 if (context->private_tmp && runtime) {
1584 if (runtime->tmp_dir)
1585 tmp = strappenda(runtime->tmp_dir, "/tmp");
1586 if (runtime->var_tmp_dir)
1587 var = strappenda(runtime->var_tmp_dir, "/tmp");
1588 }
1589
1590 r = setup_namespace(
1591 context->read_write_dirs,
1592 context->read_only_dirs,
1593 context->inaccessible_dirs,
1594 tmp,
1595 var,
1596 params->bus_endpoint_path,
1597 context->private_devices,
1598 context->protect_home,
1599 context->protect_system,
1600 context->mount_flags);
1601
1602 /* If we couldn't set up the namespace this is
1603 * probably due to a missing capability. In this case,
1604 * silently proceeed. */
1605 if (r == -EPERM || r == -EACCES) {
1606 log_open();
1607 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1608 log_close();
1609 } else if (r < 0) {
1610 *exit_status = EXIT_NAMESPACE;
1611 return r;
1612 }
1613 }
1614
1615 if (params->apply_chroot) {
1616 if (context->root_directory)
1617 if (chroot(context->root_directory) < 0) {
1618 *exit_status = EXIT_CHROOT;
1619 return -errno;
1620 }
1621
1622 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1623 *exit_status = EXIT_CHDIR;
1624 return -errno;
1625 }
1626 } else {
1627 _cleanup_free_ char *d = NULL;
1628
1629 if (asprintf(&d, "%s/%s",
1630 context->root_directory ? context->root_directory : "",
1631 context->working_directory ? context->working_directory : "") < 0) {
1632 *exit_status = EXIT_MEMORY;
1633 return -ENOMEM;
1634 }
1635
1636 if (chdir(d) < 0) {
1637 *exit_status = EXIT_CHDIR;
1638 return -errno;
1639 }
1640 }
1641
1642 #ifdef HAVE_SELINUX
1643 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1644 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1645 if (r < 0) {
1646 *exit_status = EXIT_SELINUX_CONTEXT;
1647 return r;
1648 }
1649 }
1650 #endif
1651
1652 /* We repeat the fd closing here, to make sure that
1653 * nothing is leaked from the PAM modules. Note that
1654 * we are more aggressive this time since socket_fd
1655 * and the netns fds we don't need anymore. The custom
1656 * endpoint fd was needed to upload the policy and can
1657 * now be closed as well. */
1658 r = close_all_fds(fds, n_fds);
1659 if (r >= 0)
1660 r = shift_fds(fds, n_fds);
1661 if (r >= 0)
1662 r = flags_fds(fds, n_fds, context->non_blocking);
1663 if (r < 0) {
1664 *exit_status = EXIT_FDS;
1665 return r;
1666 }
1667
1668 if (params->apply_permissions) {
1669
1670 for (i = 0; i < _RLIMIT_MAX; i++) {
1671 if (!context->rlimit[i])
1672 continue;
1673
1674 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1675 *exit_status = EXIT_LIMITS;
1676 return -errno;
1677 }
1678 }
1679
1680 if (context->capability_bounding_set_drop) {
1681 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1682 if (r < 0) {
1683 *exit_status = EXIT_CAPABILITIES;
1684 return r;
1685 }
1686 }
1687
1688 #ifdef HAVE_SMACK
1689 if (context->smack_process_label) {
1690 r = mac_smack_apply_pid(0, context->smack_process_label);
1691 if (r < 0) {
1692 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1693 return r;
1694 }
1695 }
1696 #endif
1697
1698 if (context->user) {
1699 r = enforce_user(context, uid);
1700 if (r < 0) {
1701 *exit_status = EXIT_USER;
1702 return r;
1703 }
1704 }
1705
1706 /* PR_GET_SECUREBITS is not privileged, while
1707 * PR_SET_SECUREBITS is. So to suppress
1708 * potential EPERMs we'll try not to call
1709 * PR_SET_SECUREBITS unless necessary. */
1710 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1711 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1712 *exit_status = EXIT_SECUREBITS;
1713 return -errno;
1714 }
1715
1716 if (context->capabilities)
1717 if (cap_set_proc(context->capabilities) < 0) {
1718 *exit_status = EXIT_CAPABILITIES;
1719 return -errno;
1720 }
1721
1722 if (context->no_new_privileges)
1723 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1724 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1725 return -errno;
1726 }
1727
1728 #ifdef HAVE_SECCOMP
1729 if (context->address_families_whitelist ||
1730 !set_isempty(context->address_families)) {
1731 r = apply_address_families(context);
1732 if (r < 0) {
1733 *exit_status = EXIT_ADDRESS_FAMILIES;
1734 return r;
1735 }
1736 }
1737
1738 if (context->syscall_whitelist ||
1739 !set_isempty(context->syscall_filter) ||
1740 !set_isempty(context->syscall_archs)) {
1741 r = apply_seccomp(context);
1742 if (r < 0) {
1743 *exit_status = EXIT_SECCOMP;
1744 return r;
1745 }
1746 }
1747 #endif
1748
1749 #ifdef HAVE_SELINUX
1750 if (mac_selinux_use()) {
1751 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1752
1753 if (exec_context) {
1754 r = setexeccon(exec_context);
1755 if (r < 0) {
1756 *exit_status = EXIT_SELINUX_CONTEXT;
1757 return r;
1758 }
1759 }
1760 }
1761 #endif
1762
1763 #ifdef HAVE_APPARMOR
1764 if (context->apparmor_profile && mac_apparmor_use()) {
1765 r = aa_change_onexec(context->apparmor_profile);
1766 if (r < 0 && !context->apparmor_profile_ignore) {
1767 *exit_status = EXIT_APPARMOR_PROFILE;
1768 return -errno;
1769 }
1770 }
1771 #endif
1772 }
1773
1774 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1775 if (r < 0) {
1776 *exit_status = EXIT_MEMORY;
1777 return r;
1778 }
1779
1780 final_env = strv_env_merge(5,
1781 params->environment,
1782 our_env,
1783 context->environment,
1784 files_env,
1785 pam_env,
1786 NULL);
1787 if (!final_env) {
1788 *exit_status = EXIT_MEMORY;
1789 return -ENOMEM;
1790 }
1791
1792 final_argv = replace_env_argv(argv, final_env);
1793 if (!final_argv) {
1794 *exit_status = EXIT_MEMORY;
1795 return -ENOMEM;
1796 }
1797
1798 final_env = strv_env_clean(final_env);
1799
1800 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1801 _cleanup_free_ char *line;
1802
1803 line = exec_command_line(final_argv);
1804 if (line) {
1805 log_open();
1806 log_unit_struct(params->unit_id,
1807 LOG_DEBUG,
1808 "EXECUTABLE=%s", command->path,
1809 LOG_MESSAGE("Executing: %s", line),
1810 NULL);
1811 log_close();
1812 }
1813 }
1814 execve(command->path, final_argv, final_env);
1815 *exit_status = EXIT_EXEC;
1816 return -errno;
1817 }
1818
1819 int exec_spawn(ExecCommand *command,
1820 const ExecContext *context,
1821 const ExecParameters *params,
1822 ExecRuntime *runtime,
1823 pid_t *ret) {
1824
1825 _cleanup_strv_free_ char **files_env = NULL;
1826 int *fds = NULL; unsigned n_fds = 0;
1827 _cleanup_free_ char *line = NULL;
1828 int socket_fd, r;
1829 char **argv;
1830 pid_t pid;
1831
1832 assert(command);
1833 assert(context);
1834 assert(ret);
1835 assert(params);
1836 assert(params->fds || params->n_fds <= 0);
1837
1838 if (context->std_input == EXEC_INPUT_SOCKET ||
1839 context->std_output == EXEC_OUTPUT_SOCKET ||
1840 context->std_error == EXEC_OUTPUT_SOCKET) {
1841
1842 if (params->n_fds != 1) {
1843 log_unit_error(params->unit_id, "Got more than one socket.");
1844 return -EINVAL;
1845 }
1846
1847 socket_fd = params->fds[0];
1848 } else {
1849 socket_fd = -1;
1850 fds = params->fds;
1851 n_fds = params->n_fds;
1852 }
1853
1854 r = exec_context_load_environment(context, params->unit_id, &files_env);
1855 if (r < 0)
1856 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1857
1858 argv = params->argv ?: command->argv;
1859 line = exec_command_line(argv);
1860 if (!line)
1861 return log_oom();
1862
1863 log_unit_struct(params->unit_id,
1864 LOG_DEBUG,
1865 "EXECUTABLE=%s", command->path,
1866 LOG_MESSAGE("About to execute: %s", line),
1867 NULL);
1868 pid = fork();
1869 if (pid < 0)
1870 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1871
1872 if (pid == 0) {
1873 int exit_status;
1874
1875 r = exec_child(command,
1876 context,
1877 params,
1878 runtime,
1879 argv,
1880 socket_fd,
1881 fds, n_fds,
1882 files_env,
1883 &exit_status);
1884 if (r < 0) {
1885 log_open();
1886 log_unit_struct(params->unit_id,
1887 LOG_ERR,
1888 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1889 "EXECUTABLE=%s", command->path,
1890 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1891 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1892 command->path, strerror(-r)),
1893 LOG_ERRNO(r),
1894 NULL);
1895 }
1896
1897 _exit(exit_status);
1898 }
1899
1900 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1901
1902 /* We add the new process to the cgroup both in the child (so
1903 * that we can be sure that no user code is ever executed
1904 * outside of the cgroup) and in the parent (so that we can be
1905 * sure that when we kill the cgroup the process will be
1906 * killed too). */
1907 if (params->cgroup_path)
1908 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1909
1910 exec_status_start(&command->exec_status, pid);
1911
1912 *ret = pid;
1913 return 0;
1914 }
1915
1916 void exec_context_init(ExecContext *c) {
1917 assert(c);
1918
1919 c->umask = 0022;
1920 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1921 c->cpu_sched_policy = SCHED_OTHER;
1922 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1923 c->syslog_level_prefix = true;
1924 c->ignore_sigpipe = true;
1925 c->timer_slack_nsec = NSEC_INFINITY;
1926 c->personality = 0xffffffffUL;
1927 c->runtime_directory_mode = 0755;
1928 }
1929
1930 void exec_context_done(ExecContext *c) {
1931 unsigned l;
1932
1933 assert(c);
1934
1935 strv_free(c->environment);
1936 c->environment = NULL;
1937
1938 strv_free(c->environment_files);
1939 c->environment_files = NULL;
1940
1941 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1942 free(c->rlimit[l]);
1943 c->rlimit[l] = NULL;
1944 }
1945
1946 free(c->working_directory);
1947 c->working_directory = NULL;
1948 free(c->root_directory);
1949 c->root_directory = NULL;
1950
1951 free(c->tty_path);
1952 c->tty_path = NULL;
1953
1954 free(c->syslog_identifier);
1955 c->syslog_identifier = NULL;
1956
1957 free(c->user);
1958 c->user = NULL;
1959
1960 free(c->group);
1961 c->group = NULL;
1962
1963 strv_free(c->supplementary_groups);
1964 c->supplementary_groups = NULL;
1965
1966 free(c->pam_name);
1967 c->pam_name = NULL;
1968
1969 if (c->capabilities) {
1970 cap_free(c->capabilities);
1971 c->capabilities = NULL;
1972 }
1973
1974 strv_free(c->read_only_dirs);
1975 c->read_only_dirs = NULL;
1976
1977 strv_free(c->read_write_dirs);
1978 c->read_write_dirs = NULL;
1979
1980 strv_free(c->inaccessible_dirs);
1981 c->inaccessible_dirs = NULL;
1982
1983 if (c->cpuset)
1984 CPU_FREE(c->cpuset);
1985
1986 free(c->utmp_id);
1987 c->utmp_id = NULL;
1988
1989 free(c->selinux_context);
1990 c->selinux_context = NULL;
1991
1992 free(c->apparmor_profile);
1993 c->apparmor_profile = NULL;
1994
1995 set_free(c->syscall_filter);
1996 c->syscall_filter = NULL;
1997
1998 set_free(c->syscall_archs);
1999 c->syscall_archs = NULL;
2000
2001 set_free(c->address_families);
2002 c->address_families = NULL;
2003
2004 strv_free(c->runtime_directory);
2005 c->runtime_directory = NULL;
2006
2007 bus_endpoint_free(c->bus_endpoint);
2008 c->bus_endpoint = NULL;
2009 }
2010
2011 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2012 char **i;
2013
2014 assert(c);
2015
2016 if (!runtime_prefix)
2017 return 0;
2018
2019 STRV_FOREACH(i, c->runtime_directory) {
2020 _cleanup_free_ char *p;
2021
2022 p = strjoin(runtime_prefix, "/", *i, NULL);
2023 if (!p)
2024 return -ENOMEM;
2025
2026 /* We execute this synchronously, since we need to be
2027 * sure this is gone when we start the service
2028 * next. */
2029 rm_rf(p, false, true, false);
2030 }
2031
2032 return 0;
2033 }
2034
2035 void exec_command_done(ExecCommand *c) {
2036 assert(c);
2037
2038 free(c->path);
2039 c->path = NULL;
2040
2041 strv_free(c->argv);
2042 c->argv = NULL;
2043 }
2044
2045 void exec_command_done_array(ExecCommand *c, unsigned n) {
2046 unsigned i;
2047
2048 for (i = 0; i < n; i++)
2049 exec_command_done(c+i);
2050 }
2051
2052 ExecCommand* exec_command_free_list(ExecCommand *c) {
2053 ExecCommand *i;
2054
2055 while ((i = c)) {
2056 LIST_REMOVE(command, c, i);
2057 exec_command_done(i);
2058 free(i);
2059 }
2060
2061 return NULL;
2062 }
2063
2064 void exec_command_free_array(ExecCommand **c, unsigned n) {
2065 unsigned i;
2066
2067 for (i = 0; i < n; i++)
2068 c[i] = exec_command_free_list(c[i]);
2069 }
2070
/* Context handed to invalid_env() while an environment file is cleaned */
typedef struct InvalidEnvInfo {
        const char *unit_id;    /* unit the message is logged for */
        const char *path;       /* environment file being processed */
} InvalidEnvInfo;

/* Callback for strv_env_clean_with_callback(): logs the rejected
 * assignment p together with the file it came from. userdata must
 * point to an InvalidEnvInfo. */
static void invalid_env(const char *p, void *userdata) {
        InvalidEnvInfo *ctx = userdata;

        log_unit_error(ctx->unit_id, "Ignoring invalid environment assignment '%s': %s", p, ctx->path);
}
2081
2082 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2083 char **i, **r = NULL;
2084
2085 assert(c);
2086 assert(l);
2087
2088 STRV_FOREACH(i, c->environment_files) {
2089 char *fn;
2090 int k;
2091 bool ignore = false;
2092 char **p;
2093 _cleanup_globfree_ glob_t pglob = {};
2094 int count, n;
2095
2096 fn = *i;
2097
2098 if (fn[0] == '-') {
2099 ignore = true;
2100 fn ++;
2101 }
2102
2103 if (!path_is_absolute(fn)) {
2104 if (ignore)
2105 continue;
2106
2107 strv_free(r);
2108 return -EINVAL;
2109 }
2110
2111 /* Filename supports globbing, take all matching files */
2112 errno = 0;
2113 if (glob(fn, 0, NULL, &pglob) != 0) {
2114 if (ignore)
2115 continue;
2116
2117 strv_free(r);
2118 return errno ? -errno : -EINVAL;
2119 }
2120 count = pglob.gl_pathc;
2121 if (count == 0) {
2122 if (ignore)
2123 continue;
2124
2125 strv_free(r);
2126 return -EINVAL;
2127 }
2128 for (n = 0; n < count; n++) {
2129 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2130 if (k < 0) {
2131 if (ignore)
2132 continue;
2133
2134 strv_free(r);
2135 return k;
2136 }
2137 /* Log invalid environment variables with filename */
2138 if (p) {
2139 InvalidEnvInfo info = {
2140 .unit_id = unit_id,
2141 .path = pglob.gl_pathv[n]
2142 };
2143
2144 p = strv_env_clean_with_callback(p, invalid_env, &info);
2145 }
2146
2147 if (r == NULL)
2148 r = p;
2149 else {
2150 char **m;
2151
2152 m = strv_env_merge(2, r, p);
2153 strv_free(r);
2154 strv_free(p);
2155 if (!m)
2156 return -ENOMEM;
2157
2158 r = m;
2159 }
2160 }
2161 }
2162
2163 *l = r;
2164
2165 return 0;
2166 }
2167
2168 static bool tty_may_match_dev_console(const char *tty) {
2169 _cleanup_free_ char *active = NULL;
2170 char *console;
2171
2172 if (startswith(tty, "/dev/"))
2173 tty += 5;
2174
2175 /* trivial identity? */
2176 if (streq(tty, "console"))
2177 return true;
2178
2179 console = resolve_dev_console(&active);
2180 /* if we could not resolve, assume it may */
2181 if (!console)
2182 return true;
2183
2184 /* "tty0" means the active VC, so it may be the same sometimes */
2185 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2186 }
2187
2188 bool exec_context_may_touch_console(ExecContext *ec) {
2189 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2190 is_terminal_input(ec->std_input) ||
2191 is_terminal_output(ec->std_output) ||
2192 is_terminal_output(ec->std_error)) &&
2193 tty_may_match_dev_console(tty_path(ec));
2194 }
2195
2196 static void strv_fprintf(FILE *f, char **l) {
2197 char **g;
2198
2199 assert(f);
2200
2201 STRV_FOREACH(g, l)
2202 fprintf(f, " %s", *g);
2203 }
2204
2205 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2206 char **e;
2207 unsigned i;
2208
2209 assert(c);
2210 assert(f);
2211
2212 prefix = strempty(prefix);
2213
2214 fprintf(f,
2215 "%sUMask: %04o\n"
2216 "%sWorkingDirectory: %s\n"
2217 "%sRootDirectory: %s\n"
2218 "%sNonBlocking: %s\n"
2219 "%sPrivateTmp: %s\n"
2220 "%sPrivateNetwork: %s\n"
2221 "%sPrivateDevices: %s\n"
2222 "%sProtectHome: %s\n"
2223 "%sProtectSystem: %s\n"
2224 "%sIgnoreSIGPIPE: %s\n",
2225 prefix, c->umask,
2226 prefix, c->working_directory ? c->working_directory : "/",
2227 prefix, c->root_directory ? c->root_directory : "/",
2228 prefix, yes_no(c->non_blocking),
2229 prefix, yes_no(c->private_tmp),
2230 prefix, yes_no(c->private_network),
2231 prefix, yes_no(c->private_devices),
2232 prefix, protect_home_to_string(c->protect_home),
2233 prefix, protect_system_to_string(c->protect_system),
2234 prefix, yes_no(c->ignore_sigpipe));
2235
2236 STRV_FOREACH(e, c->environment)
2237 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2238
2239 STRV_FOREACH(e, c->environment_files)
2240 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2241
2242 if (c->nice_set)
2243 fprintf(f,
2244 "%sNice: %i\n",
2245 prefix, c->nice);
2246
2247 if (c->oom_score_adjust_set)
2248 fprintf(f,
2249 "%sOOMScoreAdjust: %i\n",
2250 prefix, c->oom_score_adjust);
2251
2252 for (i = 0; i < RLIM_NLIMITS; i++)
2253 if (c->rlimit[i])
2254 fprintf(f, "%s%s: "RLIM_FMT"\n",
2255 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2256
2257 if (c->ioprio_set) {
2258 _cleanup_free_ char *class_str = NULL;
2259
2260 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2261 fprintf(f,
2262 "%sIOSchedulingClass: %s\n"
2263 "%sIOPriority: %i\n",
2264 prefix, strna(class_str),
2265 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2266 }
2267
2268 if (c->cpu_sched_set) {
2269 _cleanup_free_ char *policy_str = NULL;
2270
2271 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2272 fprintf(f,
2273 "%sCPUSchedulingPolicy: %s\n"
2274 "%sCPUSchedulingPriority: %i\n"
2275 "%sCPUSchedulingResetOnFork: %s\n",
2276 prefix, strna(policy_str),
2277 prefix, c->cpu_sched_priority,
2278 prefix, yes_no(c->cpu_sched_reset_on_fork));
2279 }
2280
2281 if (c->cpuset) {
2282 fprintf(f, "%sCPUAffinity:", prefix);
2283 for (i = 0; i < c->cpuset_ncpus; i++)
2284 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2285 fprintf(f, " %u", i);
2286 fputs("\n", f);
2287 }
2288
2289 if (c->timer_slack_nsec != NSEC_INFINITY)
2290 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2291
2292 fprintf(f,
2293 "%sStandardInput: %s\n"
2294 "%sStandardOutput: %s\n"
2295 "%sStandardError: %s\n",
2296 prefix, exec_input_to_string(c->std_input),
2297 prefix, exec_output_to_string(c->std_output),
2298 prefix, exec_output_to_string(c->std_error));
2299
2300 if (c->tty_path)
2301 fprintf(f,
2302 "%sTTYPath: %s\n"
2303 "%sTTYReset: %s\n"
2304 "%sTTYVHangup: %s\n"
2305 "%sTTYVTDisallocate: %s\n",
2306 prefix, c->tty_path,
2307 prefix, yes_no(c->tty_reset),
2308 prefix, yes_no(c->tty_vhangup),
2309 prefix, yes_no(c->tty_vt_disallocate));
2310
2311 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2312 c->std_output == EXEC_OUTPUT_KMSG ||
2313 c->std_output == EXEC_OUTPUT_JOURNAL ||
2314 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2317 c->std_error == EXEC_OUTPUT_SYSLOG ||
2318 c->std_error == EXEC_OUTPUT_KMSG ||
2319 c->std_error == EXEC_OUTPUT_JOURNAL ||
2320 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2321 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2322 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2323
2324 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2325
2326 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2327 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2328
2329 fprintf(f,
2330 "%sSyslogFacility: %s\n"
2331 "%sSyslogLevel: %s\n",
2332 prefix, strna(fac_str),
2333 prefix, strna(lvl_str));
2334 }
2335
2336 if (c->capabilities) {
2337 _cleanup_cap_free_charp_ char *t;
2338
2339 t = cap_to_text(c->capabilities, NULL);
2340 if (t)
2341 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2342 }
2343
2344 if (c->secure_bits)
2345 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2346 prefix,
2347 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2348 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2349 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2350 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2351 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2352 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2353
2354 if (c->capability_bounding_set_drop) {
2355 unsigned long l;
2356 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2357
2358 for (l = 0; l <= cap_last_cap(); l++)
2359 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2360 fprintf(f, " %s", strna(capability_to_name(l)));
2361
2362 fputs("\n", f);
2363 }
2364
2365 if (c->user)
2366 fprintf(f, "%sUser: %s\n", prefix, c->user);
2367 if (c->group)
2368 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2369
2370 if (strv_length(c->supplementary_groups) > 0) {
2371 fprintf(f, "%sSupplementaryGroups:", prefix);
2372 strv_fprintf(f, c->supplementary_groups);
2373 fputs("\n", f);
2374 }
2375
2376 if (c->pam_name)
2377 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2378
2379 if (strv_length(c->read_write_dirs) > 0) {
2380 fprintf(f, "%sReadWriteDirs:", prefix);
2381 strv_fprintf(f, c->read_write_dirs);
2382 fputs("\n", f);
2383 }
2384
2385 if (strv_length(c->read_only_dirs) > 0) {
2386 fprintf(f, "%sReadOnlyDirs:", prefix);
2387 strv_fprintf(f, c->read_only_dirs);
2388 fputs("\n", f);
2389 }
2390
2391 if (strv_length(c->inaccessible_dirs) > 0) {
2392 fprintf(f, "%sInaccessibleDirs:", prefix);
2393 strv_fprintf(f, c->inaccessible_dirs);
2394 fputs("\n", f);
2395 }
2396
2397 if (c->utmp_id)
2398 fprintf(f,
2399 "%sUtmpIdentifier: %s\n",
2400 prefix, c->utmp_id);
2401
2402 if (c->selinux_context)
2403 fprintf(f,
2404 "%sSELinuxContext: %s%s\n",
2405 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2406
2407 if (c->personality != 0xffffffffUL)
2408 fprintf(f,
2409 "%sPersonality: %s\n",
2410 prefix, strna(personality_to_string(c->personality)));
2411
2412 if (c->syscall_filter) {
2413 #ifdef HAVE_SECCOMP
2414 Iterator j;
2415 void *id;
2416 bool first = true;
2417 #endif
2418
2419 fprintf(f,
2420 "%sSystemCallFilter: ",
2421 prefix);
2422
2423 if (!c->syscall_whitelist)
2424 fputc('~', f);
2425
2426 #ifdef HAVE_SECCOMP
2427 SET_FOREACH(id, c->syscall_filter, j) {
2428 _cleanup_free_ char *name = NULL;
2429
2430 if (first)
2431 first = false;
2432 else
2433 fputc(' ', f);
2434
2435 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2436 fputs(strna(name), f);
2437 }
2438 #endif
2439
2440 fputc('\n', f);
2441 }
2442
2443 if (c->syscall_archs) {
2444 #ifdef HAVE_SECCOMP
2445 Iterator j;
2446 void *id;
2447 #endif
2448
2449 fprintf(f,
2450 "%sSystemCallArchitectures:",
2451 prefix);
2452
2453 #ifdef HAVE_SECCOMP
2454 SET_FOREACH(id, c->syscall_archs, j)
2455 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2456 #endif
2457 fputc('\n', f);
2458 }
2459
2460 if (c->syscall_errno != 0)
2461 fprintf(f,
2462 "%sSystemCallErrorNumber: %s\n",
2463 prefix, strna(errno_to_name(c->syscall_errno)));
2464
2465 if (c->apparmor_profile)
2466 fprintf(f,
2467 "%sAppArmorProfile: %s%s\n",
2468 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2469 }
2470
2471 bool exec_context_maintains_privileges(ExecContext *c) {
2472 assert(c);
2473
2474 /* Returns true if the process forked off would run run under
2475 * an unchanged UID or as root. */
2476
2477 if (!c->user)
2478 return true;
2479
2480 if (streq(c->user, "root") || streq(c->user, "0"))
2481 return true;
2482
2483 return false;
2484 }
2485
2486 void exec_status_start(ExecStatus *s, pid_t pid) {
2487 assert(s);
2488
2489 zero(*s);
2490 s->pid = pid;
2491 dual_timestamp_get(&s->start_timestamp);
2492 }
2493
2494 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2495 assert(s);
2496
2497 if (s->pid && s->pid != pid)
2498 zero(*s);
2499
2500 s->pid = pid;
2501 dual_timestamp_get(&s->exit_timestamp);
2502
2503 s->code = code;
2504 s->status = status;
2505
2506 if (context) {
2507 if (context->utmp_id)
2508 utmp_put_dead_process(context->utmp_id, pid, code, status);
2509
2510 exec_context_tty_reset(context);
2511 }
2512 }
2513
2514 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2515 char buf[FORMAT_TIMESTAMP_MAX];
2516
2517 assert(s);
2518 assert(f);
2519
2520 if (s->pid <= 0)
2521 return;
2522
2523 prefix = strempty(prefix);
2524
2525 fprintf(f,
2526 "%sPID: "PID_FMT"\n",
2527 prefix, s->pid);
2528
2529 if (s->start_timestamp.realtime > 0)
2530 fprintf(f,
2531 "%sStart Timestamp: %s\n",
2532 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2533
2534 if (s->exit_timestamp.realtime > 0)
2535 fprintf(f,
2536 "%sExit Timestamp: %s\n"
2537 "%sExit Code: %s\n"
2538 "%sExit Status: %i\n",
2539 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2540 prefix, sigchld_code_to_string(s->code),
2541 prefix, s->status);
2542 }
2543
2544 char *exec_command_line(char **argv) {
2545 size_t k;
2546 char *n, *p, **a;
2547 bool first = true;
2548
2549 assert(argv);
2550
2551 k = 1;
2552 STRV_FOREACH(a, argv)
2553 k += strlen(*a)+3;
2554
2555 if (!(n = new(char, k)))
2556 return NULL;
2557
2558 p = n;
2559 STRV_FOREACH(a, argv) {
2560
2561 if (!first)
2562 *(p++) = ' ';
2563 else
2564 first = false;
2565
2566 if (strpbrk(*a, WHITESPACE)) {
2567 *(p++) = '\'';
2568 p = stpcpy(p, *a);
2569 *(p++) = '\'';
2570 } else
2571 p = stpcpy(p, *a);
2572
2573 }
2574
2575 *p = 0;
2576
2577 /* FIXME: this doesn't really handle arguments that have
2578 * spaces and ticks in them */
2579
2580 return n;
2581 }
2582
2583 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2584 _cleanup_free_ char *cmd = NULL;
2585 const char *prefix2;
2586
2587 assert(c);
2588 assert(f);
2589
2590 prefix = strempty(prefix);
2591 prefix2 = strappenda(prefix, "\t");
2592
2593 cmd = exec_command_line(c->argv);
2594 fprintf(f,
2595 "%sCommand Line: %s\n",
2596 prefix, cmd ? cmd : strerror(ENOMEM));
2597
2598 exec_status_dump(&c->exec_status, f, prefix2);
2599 }
2600
2601 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2602 assert(f);
2603
2604 prefix = strempty(prefix);
2605
2606 LIST_FOREACH(command, c, c)
2607 exec_command_dump(c, f, prefix);
2608 }
2609
2610 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2611 ExecCommand *end;
2612
2613 assert(l);
2614 assert(e);
2615
2616 if (*l) {
2617 /* It's kind of important, that we keep the order here */
2618 LIST_FIND_TAIL(command, *l, end);
2619 LIST_INSERT_AFTER(command, *l, end, e);
2620 } else
2621 *l = e;
2622 }
2623
2624 int exec_command_set(ExecCommand *c, const char *path, ...) {
2625 va_list ap;
2626 char **l, *p;
2627
2628 assert(c);
2629 assert(path);
2630
2631 va_start(ap, path);
2632 l = strv_new_ap(path, ap);
2633 va_end(ap);
2634
2635 if (!l)
2636 return -ENOMEM;
2637
2638 p = strdup(path);
2639 if (!p) {
2640 strv_free(l);
2641 return -ENOMEM;
2642 }
2643
2644 free(c->path);
2645 c->path = p;
2646
2647 strv_free(c->argv);
2648 c->argv = l;
2649
2650 return 0;
2651 }
2652
2653 int exec_command_append(ExecCommand *c, const char *path, ...) {
2654 _cleanup_strv_free_ char **l = NULL;
2655 va_list ap;
2656 int r;
2657
2658 assert(c);
2659 assert(path);
2660
2661 va_start(ap, path);
2662 l = strv_new_ap(path, ap);
2663 va_end(ap);
2664
2665 if (!l)
2666 return -ENOMEM;
2667
2668 r = strv_extend_strv(&c->argv, l);
2669 if (r < 0)
2670 return r;
2671
2672 return 0;
2673 }
2674
2675
2676 static int exec_runtime_allocate(ExecRuntime **rt) {
2677
2678 if (*rt)
2679 return 0;
2680
2681 *rt = new0(ExecRuntime, 1);
2682 if (!*rt)
2683 return -ENOMEM;
2684
2685 (*rt)->n_ref = 1;
2686 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2687
2688 return 0;
2689 }
2690
2691 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2692 int r;
2693
2694 assert(rt);
2695 assert(c);
2696 assert(id);
2697
2698 if (*rt)
2699 return 1;
2700
2701 if (!c->private_network && !c->private_tmp)
2702 return 0;
2703
2704 r = exec_runtime_allocate(rt);
2705 if (r < 0)
2706 return r;
2707
2708 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2709 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2710 return -errno;
2711 }
2712
2713 if (c->private_tmp && !(*rt)->tmp_dir) {
2714 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2715 if (r < 0)
2716 return r;
2717 }
2718
2719 return 1;
2720 }
2721
2722 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2723 assert(r);
2724 assert(r->n_ref > 0);
2725
2726 r->n_ref++;
2727 return r;
2728 }
2729
2730 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2731
2732 if (!r)
2733 return NULL;
2734
2735 assert(r->n_ref > 0);
2736
2737 r->n_ref--;
2738 if (r->n_ref <= 0) {
2739 free(r->tmp_dir);
2740 free(r->var_tmp_dir);
2741 safe_close_pair(r->netns_storage_socket);
2742 free(r);
2743 }
2744
2745 return NULL;
2746 }
2747
2748 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2749 assert(u);
2750 assert(f);
2751 assert(fds);
2752
2753 if (!rt)
2754 return 0;
2755
2756 if (rt->tmp_dir)
2757 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2758
2759 if (rt->var_tmp_dir)
2760 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2761
2762 if (rt->netns_storage_socket[0] >= 0) {
2763 int copy;
2764
2765 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2766 if (copy < 0)
2767 return copy;
2768
2769 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2770 }
2771
2772 if (rt->netns_storage_socket[1] >= 0) {
2773 int copy;
2774
2775 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2776 if (copy < 0)
2777 return copy;
2778
2779 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2780 }
2781
2782 return 0;
2783 }
2784
2785 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2786 int r;
2787
2788 assert(rt);
2789 assert(key);
2790 assert(value);
2791
2792 if (streq(key, "tmp-dir")) {
2793 char *copy;
2794
2795 r = exec_runtime_allocate(rt);
2796 if (r < 0)
2797 return r;
2798
2799 copy = strdup(value);
2800 if (!copy)
2801 return log_oom();
2802
2803 free((*rt)->tmp_dir);
2804 (*rt)->tmp_dir = copy;
2805
2806 } else if (streq(key, "var-tmp-dir")) {
2807 char *copy;
2808
2809 r = exec_runtime_allocate(rt);
2810 if (r < 0)
2811 return r;
2812
2813 copy = strdup(value);
2814 if (!copy)
2815 return log_oom();
2816
2817 free((*rt)->var_tmp_dir);
2818 (*rt)->var_tmp_dir = copy;
2819
2820 } else if (streq(key, "netns-socket-0")) {
2821 int fd;
2822
2823 r = exec_runtime_allocate(rt);
2824 if (r < 0)
2825 return r;
2826
2827 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2828 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2829 else {
2830 safe_close((*rt)->netns_storage_socket[0]);
2831 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2832 }
2833 } else if (streq(key, "netns-socket-1")) {
2834 int fd;
2835
2836 r = exec_runtime_allocate(rt);
2837 if (r < 0)
2838 return r;
2839
2840 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2841 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2842 else {
2843 safe_close((*rt)->netns_storage_socket[1]);
2844 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2845 }
2846 } else
2847 return 0;
2848
2849 return 1;
2850 }
2851
2852 static void *remove_tmpdir_thread(void *p) {
2853 _cleanup_free_ char *path = p;
2854
2855 rm_rf_dangerous(path, false, true, false);
2856 return NULL;
2857 }
2858
2859 void exec_runtime_destroy(ExecRuntime *rt) {
2860 int r;
2861
2862 if (!rt)
2863 return;
2864
2865 /* If there are multiple users of this, let's leave the stuff around */
2866 if (rt->n_ref > 1)
2867 return;
2868
2869 if (rt->tmp_dir) {
2870 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2871
2872 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2873 if (r < 0) {
2874 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2875 free(rt->tmp_dir);
2876 }
2877
2878 rt->tmp_dir = NULL;
2879 }
2880
2881 if (rt->var_tmp_dir) {
2882 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2883
2884 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2885 if (r < 0) {
2886 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2887 free(rt->var_tmp_dir);
2888 }
2889
2890 rt->var_tmp_dir = NULL;
2891 }
2892
2893 safe_close_pair(rt->netns_storage_socket);
2894 }
2895
2896 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2897 [EXEC_INPUT_NULL] = "null",
2898 [EXEC_INPUT_TTY] = "tty",
2899 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2900 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2901 [EXEC_INPUT_SOCKET] = "socket"
2902 };
2903
2904 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2905
2906 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2907 [EXEC_OUTPUT_INHERIT] = "inherit",
2908 [EXEC_OUTPUT_NULL] = "null",
2909 [EXEC_OUTPUT_TTY] = "tty",
2910 [EXEC_OUTPUT_SYSLOG] = "syslog",
2911 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2912 [EXEC_OUTPUT_KMSG] = "kmsg",
2913 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2914 [EXEC_OUTPUT_JOURNAL] = "journal",
2915 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2916 [EXEC_OUTPUT_SOCKET] = "socket"
2917 };
2918
2919 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);