]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
Merge pull request #7745 from poettering/sockaddr-size
[thirdparty/systemd.git] / src / basic / process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <linux/oom.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/mman.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <syslog.h>
38 #include <unistd.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
41 #endif
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "escape.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "fs-util.h"
49 #include "ioprio.h"
50 #include "log.h"
51 #include "macro.h"
52 #include "missing.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "terminal-util.h"
60 #include "user-util.h"
61 #include "util.h"
62
63 int get_process_state(pid_t pid) {
64 const char *p;
65 char state;
66 int r;
67 _cleanup_free_ char *line = NULL;
68
69 assert(pid >= 0);
70
71 p = procfs_file_alloca(pid, "stat");
72
73 r = read_one_line_file(p, &line);
74 if (r == -ENOENT)
75 return -ESRCH;
76 if (r < 0)
77 return r;
78
79 p = strrchr(line, ')');
80 if (!p)
81 return -EIO;
82
83 p++;
84
85 if (sscanf(p, " %c", &state) != 1)
86 return -EIO;
87
88 return (unsigned char) state;
89 }
90
int get_process_comm(pid_t pid, char **name) {
        const char *path;
        int r;

        assert(name);
        assert(pid >= 0);

        /* Reads the first line of the process' procfs "comm" file into *name. A missing file is reported as
         * -ESRCH; any other result of the read is passed through unchanged. */

        path = procfs_file_alloca(pid, "comm");

        r = read_one_line_file(path, name);

        return r == -ENOENT ? -ESRCH : r;
}
106
int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
        _cleanup_fclose_ FILE *f = NULL;
        bool space = false;
        char *k, *ans = NULL;
        const char *p;
        int c;

        assert(line);
        assert(pid >= 0);

        /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
         * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
         * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
         * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
         * command line that resolves to the empty string will return the "comm" name of the process instead.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * comm_fallback is false). Returns 0 and sets *line otherwise. May also return -ENOMEM, or another negative
         * errno if the cmdline file cannot be opened.
         *
         * Note that max_length == 1 is special: it always yields the empty string, without consulting the comm
         * fallback. */

        p = procfs_file_alloca(pid, "cmdline");

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        if (max_length == 1) {

                /* If there's only room for one byte, return the empty string */
                ans = new0(char, 1);
                if (!ans)
                        return -ENOMEM;

                *line = ans;
                return 0;

        } else if (max_length == 0) {
                /* Unbounded case: grow the buffer as needed while reading. */
                size_t len = 0, allocated = 0;

                while ((c = getc(f)) != EOF) {

                        /* One iteration writes at most two bytes (a pending space plus the character), and one
                         * more byte is needed for the final NUL, hence len+3. */
                        if (!GREEDY_REALLOC(ans, allocated, len+3)) {
                                free(ans);
                                return -ENOMEM;
                        }

                        if (isprint(c)) {
                                if (space) {
                                        ans[len++] = ' ';
                                        space = false;
                                }

                                ans[len++] = c;
                        } else if (len > 0)
                                /* Remember a separator, but only emit it once a printable character follows. Leading
                                 * unprintable characters are dropped entirely. */
                                space = true;
                }

                if (len > 0)
                        ans[len] = '\0';
                else
                        /* Nothing printable was read: reset to NULL so that the fallback below kicks in. */
                        ans = mfree(ans);

        } else {
                /* Bounded case: fill a fixed-size buffer, and remember whether we had to cut the input short. */
                bool dotdotdot = false;
                size_t left;

                ans = new(char, max_length);
                if (!ans)
                        return -ENOMEM;

                k = ans;
                left = max_length; /* bytes still free in the buffer, including the one needed for the NUL */
                while ((c = getc(f)) != EOF) {

                        if (isprint(c)) {

                                if (space) {
                                        /* A space is only worth writing if a character and the NUL fit after it. */
                                        if (left <= 2) {
                                                dotdotdot = true;
                                                break;
                                        }

                                        *(k++) = ' ';
                                        left--;
                                        space = false;
                                }

                                /* Always keep the last byte for the NUL terminator. */
                                if (left <= 1) {
                                        dotdotdot = true;
                                        break;
                                }

                                *(k++) = (char) c;
                                left--;
                        } else if (k > ans)
                                space = true;
                }

                if (dotdotdot) {
                        /* The input was truncated: overwrite the tail of the buffer with (as much as fits of) "...". */
                        if (max_length <= 4) {
                                k = ans;
                                left = max_length;
                        } else {
                                k = ans + max_length - 4;
                                left = 4;

                                /* Eat up final spaces */
                                while (k > ans && isspace(k[-1])) {
                                        k--;
                                        left++;
                                }
                        }

                        strncpy(k, "...", left-1);
                        k[left-1] = 0;
                } else
                        *k = 0;
        }

        /* Kernel threads have no argv[] */
        if (isempty(ans)) {
                _cleanup_free_ char *t = NULL;
                int h;

                free(ans);

                if (!comm_fallback)
                        return -ENOENT;

                h = get_process_comm(pid, &t);
                if (h < 0)
                        return h;

                /* Format the comm name in square brackets, applying the same length limit as above. */
                if (max_length == 0)
                        ans = strjoin("[", t, "]");
                else {
                        size_t l;

                        l = strlen(t);

                        /* "[" + name + "]" + NUL needs l + 3 bytes. */
                        if (l + 3 <= max_length)
                                ans = strjoin("[", t, "]");
                        else if (max_length <= 6) {

                                /* Too short for any part of the name: return a prefix of "[...]". */
                                ans = new(char, max_length);
                                if (!ans)
                                        return -ENOMEM;

                                memcpy(ans, "[...]", max_length-1);
                                ans[max_length-1] = 0;
                        } else {
                                char *e;

                                /* "[" and "...]" take five bytes plus the NUL, leaving max_length - 6 for the name. */
                                t[max_length - 6] = 0;

                                /* Chop off final spaces */
                                e = strchr(t, 0);
                                while (e > t && isspace(e[-1]))
                                        e--;
                                *e = 0;

                                ans = strjoin("[", t, "...]");
                        }
                }
                if (!ans)
                        return -ENOMEM;
        }

        *line = ans;
        return 0;
}
282
int rename_process(const char name[]) {
        /* Anonymous mapping (and its size) that was handed to the kernel as the argv[] area by a previous call; kept
         * across calls so that it can be reused or unmapped. */
        static size_t mm_size = 0;
        static char *mm = NULL;
        bool truncated = false;
        size_t l;

        /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
         * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
         * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
         * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
         * truncated.
         *
         * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. Returns -EINVAL if the name
         * is empty and -EPERM if called from a thread other than the main one. */

        if (isempty(name))
                return -EINVAL; /* let's not confuse users unnecessarily with an empty name */

        if (!is_main_thread())
                return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
                                * cache things without locking, and we make assumptions that PR_SET_NAME sets the
                                * process name that isn't correct on any other threads */

        l = strlen(name);

        /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
         * can use PR_SET_NAME, which sets the thread name for the calling thread. */
        if (prctl(PR_SET_NAME, name) < 0)
                log_debug_errno(errno, "PR_SET_NAME failed: %m");
        if (l > 15) /* Linux process names can be 15 chars at max */
                truncated = true;

        /* Second step, change glibc's ID of the process name. */
        if (program_invocation_name) {
                size_t k;

                /* Overwrite in place, never writing more than the existing string's length, so that the existing NUL
                 * terminator stays where it is. */
                k = strlen(program_invocation_name);
                strncpy(program_invocation_name, name, k);
                if (l > k)
                        truncated = true;
        }

        /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
         * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
         * the end. This is the best option for changing /proc/self/cmdline. */

        /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
         * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
         * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
         * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
         * mmap() is not. */
        if (geteuid() != 0)
                log_debug("Skipping PR_SET_MM, as we don't have privileges.");
        else if (mm_size < l+1) {
                /* No mapping yet, or the cached one is too small for the new name: allocate a fresh one. */
                size_t nn_size;
                char *nn;

                nn_size = PAGE_ALIGN(l+1);
                nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                if (nn == MAP_FAILED) {
                        log_debug_errno(errno, "mmap() failed: %m");
                        goto use_saved_argv;
                }

                strncpy(nn, name, nn_size);

                /* Now, let's tell the kernel about this new memory */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
                        log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
                        (void) munmap(nn, nn_size);
                        goto use_saved_argv;
                }

                /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
                 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");

                /* The kernel now refers to the new mapping, hence release the previous one, if there was any. */
                if (mm)
                        (void) munmap(mm, mm_size);

                mm = nn;
                mm_size = nn_size;
        } else {
                /* The cached mapping is large enough, reuse it. */
                strncpy(mm, name, mm_size);

                /* Update the end pointer, continuing regardless of any failure. */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
        }

use_saved_argv:
        /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
         * it still looks here */

        if (saved_argc > 0) {
                int i;

                if (saved_argv[0]) {
                        size_t k;

                        /* As above: overwrite in place, bounded by the length of the existing string. */
                        k = strlen(saved_argv[0]);
                        strncpy(saved_argv[0], name, k);
                        if (l > k)
                                truncated = true;
                }

                /* Blank out all further arguments, stopping at the first NULL entry. */
                for (i = 1; i < saved_argc; i++) {
                        if (!saved_argv[i])
                                break;

                        memzero(saved_argv[i], strlen(saved_argv[i]));
                }
        }

        return !truncated;
}
399
400 int is_kernel_thread(pid_t pid) {
401 const char *p;
402 size_t count;
403 char c;
404 bool eof;
405 FILE *f;
406
407 if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
408 return 0;
409
410 assert(pid > 1);
411
412 p = procfs_file_alloca(pid, "cmdline");
413 f = fopen(p, "re");
414 if (!f) {
415 if (errno == ENOENT)
416 return -ESRCH;
417 return -errno;
418 }
419
420 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
421
422 count = fread(&c, 1, 1, f);
423 eof = feof(f);
424 fclose(f);
425
426 /* Kernel threads have an empty cmdline */
427
428 if (count <= 0)
429 return eof ? 1 : -errno;
430
431 return 0;
432 }
433
434 int get_process_capeff(pid_t pid, char **capeff) {
435 const char *p;
436 int r;
437
438 assert(capeff);
439 assert(pid >= 0);
440
441 p = procfs_file_alloca(pid, "status");
442
443 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
444 if (r == -ENOENT)
445 return -ESRCH;
446
447 return r;
448 }
449
static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        assert(proc_file);
        assert(name);

        /* Resolves the symlink at proc_file into a newly allocated string in *name. Returns 0 on success,
         * -ESRCH if the link does not exist, and any other negative error unchanged. */

        r = readlink_malloc(proc_file, name);
        if (r >= 0)
                return 0;

        return r == -ENOENT ? -ESRCH : r;
}
464
int get_process_exe(pid_t pid, char **name) {
        const char *link_path;
        char *suffix;
        int r;

        assert(pid >= 0);

        /* Reads the target of the process' procfs "exe" symlink into *name, cutting off a trailing
         * " (deleted)" marker if there is one. Returns 0 on success, a negative error otherwise. */

        link_path = procfs_file_alloca(pid, "exe");
        r = get_process_link_contents(link_path, name);
        if (r < 0)
                return r;

        suffix = endswith(*name, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}
483
484 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
485 _cleanup_fclose_ FILE *f = NULL;
486 char line[LINE_MAX];
487 const char *p;
488
489 assert(field);
490 assert(uid);
491
492 if (pid < 0)
493 return -EINVAL;
494
495 p = procfs_file_alloca(pid, "status");
496 f = fopen(p, "re");
497 if (!f) {
498 if (errno == ENOENT)
499 return -ESRCH;
500 return -errno;
501 }
502
503 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
504
505 FOREACH_LINE(line, f, return -errno) {
506 char *l;
507
508 l = strstrip(line);
509
510 if (startswith(l, field)) {
511 l += strlen(field);
512 l += strspn(l, WHITESPACE);
513
514 l[strcspn(l, WHITESPACE)] = 0;
515
516 return parse_uid(l, uid);
517 }
518 }
519
520 return -EIO;
521 }
522
int get_process_uid(pid_t pid, uid_t *uid) {
        /* Determines the UID of the specified process. For pid 0 or our own PID this is answered directly via
         * getuid(); otherwise the "Uid:" line of the process' status file is parsed. */

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
532
int get_process_gid(pid_t pid, gid_t *gid) {
        /* Determines the GID of the specified process. For pid 0 or our own PID this is answered directly via
         * getgid(); otherwise the "Gid:" line of the process' status file is parsed. */

        /* The shared parser stores its result through a uid_t pointer, hence both types must have the same size. */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}
543
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        assert(pid >= 0);

        /* Returns the target of the process' procfs "cwd" symlink in *cwd. */
        link_path = procfs_file_alloca(pid, "cwd");

        return get_process_link_contents(link_path, cwd);
}
553
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        assert(pid >= 0);

        /* Returns the target of the process' procfs "root" symlink in *root. */
        link_path = procfs_file_alloca(pid, "root");

        return get_process_link_contents(link_path, root);
}
563
564 int get_process_environ(pid_t pid, char **env) {
565 _cleanup_fclose_ FILE *f = NULL;
566 _cleanup_free_ char *outcome = NULL;
567 int c;
568 const char *p;
569 size_t allocated = 0, sz = 0;
570
571 assert(pid >= 0);
572 assert(env);
573
574 p = procfs_file_alloca(pid, "environ");
575
576 f = fopen(p, "re");
577 if (!f) {
578 if (errno == ENOENT)
579 return -ESRCH;
580 return -errno;
581 }
582
583 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
584
585 while ((c = fgetc(f)) != EOF) {
586 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
587 return -ENOMEM;
588
589 if (c == '\0')
590 outcome[sz++] = '\n';
591 else
592 sz += cescape_char(c, outcome + sz);
593 }
594
595 if (!outcome) {
596 outcome = strdup("");
597 if (!outcome)
598 return -ENOMEM;
599 } else
600 outcome[sz] = '\0';
601
602 *env = outcome;
603 outcome = NULL;
604
605 return 0;
606 }
607
608 int get_process_ppid(pid_t pid, pid_t *_ppid) {
609 int r;
610 _cleanup_free_ char *line = NULL;
611 long unsigned ppid;
612 const char *p;
613
614 assert(pid >= 0);
615 assert(_ppid);
616
617 if (pid == 0 || pid == getpid_cached()) {
618 *_ppid = getppid();
619 return 0;
620 }
621
622 p = procfs_file_alloca(pid, "stat");
623 r = read_one_line_file(p, &line);
624 if (r == -ENOENT)
625 return -ESRCH;
626 if (r < 0)
627 return r;
628
629 /* Let's skip the pid and comm fields. The latter is enclosed
630 * in () but does not escape any () in its value, so let's
631 * skip over it manually */
632
633 p = strrchr(line, ')');
634 if (!p)
635 return -EIO;
636
637 p++;
638
639 if (sscanf(p, " "
640 "%*c " /* state */
641 "%lu ", /* ppid */
642 &ppid) != 1)
643 return -EIO;
644
645 if ((long unsigned) (pid_t) ppid != ppid)
646 return -ERANGE;
647
648 *_ppid = (pid_t) ppid;
649
650 return 0;
651 }
652
653 int wait_for_terminate(pid_t pid, siginfo_t *status) {
654 siginfo_t dummy;
655
656 assert(pid >= 1);
657
658 if (!status)
659 status = &dummy;
660
661 for (;;) {
662 zero(*status);
663
664 if (waitid(P_PID, pid, status, WEXITED) < 0) {
665
666 if (errno == EINTR)
667 continue;
668
669 return negative_errno();
670 }
671
672 return 0;
673 }
674 }
675
/*
 * Return values:
 * < 0 : wait_for_terminate() failed to get the state of the
 *       process, the process was terminated by a signal, or
 *       failed for an unknown reason.
 * >=0 : The process terminated normally, and its exit code is
 *       returned.
 *
 * That is, success is indicated by a return value of zero, and an
 * error is indicated by a non-zero value.
 *
 * A warning is emitted if the process terminates abnormally. If it
 * exits with a non-zero exit code, this is logged at warning level
 * when check_exit_code is true, and at debug level otherwise.
 */
690 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
691 int r;
692 siginfo_t status;
693
694 assert(name);
695 assert(pid > 1);
696
697 r = wait_for_terminate(pid, &status);
698 if (r < 0)
699 return log_warning_errno(r, "Failed to wait for %s: %m", name);
700
701 if (status.si_code == CLD_EXITED) {
702 if (status.si_status != 0)
703 log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
704 "%s failed with error code %i.", name, status.si_status);
705 else
706 log_debug("%s succeeded.", name);
707
708 return status.si_status;
709 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
710
711 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
712 return -EPROTO;
713 }
714
715 log_warning("%s failed due to unknown reason.", name);
716 return -EPROTO;
717 }
718
/*
 * Return values:
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
 *       process, the process timed out, the process was terminated by a
 *       signal, or failed for an unknown reason.
 * >=0 : The process terminated normally with no failures.
 *
 * Success is indicated by a return value of zero, a timeout is indicated
 * by -ETIMEDOUT, and all other child failure states are indicated by a
 * negative, errno-style value.
 */
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
        sigset_t mask;
        int r;
        usec_t until;

        /* Waits for the specified child to exit, for at most 'timeout'. */

        /* The only signal we wait for below is SIGCHLD. */
        assert_se(sigemptyset(&mask) == 0);
        assert_se(sigaddset(&mask, SIGCHLD) == 0);

        /* Drop into a sigtimedwait-based timeout. Waiting for the
         * pid to exit. */
        until = now(CLOCK_MONOTONIC) + timeout;
        for (;;) {
                usec_t n;
                siginfo_t status = {};
                struct timespec ts;

                /* Once the deadline has passed we leave the loop, and return -EPROTO below. */
                n = now(CLOCK_MONOTONIC);
                if (n >= until)
                        break;

                /* Sleep until SIGCHLD arrives or the remaining time is up. The result is only looked at after the
                 * child has been polled below. */
                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
                /* Assuming we woke due to the child exiting. */
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
                        /* status was zero-initialized above, hence si_pid only matches if waitid() filled it in. */
                        if (status.si_pid == pid) {
                                /* This is the correct child.*/
                                if (status.si_code == CLD_EXITED)
                                        return (status.si_status == 0) ? 0 : -EPROTO;
                                else
                                        return -EPROTO;
                        }
                }
                /* Not the child, check for errors and proceed appropriately */
                if (r < 0) {
                        switch (r) {
                        case -EAGAIN:
                                /* Timed out, child is likely hung. */
                                return -ETIMEDOUT;
                        case -EINTR:
                                /* Received a different signal and should retry */
                                continue;
                        default:
                                /* Return any unexpected errors */
                                return r;
                        }
                }
        }

        return -EPROTO;
}
779
780 void sigkill_wait(pid_t pid) {
781 assert(pid > 1);
782
783 if (kill(pid, SIGKILL) > 0)
784 (void) wait_for_terminate(pid, NULL);
785 }
786
void sigkill_waitp(pid_t *pid) {
        /* Pointer-taking variant of sigkill_wait(). Does nothing if the pointer is NULL or the PID it points to
         * is <= 1. */

        if (pid && *pid > 1)
                sigkill_wait(*pid);
}
795
int kill_and_sigcont(pid_t pid, int sig) {
        /* Sends the specified signal to the process. Returns 0 on success, a negative errno if the signal could
         * not be sent. */

        if (kill(pid, sig) < 0)
                return -errno;

        /* That worked, hence follow up with SIGCONT, unless SIGCONT is what we just sent, or we sent SIGKILL,
         * which isn't affected by a process being suspended anyway. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return 0;
}
808
809 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
810 _cleanup_fclose_ FILE *f = NULL;
811 char *value = NULL;
812 int r;
813 bool done = false;
814 size_t l;
815 const char *path;
816
817 assert(pid >= 0);
818 assert(field);
819 assert(_value);
820
821 path = procfs_file_alloca(pid, "environ");
822
823 f = fopen(path, "re");
824 if (!f) {
825 if (errno == ENOENT)
826 return -ESRCH;
827 return -errno;
828 }
829
830 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
831
832 l = strlen(field);
833 r = 0;
834
835 do {
836 char line[LINE_MAX];
837 unsigned i;
838
839 for (i = 0; i < sizeof(line)-1; i++) {
840 int c;
841
842 c = getc(f);
843 if (_unlikely_(c == EOF)) {
844 done = true;
845 break;
846 } else if (c == 0)
847 break;
848
849 line[i] = c;
850 }
851 line[i] = 0;
852
853 if (strneq(line, field, l) && line[l] == '=') {
854 value = strdup(line + l + 1);
855 if (!value)
856 return -ENOMEM;
857
858 r = 1;
859 break;
860 }
861
862 } while (!done);
863
864 *_value = value;
865 return r;
866 }
867
bool pid_is_unwaited(pid_t pid) {
        /* Checks whether a PID is still valid at all, including a zombie */

        if (pid < 0)
                return false;

        /* If we or PID 1 would be dead and have been waited for, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Probe with the null signal: only ESRCH is taken to mean that the process is gone. */
        if (kill(pid, 0) < 0)
                return errno != ESRCH;

        return true;
}
885
bool pid_is_alive(pid_t pid) {
        int state;

        /* Checks whether a PID is still valid and not a zombie */

        if (pid < 0)
                return false;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Only "no such process" and the zombie state count as not alive; any other result, including other
         * errors, is treated as alive. */
        state = get_process_state(pid);

        return state != -ESRCH && state != 'Z';
}
906
int pid_from_same_root_fs(pid_t pid) {
        const char *their_root;

        /* Checks whether the specified process has the same root directory as PID 1, by comparing the
         * process' procfs "root" entry with /proc/1/root. Negative PIDs yield false; pid 0 and our own PID yield
         * true without looking at procfs. Otherwise the result of files_same() is returned as-is. */

        if (pid < 0)
                return false;

        if (pid == 0 || pid == getpid_cached())
                return true;

        their_root = procfs_file_alloca(pid, "root");

        return files_same(their_root, "/proc/1/root", 0);
}
920
921 bool is_main_thread(void) {
922 static thread_local int cached = 0;
923
924 if (_unlikely_(cached == 0))
925 cached = getpid_cached() == gettid() ? 1 : -1;
926
927 return cached > 0;
928 }
929
noreturn void freeze(void) {
        /* Halts the calling process for good: closes the log and all file descriptors, syncs, and then sleeps
         * in pause() forever. Never returns. */

        log_close();

        /* Make sure nobody waits for us on a socket anymore */
        (void) close_all_fds(NULL, 0);

        sync();

        while (true)
                pause();
}
942
bool oom_score_adjust_is_valid(int oa) {
        /* Returns true if the value lies within [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX], bounds included. */

        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}
946
947 unsigned long personality_from_string(const char *p) {
948 int architecture;
949
950 if (!p)
951 return PERSONALITY_INVALID;
952
953 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
954 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
955 * the same register size. */
956
957 architecture = architecture_from_string(p);
958 if (architecture < 0)
959 return PERSONALITY_INVALID;
960
961 if (architecture == native_architecture())
962 return PER_LINUX;
963 #ifdef SECONDARY_ARCHITECTURE
964 if (architecture == SECONDARY_ARCHITECTURE)
965 return PER_LINUX32;
966 #endif
967
968 return PERSONALITY_INVALID;
969 }
970
971 const char* personality_to_string(unsigned long p) {
972 int architecture = _ARCHITECTURE_INVALID;
973
974 if (p == PER_LINUX)
975 architecture = native_architecture();
976 #ifdef SECONDARY_ARCHITECTURE
977 else if (p == PER_LINUX32)
978 architecture = SECONDARY_ARCHITECTURE;
979 #endif
980
981 if (architecture < 0)
982 return NULL;
983
984 return architecture_to_string(architecture);
985 }
986
int safe_personality(unsigned long p) {
        int rc;

        /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
         * and in others as negative return value containing an errno-like value. Let's work around this: this is a
         * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
         * the return value indicating the same issue, so that we are definitely on the safe side.
         *
         * See https://github.com/systemd/systemd/issues/6737 */

        errno = 0;
        rc = personality(p);
        if (rc >= 0)
                return rc;

        /* Failure reported through errno */
        if (errno != 0)
                return -errno;

        /* Failure reported through the return value only: mirror it into errno, too */
        errno = -rc;
        return rc;
}
1008
1009 int opinionated_personality(unsigned long *ret) {
1010 int current;
1011
1012 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1013 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1014 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1015
1016 current = safe_personality(PERSONALITY_INVALID);
1017 if (current < 0)
1018 return current;
1019
1020 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1021 *ret = PER_LINUX32;
1022 else
1023 *ret = PER_LINUX;
1024
1025 return 0;
1026 }
1027
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only does something if we are PID 1 and running under valgrind: spawns a child that exits right
         * away, and waits for it. */

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0)
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1044
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t *) a, rhs = *(const pid_t *) b;

        /* Suitable for usage in qsort(): yields -1, 0 or 1 if the first PID is smaller than, equal to or
         * larger than the second. */

        return (lhs > rhs) - (lhs < rhs);
}
1056
int ioprio_parse_priority(const char *s, int *ret) {
        int prio, r;

        assert(s);
        assert(ret);

        /* Parses an I/O priority value from a string. Returns 0 and stores the value in *ret on success, the
         * parser's negative error if the string is not an integer, or -EINVAL if the value is rejected by
         * ioprio_priority_is_valid(). */

        r = safe_atoi(s, &prio);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(prio)) {
                *ret = prio;
                return 0;
        }

        return -EINVAL;
}
1073
1074 /* The cached PID, possible values:
1075 *
1076 * == UNSET [0] → cache not initialized yet
1077 * == BUSY [-1] → some thread is initializing it at the moment
1078 * any other → the cached PID
1079 */
1080
/* Sentinel values of the PID cache. Any other value stored in the cache is the cached PID itself. */
#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

/* The PID cache used by getpid_cached(), initially not set. */
static pid_t cached_pid = CACHED_PID_UNSET;
1085
1086 static void reset_cached_pid(void) {
1087 /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1088 cached_pid = CACHED_PID_UNSET;
1089 }
1090
1091 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1092 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1093 * libpthread, as it is part of glibc anyway. */
/* Registers fork handlers: 'prepare' and 'parent' may be NULL, 'child' is called in the child after fork(). */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
/* Declared weak; passed as the last argument to __register_atfork(). */
extern void* __dso_handle __attribute__ ((__weak__));
1096
pid_t getpid_cached(void) {
        pid_t current_value;

        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
         *
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
         */

        /* Atomically switch the cache from UNSET to BUSY. The call returns the value the cache had before, which
         * tells us which of the three cases below we are in. */
        current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);

        switch (current_value) {

        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
                pid_t new_pid;

                new_pid = getpid();

                /* Make sure the cache is reset in the child after a fork(), as the PID is different there. */
                if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
                        /* OOM? Let's try again later */
                        cached_pid = CACHED_PID_UNSET;
                        return new_pid;
                }

                /* Publish the PID; this also takes the cache out of the BUSY state again. */
                cached_pid = new_pid;
                return new_pid;
        }

        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
                /* Don't wait for the initialization to finish, simply query the PID directly this time. */
                return getpid();

        default: /* Properly initialized */
                return current_value;
        }
}
1135
int must_be_root(void) {
        /* Returns 0 if the effective UID is 0. Otherwise logs an error and returns -EPERM. */

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1144
1145 int safe_fork_full(
1146 const char *name,
1147 const int except_fds[],
1148 size_t n_except_fds,
1149 ForkFlags flags,
1150 pid_t *ret_pid) {
1151
1152 pid_t original_pid, pid;
1153 sigset_t saved_ss;
1154 bool block_signals;
1155 int r;
1156
1157 /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1158 * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1159
1160 original_pid = getpid_cached();
1161
1162 block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1163
1164 if (block_signals) {
1165 sigset_t ss;
1166
1167 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1168 * that SIGTERMs are not lost we might send to the child. */
1169 if (sigfillset(&ss) < 0)
1170 return log_debug_errno(errno, "Failed to reset signal set: %m");
1171
1172 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1173 return log_debug_errno(errno, "Failed to reset signal mask: %m");
1174 }
1175
1176 pid = fork();
1177 if (pid < 0) {
1178 r = -errno;
1179
1180 if (block_signals) /* undo what we did above */
1181 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1182
1183 return log_debug_errno(r, "Failed to fork: %m");
1184 }
1185 if (pid > 0) {
1186 /* We are in the parent process */
1187
1188 if (block_signals) /* undo what we did above */
1189 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1190
1191 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1192
1193 if (ret_pid)
1194 *ret_pid = pid;
1195
1196 return 1;
1197 }
1198
1199 /* We are in the child process */
1200
1201 if (flags & FORK_REOPEN_LOG) {
1202 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1203 log_close();
1204 log_set_open_when_needed(true);
1205 }
1206
1207 if (name) {
1208 r = rename_process(name);
1209 if (r < 0)
1210 log_debug_errno(r, "Failed to rename process, ignoring: %m");
1211 }
1212
1213 if (flags & FORK_DEATHSIG)
1214 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1215 log_debug_errno(errno, "Failed to set death signal: %m");
1216 _exit(EXIT_FAILURE);
1217 }
1218
1219 if (flags & FORK_RESET_SIGNALS) {
1220 r = reset_all_signal_handlers();
1221 if (r < 0) {
1222 log_debug_errno(r, "Failed to reset signal handlers: %m");
1223 _exit(EXIT_FAILURE);
1224 }
1225
1226 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1227 r = reset_signal_mask();
1228 if (r < 0) {
1229 log_debug_errno(r, "Failed to reset signal mask: %m");
1230 _exit(EXIT_FAILURE);
1231 }
1232 } else if (block_signals) { /* undo what we did above */
1233 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1234 log_debug_errno(errno, "Failed to restore signal mask: %m");
1235 _exit(EXIT_FAILURE);
1236 }
1237 }
1238
1239 if (flags & FORK_DEATHSIG) {
1240 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1241 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1242
1243 if (getppid() != original_pid) {
1244 log_debug("Parent died early, raising SIGTERM.");
1245 (void) raise(SIGTERM);
1246 _exit(EXIT_FAILURE);
1247 }
1248 }
1249
1250 if (flags & FORK_CLOSE_ALL_FDS) {
1251 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1252 log_close();
1253
1254 r = close_all_fds(except_fds, n_except_fds);
1255 if (r < 0) {
1256 log_debug_errno(r, "Failed to close all file descriptors: %m");
1257 _exit(EXIT_FAILURE);
1258 }
1259 }
1260
1261 /* When we were asked to reopen the logs, do so again now */
1262 if (flags & FORK_REOPEN_LOG) {
1263 log_open();
1264 log_set_open_when_needed(false);
1265 }
1266
1267 if (flags & FORK_NULL_STDIO) {
1268 r = make_null_stdio();
1269 if (r < 0) {
1270 log_debug_errno(r, "Failed to connect stdin/stdout to /dev/null: %m");
1271 _exit(EXIT_FAILURE);
1272 }
1273 }
1274
1275 if (ret_pid)
1276 *ret_pid = getpid_cached();
1277
1278 return 0;
1279 }
1280
int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
        bool stdout_is_tty, stderr_is_tty;
        unsigned n, i;
        va_list ap;
        char **l;
        int r;

        assert(path);

        /* Spawns a temporary TTY agent, making sure it goes away when we go away.
         *
         * The variable arguments form the argv[] array passed to execv() for 'path', and must be terminated by
         * a NULL pointer. 'name', 'except', 'n_except' and 'ret_pid' are passed on to safe_fork_full().
         *
         * Returns 0 in the parent on success, and a negative value if forking failed. Never returns in the
         * child: it either executes the agent or exits with EXIT_FAILURE. */

        r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        /* In the child: */

        stdout_is_tty = isatty(STDOUT_FILENO);
        stderr_is_tty = isatty(STDERR_FILENO);

        if (!stdout_is_tty || !stderr_is_tty) {
                int fd;

                /* Detach from stdout/stderr and reopen
                 * /dev/tty for them. This is important to
                 * ensure that when systemctl is started via
                 * popen() or a similar call that expects to
                 * read EOF we actually do generate EOF and
                 * not delay this indefinitely by keeping an
                 * unused copy of the caller's stdout/stderr
                 * around. */
                fd = open("/dev/tty", O_WRONLY);
                if (fd < 0) {
                        log_error_errno(errno, "Failed to open /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                /* Only close the temporary descriptor if it isn't one of the standard ones itself. */
                if (fd > STDERR_FILENO)
                        close(fd);
        }

        /* Count arguments */
        va_start(ap, path);
        for (n = 0; va_arg(ap, char*); n++)
                ;
        va_end(ap);

        /* Allocate strv */
        l = alloca(sizeof(char *) * (n + 1));

        /* Fill in arguments. The "<=" makes sure the terminating NULL is copied, too. */
        va_start(ap, path);
        for (i = 0; i <= n; i++)
                l[i] = va_arg(ap, char*);
        va_end(ap);

        execv(path, l);
        /* execv() only returns on failure */
        _exit(EXIT_FAILURE);
}
1351
/* String names of the I/O scheduling classes, indexed by the IOPRIO_CLASS_* constants. */
static const char *const ioprio_class_table[] = {
        [IOPRIO_CLASS_NONE] = "none",
        [IOPRIO_CLASS_RT] = "realtime",
        [IOPRIO_CLASS_BE] = "best-effort",
        [IOPRIO_CLASS_IDLE] = "idle"
};

/* NOTE(review): presumably generates the ioprio_class string conversion helpers from the table above, with a
 * numeric fallback limited to INT_MAX — confirm against the macro definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1360
/* String names of the SIGCHLD si_code values, indexed by the CLD_* constants. */
static const char *const sigchld_code_table[] = {
        [CLD_EXITED] = "exited",
        [CLD_KILLED] = "killed",
        [CLD_DUMPED] = "dumped",
        [CLD_TRAPPED] = "trapped",
        [CLD_STOPPED] = "stopped",
        [CLD_CONTINUED] = "continued",
};

/* NOTE(review): presumably generates the sigchld_code string conversion helpers from the table above — confirm
 * against the macro definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1371
/* String names of the CPU scheduling policies, indexed by the SCHED_* constants. */
static const char* const sched_policy_table[] = {
        [SCHED_OTHER] = "other",
        [SCHED_BATCH] = "batch",
        [SCHED_IDLE] = "idle",
        [SCHED_FIFO] = "fifo",
        [SCHED_RR] = "rr"
};

/* NOTE(review): presumably generates the sched_policy string conversion helpers from the table above, with a
 * numeric fallback limited to INT_MAX — confirm against the macro definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);