]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
tree-wide: introduce new safe_fork() helper and port everything over
[thirdparty/systemd.git] / src / basic / process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <linux/oom.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/mman.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <syslog.h>
38 #include <unistd.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
41 #endif
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "escape.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "fs-util.h"
49 #include "ioprio.h"
50 #include "log.h"
51 #include "macro.h"
52 #include "missing.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "terminal-util.h"
60 #include "user-util.h"
61 #include "util.h"
62
63 int get_process_state(pid_t pid) {
64 const char *p;
65 char state;
66 int r;
67 _cleanup_free_ char *line = NULL;
68
69 assert(pid >= 0);
70
71 p = procfs_file_alloca(pid, "stat");
72
73 r = read_one_line_file(p, &line);
74 if (r == -ENOENT)
75 return -ESRCH;
76 if (r < 0)
77 return r;
78
79 p = strrchr(line, ')');
80 if (!p)
81 return -EIO;
82
83 p++;
84
85 if (sscanf(p, " %c", &state) != 1)
86 return -EIO;
87
88 return (unsigned char) state;
89 }
90
/* Reads the procfs "comm" file of the specified process into a newly allocated string stored in *name.
 *
 * Returns -ESRCH if the file does not exist, otherwise whatever read_one_line_file() returned. */
int get_process_comm(pid_t pid, char **name) {
        const char *comm_path;
        int r;

        assert(name);
        assert(pid >= 0);

        comm_path = procfs_file_alloca(pid, "comm");

        r = read_one_line_file(comm_path, name);

        return r == -ENOENT ? -ESRCH : r;
}
106
107 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
108 _cleanup_fclose_ FILE *f = NULL;
109 bool space = false;
110 char *k, *ans = NULL;
111 const char *p;
112 int c;
113
114 assert(line);
115 assert(pid >= 0);
116
117 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
118 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
119 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
120 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
121 * command line that resolves to the empty string will return the "comm" name of the process instead.
122 *
123 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
124 * comm_fallback is false). Returns 0 and sets *line otherwise. */
125
126 p = procfs_file_alloca(pid, "cmdline");
127
128 f = fopen(p, "re");
129 if (!f) {
130 if (errno == ENOENT)
131 return -ESRCH;
132 return -errno;
133 }
134
135 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
136
137 if (max_length == 1) {
138
139 /* If there's only room for one byte, return the empty string */
140 ans = new0(char, 1);
141 if (!ans)
142 return -ENOMEM;
143
144 *line = ans;
145 return 0;
146
147 } else if (max_length == 0) {
148 size_t len = 0, allocated = 0;
149
150 while ((c = getc(f)) != EOF) {
151
152 if (!GREEDY_REALLOC(ans, allocated, len+3)) {
153 free(ans);
154 return -ENOMEM;
155 }
156
157 if (isprint(c)) {
158 if (space) {
159 ans[len++] = ' ';
160 space = false;
161 }
162
163 ans[len++] = c;
164 } else if (len > 0)
165 space = true;
166 }
167
168 if (len > 0)
169 ans[len] = '\0';
170 else
171 ans = mfree(ans);
172
173 } else {
174 bool dotdotdot = false;
175 size_t left;
176
177 ans = new(char, max_length);
178 if (!ans)
179 return -ENOMEM;
180
181 k = ans;
182 left = max_length;
183 while ((c = getc(f)) != EOF) {
184
185 if (isprint(c)) {
186
187 if (space) {
188 if (left <= 2) {
189 dotdotdot = true;
190 break;
191 }
192
193 *(k++) = ' ';
194 left--;
195 space = false;
196 }
197
198 if (left <= 1) {
199 dotdotdot = true;
200 break;
201 }
202
203 *(k++) = (char) c;
204 left--;
205 } else if (k > ans)
206 space = true;
207 }
208
209 if (dotdotdot) {
210 if (max_length <= 4) {
211 k = ans;
212 left = max_length;
213 } else {
214 k = ans + max_length - 4;
215 left = 4;
216
217 /* Eat up final spaces */
218 while (k > ans && isspace(k[-1])) {
219 k--;
220 left++;
221 }
222 }
223
224 strncpy(k, "...", left-1);
225 k[left-1] = 0;
226 } else
227 *k = 0;
228 }
229
230 /* Kernel threads have no argv[] */
231 if (isempty(ans)) {
232 _cleanup_free_ char *t = NULL;
233 int h;
234
235 free(ans);
236
237 if (!comm_fallback)
238 return -ENOENT;
239
240 h = get_process_comm(pid, &t);
241 if (h < 0)
242 return h;
243
244 if (max_length == 0)
245 ans = strjoin("[", t, "]");
246 else {
247 size_t l;
248
249 l = strlen(t);
250
251 if (l + 3 <= max_length)
252 ans = strjoin("[", t, "]");
253 else if (max_length <= 6) {
254
255 ans = new(char, max_length);
256 if (!ans)
257 return -ENOMEM;
258
259 memcpy(ans, "[...]", max_length-1);
260 ans[max_length-1] = 0;
261 } else {
262 char *e;
263
264 t[max_length - 6] = 0;
265
266 /* Chop off final spaces */
267 e = strchr(t, 0);
268 while (e > t && isspace(e[-1]))
269 e--;
270 *e = 0;
271
272 ans = strjoin("[", t, "...]");
273 }
274 }
275 if (!ans)
276 return -ENOMEM;
277 }
278
279 *line = ans;
280 return 0;
281 }
282
283 int rename_process(const char name[]) {
284 static size_t mm_size = 0;
285 static char *mm = NULL;
286 bool truncated = false;
287 size_t l;
288
289 /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
290 * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
291 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
292 * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
293 * truncated.
294 *
295 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
296
297 if (isempty(name))
298 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
299
300 l = strlen(name);
301
302 /* First step, change the comm field. */
303 (void) prctl(PR_SET_NAME, name);
304 if (l > 15) /* Linux process names can be 15 chars at max */
305 truncated = true;
306
307 /* Second step, change glibc's ID of the process name. */
308 if (program_invocation_name) {
309 size_t k;
310
311 k = strlen(program_invocation_name);
312 strncpy(program_invocation_name, name, k);
313 if (l > k)
314 truncated = true;
315 }
316
317 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
318 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
319 * the end. This is the best option for changing /proc/self/cmdline. */
320
321 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
322 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
323 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
324 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
325 * mmap() is not. */
326 if (geteuid() != 0)
327 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
328 else if (mm_size < l+1) {
329 size_t nn_size;
330 char *nn;
331
332 nn_size = PAGE_ALIGN(l+1);
333 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
334 if (nn == MAP_FAILED) {
335 log_debug_errno(errno, "mmap() failed: %m");
336 goto use_saved_argv;
337 }
338
339 strncpy(nn, name, nn_size);
340
341 /* Now, let's tell the kernel about this new memory */
342 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
343 log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
344 (void) munmap(nn, nn_size);
345 goto use_saved_argv;
346 }
347
348 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
349 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
350 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
351 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
352
353 if (mm)
354 (void) munmap(mm, mm_size);
355
356 mm = nn;
357 mm_size = nn_size;
358 } else {
359 strncpy(mm, name, mm_size);
360
361 /* Update the end pointer, continuing regardless of any failure. */
362 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
363 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
364 }
365
366 use_saved_argv:
367 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
368 * it still looks here */
369
370 if (saved_argc > 0) {
371 int i;
372
373 if (saved_argv[0]) {
374 size_t k;
375
376 k = strlen(saved_argv[0]);
377 strncpy(saved_argv[0], name, k);
378 if (l > k)
379 truncated = true;
380 }
381
382 for (i = 1; i < saved_argc; i++) {
383 if (!saved_argv[i])
384 break;
385
386 memzero(saved_argv[i], strlen(saved_argv[i]));
387 }
388 }
389
390 return !truncated;
391 }
392
/* Checks whether the specified process is a kernel thread, by testing whether its procfs "cmdline" file is
 * empty.
 *
 * Returns 1 if the command line is empty, 0 if it is not (or if pid is 0, 1 or our own PID), -ESRCH if the
 * file does not exist, and another negative errno-style value if opening or reading it failed. */
int is_kernel_thread(pid_t pid) {
        const char *p;
        size_t count;
        int saved_errno;
        bool eof;
        char c;
        FILE *f;

        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
                return 0;

        assert(pid > 1);

        p = procfs_file_alloca(pid, "cmdline");
        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        /* Capture errno right after the read: fclose() below may overwrite it, and we must not report a stale
         * value from some earlier call either. */
        errno = 0;
        count = fread(&c, 1, 1, f);
        saved_errno = errno;
        eof = feof(f);
        fclose(f);

        /* We could read a byte, hence the command line is not empty */
        if (count > 0)
                return 0;

        /* Kernel threads have an empty cmdline */
        if (eof)
                return 1;

        /* Neither data nor EOF: a read error. Never return 0 here, as that would claim "not a kernel thread". */
        return saved_errno > 0 ? -saved_errno : -EIO;
}
426
427 int get_process_capeff(pid_t pid, char **capeff) {
428 const char *p;
429 int r;
430
431 assert(capeff);
432 assert(pid >= 0);
433
434 p = procfs_file_alloca(pid, "status");
435
436 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
437 if (r == -ENOENT)
438 return -ESRCH;
439
440 return r;
441 }
442
/* Reads the destination of the symlink proc_file into a newly allocated string stored in *name.
 *
 * Returns 0 on success, -ESRCH if readlink_malloc() fails with -ENOENT, and any other negative error
 * unmodified. */
static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        assert(proc_file);
        assert(name);

        r = readlink_malloc(proc_file, name);
        if (r >= 0)
                return 0;

        return r == -ENOENT ? -ESRCH : r;
}
457
/* Resolves the procfs "exe" symlink of the specified process and stores the result in *name. A trailing
 * " (deleted)" marker is cut off.
 *
 * Returns 0 on success, or the negative error from get_process_link_contents(). */
int get_process_exe(pid_t pid, char **name) {
        const char *link_path;
        char *marker;
        int r;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "exe");

        r = get_process_link_contents(link_path, name);
        if (r < 0)
                return r;

        /* Chop off the suffix, if there is one */
        marker = endswith(*name, " (deleted)");
        if (marker)
                *marker = '\0';

        return 0;
}
476
477 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
478 _cleanup_fclose_ FILE *f = NULL;
479 char line[LINE_MAX];
480 const char *p;
481
482 assert(field);
483 assert(uid);
484
485 if (pid < 0)
486 return -EINVAL;
487
488 p = procfs_file_alloca(pid, "status");
489 f = fopen(p, "re");
490 if (!f) {
491 if (errno == ENOENT)
492 return -ESRCH;
493 return -errno;
494 }
495
496 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
497
498 FOREACH_LINE(line, f, return -errno) {
499 char *l;
500
501 l = strstrip(line);
502
503 if (startswith(l, field)) {
504 l += strlen(field);
505 l += strspn(l, WHITESPACE);
506
507 l[strcspn(l, WHITESPACE)] = 0;
508
509 return parse_uid(l, uid);
510 }
511 }
512
513 return -EIO;
514 }
515
/* Determines the UID of the specified process. For pid 0 or our own PID getuid() is used directly, otherwise
 * the "Uid:" field is looked up via get_process_id(). Returns 0 on success, negative on failure. */
int get_process_uid(pid_t pid, uid_t *uid) {

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
525
/* Determines the GID of the specified process. For pid 0 or our own PID getgid() is used directly, otherwise
 * the "Gid:" field is looked up via get_process_id(). Returns 0 on success, negative on failure. */
int get_process_gid(pid_t pid, gid_t *gid) {

        /* get_process_id() takes a uid_t pointer, make sure passing a gid_t pointer is size-compatible */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}
536
/* Resolves the procfs "cwd" symlink of the specified process into a newly allocated string stored in *cwd.
 * Return values are those of get_process_link_contents(). */
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "cwd");
        return get_process_link_contents(link_path, cwd);
}
546
/* Resolves the procfs "root" symlink of the specified process into a newly allocated string stored in *root.
 * Return values are those of get_process_link_contents(). */
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}
556
557 int get_process_environ(pid_t pid, char **env) {
558 _cleanup_fclose_ FILE *f = NULL;
559 _cleanup_free_ char *outcome = NULL;
560 int c;
561 const char *p;
562 size_t allocated = 0, sz = 0;
563
564 assert(pid >= 0);
565 assert(env);
566
567 p = procfs_file_alloca(pid, "environ");
568
569 f = fopen(p, "re");
570 if (!f) {
571 if (errno == ENOENT)
572 return -ESRCH;
573 return -errno;
574 }
575
576 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
577
578 while ((c = fgetc(f)) != EOF) {
579 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
580 return -ENOMEM;
581
582 if (c == '\0')
583 outcome[sz++] = '\n';
584 else
585 sz += cescape_char(c, outcome + sz);
586 }
587
588 if (!outcome) {
589 outcome = strdup("");
590 if (!outcome)
591 return -ENOMEM;
592 } else
593 outcome[sz] = '\0';
594
595 *env = outcome;
596 outcome = NULL;
597
598 return 0;
599 }
600
601 int get_process_ppid(pid_t pid, pid_t *_ppid) {
602 int r;
603 _cleanup_free_ char *line = NULL;
604 long unsigned ppid;
605 const char *p;
606
607 assert(pid >= 0);
608 assert(_ppid);
609
610 if (pid == 0 || pid == getpid_cached()) {
611 *_ppid = getppid();
612 return 0;
613 }
614
615 p = procfs_file_alloca(pid, "stat");
616 r = read_one_line_file(p, &line);
617 if (r == -ENOENT)
618 return -ESRCH;
619 if (r < 0)
620 return r;
621
622 /* Let's skip the pid and comm fields. The latter is enclosed
623 * in () but does not escape any () in its value, so let's
624 * skip over it manually */
625
626 p = strrchr(line, ')');
627 if (!p)
628 return -EIO;
629
630 p++;
631
632 if (sscanf(p, " "
633 "%*c " /* state */
634 "%lu ", /* ppid */
635 &ppid) != 1)
636 return -EIO;
637
638 if ((long unsigned) (pid_t) ppid != ppid)
639 return -ERANGE;
640
641 *_ppid = (pid_t) ppid;
642
643 return 0;
644 }
645
646 int wait_for_terminate(pid_t pid, siginfo_t *status) {
647 siginfo_t dummy;
648
649 assert(pid >= 1);
650
651 if (!status)
652 status = &dummy;
653
654 for (;;) {
655 zero(*status);
656
657 if (waitid(P_PID, pid, status, WEXITED) < 0) {
658
659 if (errno == EINTR)
660 continue;
661
662 return negative_errno();
663 }
664
665 return 0;
666 }
667 }
668
669 /*
670 * Return values:
671 * < 0 : wait_for_terminate() failed to get the state of the
672 * process, the process was terminated by a signal, or
673 * failed for an unknown reason.
674 * >=0 : The process terminated normally, and its exit code is
675 * returned.
676 *
677 * That is, success is indicated by a return value of zero, and an
678 * error is indicated by a non-zero value.
679 *
680 * A warning is emitted if the process terminates abnormally,
681 * and also if it returns non-zero unless check_exit_code is true.
682 */
683 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
684 int r;
685 siginfo_t status;
686
687 assert(name);
688 assert(pid > 1);
689
690 r = wait_for_terminate(pid, &status);
691 if (r < 0)
692 return log_warning_errno(r, "Failed to wait for %s: %m", name);
693
694 if (status.si_code == CLD_EXITED) {
695 if (status.si_status != 0)
696 log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
697 "%s failed with error code %i.", name, status.si_status);
698 else
699 log_debug("%s succeeded.", name);
700
701 return status.si_status;
702 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
703
704 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
705 return -EPROTO;
706 }
707
708 log_warning("%s failed due to unknown reason.", name);
709 return -EPROTO;
710 }
711
712 /*
713 * Return values:
714 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
715 * process, the process timed out, the process was terminated by a
716 * signal, or failed for an unknown reason.
717 * >=0 : The process terminated normally with no failures.
718 *
719 * Success is indicated by a return value of zero, a timeout is indicated
720 * by ETIMEDOUT, and all other child failure states are indicated by error
721 * is indicated by a non-zero value.
722 */
723 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
724 sigset_t mask;
725 int r;
726 usec_t until;
727
728 assert_se(sigemptyset(&mask) == 0);
729 assert_se(sigaddset(&mask, SIGCHLD) == 0);
730
731 /* Drop into a sigtimewait-based timeout. Waiting for the
732 * pid to exit. */
733 until = now(CLOCK_MONOTONIC) + timeout;
734 for (;;) {
735 usec_t n;
736 siginfo_t status = {};
737 struct timespec ts;
738
739 n = now(CLOCK_MONOTONIC);
740 if (n >= until)
741 break;
742
743 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
744 /* Assuming we woke due to the child exiting. */
745 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
746 if (status.si_pid == pid) {
747 /* This is the correct child.*/
748 if (status.si_code == CLD_EXITED)
749 return (status.si_status == 0) ? 0 : -EPROTO;
750 else
751 return -EPROTO;
752 }
753 }
754 /* Not the child, check for errors and proceed appropriately */
755 if (r < 0) {
756 switch (r) {
757 case -EAGAIN:
758 /* Timed out, child is likely hung. */
759 return -ETIMEDOUT;
760 case -EINTR:
761 /* Received a different signal and should retry */
762 continue;
763 default:
764 /* Return any unexpected errors */
765 return r;
766 }
767 }
768 }
769
770 return -EPROTO;
771 }
772
773 void sigkill_wait(pid_t pid) {
774 assert(pid > 1);
775
776 if (kill(pid, SIGKILL) > 0)
777 (void) wait_for_terminate(pid, NULL);
778 }
779
/* Pointer variant of sigkill_wait(): does nothing if the pointer is NULL or the PID it points to is <= 1,
 * and otherwise passes the PID on to sigkill_wait(). */
void sigkill_waitp(pid_t *pid) {
        if (pid && *pid > 1)
                sigkill_wait(*pid);
}
788
/* Sends the specified signal to the specified process, and if that worked follows up with a SIGCONT.
 *
 * Returns 0 on success and a negative errno if sending the signal failed. A failure to send the additional
 * SIGCONT is ignored. */
int kill_and_sigcont(pid_t pid, int sig) {

        if (kill(pid, sig) < 0)
                return -errno;

        /* No SIGCONT if we just sent one anyway, nor after SIGKILL, which isn't affected by a process being
         * suspended. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return 0;
}
801
802 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
803 _cleanup_fclose_ FILE *f = NULL;
804 char *value = NULL;
805 int r;
806 bool done = false;
807 size_t l;
808 const char *path;
809
810 assert(pid >= 0);
811 assert(field);
812 assert(_value);
813
814 path = procfs_file_alloca(pid, "environ");
815
816 f = fopen(path, "re");
817 if (!f) {
818 if (errno == ENOENT)
819 return -ESRCH;
820 return -errno;
821 }
822
823 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
824
825 l = strlen(field);
826 r = 0;
827
828 do {
829 char line[LINE_MAX];
830 unsigned i;
831
832 for (i = 0; i < sizeof(line)-1; i++) {
833 int c;
834
835 c = getc(f);
836 if (_unlikely_(c == EOF)) {
837 done = true;
838 break;
839 } else if (c == 0)
840 break;
841
842 line[i] = c;
843 }
844 line[i] = 0;
845
846 if (strneq(line, field, l) && line[l] == '=') {
847 value = strdup(line + l + 1);
848 if (!value)
849 return -ENOMEM;
850
851 r = 1;
852 break;
853 }
854
855 } while (!done);
856
857 *_value = value;
858 return r;
859 }
860
/* Checks whether a PID is still valid at all, including a zombie. Probes the process with kill(pid, 0) and
 * treats every outcome other than ESRCH as "still there". */
bool pid_is_unwaited(pid_t pid) {

        if (pid < 0)
                return false;

        /* If we or PID 1 would be dead and have been waited for, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        return kill(pid, 0) >= 0 || errno != ESRCH;
}
878
/* Checks whether a PID is still valid and not a zombie. Only a process state of 'Z' or a -ESRCH failure of
 * get_process_state() is taken as "not alive"; any other result, including other errors, counts as alive. */
bool pid_is_alive(pid_t pid) {
        int state;

        if (pid < 0)
                return false;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        state = get_process_state(pid);

        return state != -ESRCH && state != 'Z';
}
899
/* Compares the procfs "root" entry of the specified process with "/proc/1/root" via files_same().
 *
 * Returns false for a negative pid, true for pid 0 or our own PID, and otherwise the result of files_same(). */
int pid_from_same_root_fs(pid_t pid) {
        const char *their_root;

        if (pid < 0)
                return false;

        if (pid == 0)
                return true;

        if (pid == getpid_cached())
                return true;

        their_root = procfs_file_alloca(pid, "root");

        return files_same(their_root, "/proc/1/root", 0);
}
913
914 bool is_main_thread(void) {
915 static thread_local int cached = 0;
916
917 if (_unlikely_(cached == 0))
918 cached = getpid_cached() == gettid() ? 1 : -1;
919
920 return cached > 0;
921 }
922
/* Closes logging and all file descriptors, syncs, and then sleeps in pause() forever. Never returns. */
noreturn void freeze(void) {

        log_close();

        /* Make sure nobody waits for us on a socket anymore */
        close_all_fds(NULL, 0);

        sync();

        while (true)
                pause();
}
935
/* Returns true if the value lies within the inclusive range OOM_SCORE_ADJ_MIN…OOM_SCORE_ADJ_MAX. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}
939
940 unsigned long personality_from_string(const char *p) {
941 int architecture;
942
943 if (!p)
944 return PERSONALITY_INVALID;
945
946 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
947 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
948 * the same register size. */
949
950 architecture = architecture_from_string(p);
951 if (architecture < 0)
952 return PERSONALITY_INVALID;
953
954 if (architecture == native_architecture())
955 return PER_LINUX;
956 #ifdef SECONDARY_ARCHITECTURE
957 if (architecture == SECONDARY_ARCHITECTURE)
958 return PER_LINUX32;
959 #endif
960
961 return PERSONALITY_INVALID;
962 }
963
964 const char* personality_to_string(unsigned long p) {
965 int architecture = _ARCHITECTURE_INVALID;
966
967 if (p == PER_LINUX)
968 architecture = native_architecture();
969 #ifdef SECONDARY_ARCHITECTURE
970 else if (p == PER_LINUX32)
971 architecture = SECONDARY_ARCHITECTURE;
972 #endif
973
974 if (architecture < 0)
975 return NULL;
976
977 return architecture_to_string(architecture);
978 }
979
/* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
 * and in others as negative return value containing an errno-like value. This wrapper works around that: on
 * failure it uses errno if it is set, and the return value otherwise, so that in the end both errno and the
 * return value indicate the same issue.
 *
 * Returns the non-negative result of personality() on success, a negative errno-style value on failure.
 *
 * See https://github.com/systemd/systemd/issues/6737 */
int safe_personality(unsigned long p) {
        int ret;

        /* Reset errno first, so that we can tell afterwards whether the call touched it */
        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure reported through errno */
        if (errno != 0)
                return -errno;

        /* Failure reported through the return value only, propagate it into errno too */
        errno = -ret;
        return ret;
}
1001
1002 int opinionated_personality(unsigned long *ret) {
1003 int current;
1004
1005 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1006 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1007 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1008
1009 current = safe_personality(PERSONALITY_INVALID);
1010 if (current < 0)
1011 return current;
1012
1013 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1014 *ret = PER_LINUX32;
1015 else
1016 *ret = PER_LINUX;
1017
1018 return 0;
1019 }
1020
/* If valgrind support is compiled in, and we are PID 1 and running under valgrind: fork off a helper child
 * that exits immediately, and wait for it. Otherwise this is a NOP. */
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0)
                exit(EXIT_SUCCESS); /* child */

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1037
/* Three-way comparison of two pid_t values passed by pointer, suitable for usage in qsort(). Returns -1, 0
 * or 1 if the first PID is smaller than, equal to or larger than the second. */
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a, rhs = *(const pid_t*) b;

        return (lhs > rhs) - (lhs < rhs);
}
1049
/* Parses an I/O priority value from a string.
 *
 * Returns 0 and stores the value in *ret on success, the negative error from safe_atoi() if the string cannot
 * be parsed, or -EINVAL if ioprio_priority_is_valid() rejects the parsed value. */
int ioprio_parse_priority(const char *s, int *ret) {
        int value, r;

        assert(s);
        assert(ret);

        r = safe_atoi(s, &value);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(value)) {
                *ret = value;
                return 0;
        }

        return -EINVAL;
}
1066
/* State of the PID cache used by getpid_cached(). The variable holds one of:
 *
 *     CACHED_PID_UNSET [0]  → nothing cached yet
 *     CACHED_PID_BUSY  [-1] → some thread is in the middle of initializing the cache
 *     anything else         → the cached PID
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

/* Invalidates the cache. Invoked in the child after a fork(), i.e. at the first moment the PID changed. */
static void reset_cached_pid(void) {
        cached_pid = CACHED_PID_UNSET;
}
1083
/* glibc's __register_atfork() and __dso_handle are not declared in the glibc headers, hence we declare them
 * here ourselves. __register_atfork() is mostly equivalent to pthread_atfork(), but as it is part of glibc
 * proper using it doesn't require us to link against libpthread. */
extern int __register_atfork(
                void (*prepare) (void),
                void (*parent) (void),
                void (*child) (void),
                void * __dso_handle);

extern void* __dso_handle __attribute__ ((__weak__));
1089
1090 pid_t getpid_cached(void) {
1091 pid_t current_value;
1092
1093 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1094 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1095 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1096 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1097 *
1098 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1099 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1100 */
1101
1102 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1103
1104 switch (current_value) {
1105
1106 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1107 pid_t new_pid;
1108
1109 new_pid = getpid();
1110
1111 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1112 /* OOM? Let's try again later */
1113 cached_pid = CACHED_PID_UNSET;
1114 return new_pid;
1115 }
1116
1117 cached_pid = new_pid;
1118 return new_pid;
1119 }
1120
1121 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1122 return getpid();
1123
1124 default: /* Properly initialized */
1125 return current_value;
1126 }
1127 }
1128
/* Returns 0 if the effective UID is 0. Otherwise logs an error and returns -EPERM. */
int must_be_root(void) {

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1137
1138 int safe_fork_full(
1139 const char *name,
1140 const int except_fds[],
1141 size_t n_except_fds,
1142 ForkFlags flags,
1143 pid_t *ret_pid) {
1144
1145 pid_t original_pid, pid;
1146 sigset_t saved_ss;
1147 bool block_signals;
1148 int r;
1149
1150 /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1151 * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1152
1153 original_pid = getpid_cached();
1154
1155 block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1156
1157 if (block_signals) {
1158 sigset_t ss;
1159
1160 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1161 * that SIGTERMs are not lost we might send to the child. */
1162 if (sigfillset(&ss) < 0)
1163 return log_debug_errno(errno, "Failed to reset signal set: %m");
1164
1165 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1166 return log_debug_errno(errno, "Failed to reset signal mask: %m");
1167 }
1168
1169 pid = fork();
1170 if (pid < 0) {
1171 r = -errno;
1172
1173 if (block_signals) /* undo what we did above */
1174 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1175
1176 return log_debug_errno(r, "Failed to fork: %m");
1177 }
1178 if (pid > 0) {
1179 /* We are in the parent process */
1180
1181 if (block_signals) /* undo what we did above */
1182 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1183
1184 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1185
1186 if (ret_pid)
1187 *ret_pid = pid;
1188
1189 return 1;
1190 }
1191
1192 /* We are in the child process */
1193
1194 if (flags & FORK_REOPEN_LOG) {
1195 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1196 log_close();
1197 log_set_open_when_needed(true);
1198 }
1199
1200 if (name) {
1201 r = rename_process(name);
1202 if (r < 0)
1203 log_debug_errno(r, "Failed to rename process, ignoring: %m");
1204 }
1205
1206 if (flags & FORK_DEATHSIG)
1207 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1208 log_debug_errno(errno, "Failed to set death signal: %m");
1209 _exit(EXIT_FAILURE);
1210 }
1211
1212 if (flags & FORK_RESET_SIGNALS) {
1213 r = reset_all_signal_handlers();
1214 if (r < 0) {
1215 log_debug_errno(r, "Failed to reset signal handlers: %m");
1216 _exit(EXIT_FAILURE);
1217 }
1218
1219 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1220 r = reset_signal_mask();
1221 if (r < 0) {
1222 log_debug_errno(r, "Failed to reset signal mask: %m");
1223 _exit(EXIT_FAILURE);
1224 }
1225 } else if (block_signals) { /* undo what we did above */
1226 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1227 log_debug_errno(errno, "Failed to restore signal mask: %m");
1228 _exit(EXIT_FAILURE);
1229 }
1230 }
1231
1232 if (flags & FORK_DEATHSIG) {
1233 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1234 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1235
1236 if (getppid() != original_pid) {
1237 log_debug("Parent died early, raising SIGTERM.");
1238 (void) raise(SIGTERM);
1239 _exit(EXIT_FAILURE);
1240 }
1241 }
1242
1243 if (flags & FORK_CLOSE_ALL_FDS) {
1244 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1245 log_close();
1246
1247 r = close_all_fds(except_fds, n_except_fds);
1248 if (r < 0) {
1249 log_debug_errno(r, "Failed to close all file descriptors: %m");
1250 _exit(EXIT_FAILURE);
1251 }
1252 }
1253
1254 /* When we were asked to reopen the logs, do so again now */
1255 if (flags & FORK_REOPEN_LOG) {
1256 log_open();
1257 log_set_open_when_needed(false);
1258 }
1259
1260 if (flags & FORK_NULL_STDIO) {
1261 r = make_null_stdio();
1262 if (r < 0) {
1263 log_debug_errno(r, "Failed to connect stdin/stdout to /dev/null: %m");
1264 _exit(EXIT_FAILURE);
1265 }
1266 }
1267
1268 if (ret_pid)
1269 *ret_pid = getpid_cached();
1270
1271 return 0;
1272 }
1273
1274 static const char *const ioprio_class_table[] = {
1275 [IOPRIO_CLASS_NONE] = "none",
1276 [IOPRIO_CLASS_RT] = "realtime",
1277 [IOPRIO_CLASS_BE] = "best-effort",
1278 [IOPRIO_CLASS_IDLE] = "idle"
1279 };
1280
1281 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1282
1283 static const char *const sigchld_code_table[] = {
1284 [CLD_EXITED] = "exited",
1285 [CLD_KILLED] = "killed",
1286 [CLD_DUMPED] = "dumped",
1287 [CLD_TRAPPED] = "trapped",
1288 [CLD_STOPPED] = "stopped",
1289 [CLD_CONTINUED] = "continued",
1290 };
1291
1292 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1293
1294 static const char* const sched_policy_table[] = {
1295 [SCHED_OTHER] = "other",
1296 [SCHED_BATCH] = "batch",
1297 [SCHED_IDLE] = "idle",
1298 [SCHED_FIFO] = "fifo",
1299 [SCHED_RR] = "rr"
1300 };
1301
1302 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);