]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
Merge pull request #6598 from kyle-walker/shutdown-limit
[thirdparty/systemd.git] / src / basic / process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <linux/oom.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/mman.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <syslog.h>
38 #include <unistd.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
41 #endif
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "escape.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "fs-util.h"
49 #include "ioprio.h"
50 #include "log.h"
51 #include "macro.h"
52 #include "missing.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "user-util.h"
60 #include "util.h"
61
62 int get_process_state(pid_t pid) {
63 const char *p;
64 char state;
65 int r;
66 _cleanup_free_ char *line = NULL;
67
68 assert(pid >= 0);
69
70 p = procfs_file_alloca(pid, "stat");
71
72 r = read_one_line_file(p, &line);
73 if (r == -ENOENT)
74 return -ESRCH;
75 if (r < 0)
76 return r;
77
78 p = strrchr(line, ')');
79 if (!p)
80 return -EIO;
81
82 p++;
83
84 if (sscanf(p, " %c", &state) != 1)
85 return -EIO;
86
87 return (unsigned char) state;
88 }
89
/* Reads the first line of the procfs "comm" file of the process into a newly allocated string stored in
 * *name. A missing file is reported as -ESRCH; any other result of the read is passed through. */
int get_process_comm(pid_t pid, char **name) {
        const char *path;
        int r;

        assert(name);
        assert(pid >= 0);

        path = procfs_file_alloca(pid, "comm");

        r = read_one_line_file(path, name);
        return r == -ENOENT ? -ESRCH : r;
}
105
106 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
107 _cleanup_fclose_ FILE *f = NULL;
108 bool space = false;
109 char *k, *ans = NULL;
110 const char *p;
111 int c;
112
113 assert(line);
114 assert(pid >= 0);
115
116 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
117 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
118 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
119 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
120 * command line that resolves to the empty string will return the "comm" name of the process instead.
121 *
122 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
123 * comm_fallback is false). Returns 0 and sets *line otherwise. */
124
125 p = procfs_file_alloca(pid, "cmdline");
126
127 f = fopen(p, "re");
128 if (!f) {
129 if (errno == ENOENT)
130 return -ESRCH;
131 return -errno;
132 }
133
134 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
135
136 if (max_length == 1) {
137
138 /* If there's only room for one byte, return the empty string */
139 ans = new0(char, 1);
140 if (!ans)
141 return -ENOMEM;
142
143 *line = ans;
144 return 0;
145
146 } else if (max_length == 0) {
147 size_t len = 0, allocated = 0;
148
149 while ((c = getc(f)) != EOF) {
150
151 if (!GREEDY_REALLOC(ans, allocated, len+3)) {
152 free(ans);
153 return -ENOMEM;
154 }
155
156 if (isprint(c)) {
157 if (space) {
158 ans[len++] = ' ';
159 space = false;
160 }
161
162 ans[len++] = c;
163 } else if (len > 0)
164 space = true;
165 }
166
167 if (len > 0)
168 ans[len] = '\0';
169 else
170 ans = mfree(ans);
171
172 } else {
173 bool dotdotdot = false;
174 size_t left;
175
176 ans = new(char, max_length);
177 if (!ans)
178 return -ENOMEM;
179
180 k = ans;
181 left = max_length;
182 while ((c = getc(f)) != EOF) {
183
184 if (isprint(c)) {
185
186 if (space) {
187 if (left <= 2) {
188 dotdotdot = true;
189 break;
190 }
191
192 *(k++) = ' ';
193 left--;
194 space = false;
195 }
196
197 if (left <= 1) {
198 dotdotdot = true;
199 break;
200 }
201
202 *(k++) = (char) c;
203 left--;
204 } else if (k > ans)
205 space = true;
206 }
207
208 if (dotdotdot) {
209 if (max_length <= 4) {
210 k = ans;
211 left = max_length;
212 } else {
213 k = ans + max_length - 4;
214 left = 4;
215
216 /* Eat up final spaces */
217 while (k > ans && isspace(k[-1])) {
218 k--;
219 left++;
220 }
221 }
222
223 strncpy(k, "...", left-1);
224 k[left-1] = 0;
225 } else
226 *k = 0;
227 }
228
229 /* Kernel threads have no argv[] */
230 if (isempty(ans)) {
231 _cleanup_free_ char *t = NULL;
232 int h;
233
234 free(ans);
235
236 if (!comm_fallback)
237 return -ENOENT;
238
239 h = get_process_comm(pid, &t);
240 if (h < 0)
241 return h;
242
243 if (max_length == 0)
244 ans = strjoin("[", t, "]");
245 else {
246 size_t l;
247
248 l = strlen(t);
249
250 if (l + 3 <= max_length)
251 ans = strjoin("[", t, "]");
252 else if (max_length <= 6) {
253
254 ans = new(char, max_length);
255 if (!ans)
256 return -ENOMEM;
257
258 memcpy(ans, "[...]", max_length-1);
259 ans[max_length-1] = 0;
260 } else {
261 char *e;
262
263 t[max_length - 6] = 0;
264
265 /* Chop off final spaces */
266 e = strchr(t, 0);
267 while (e > t && isspace(e[-1]))
268 e--;
269 *e = 0;
270
271 ans = strjoin("[", t, "...]");
272 }
273 }
274 if (!ans)
275 return -ENOMEM;
276 }
277
278 *line = ans;
279 return 0;
280 }
281
282 int rename_process(const char name[]) {
283 static size_t mm_size = 0;
284 static char *mm = NULL;
285 bool truncated = false;
286 size_t l;
287
288 /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
289 * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
290 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
291 * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
292 * truncated.
293 *
294 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
295
296 if (isempty(name))
297 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
298
299 l = strlen(name);
300
301 /* First step, change the comm field. */
302 (void) prctl(PR_SET_NAME, name);
303 if (l > 15) /* Linux process names can be 15 chars at max */
304 truncated = true;
305
306 /* Second step, change glibc's ID of the process name. */
307 if (program_invocation_name) {
308 size_t k;
309
310 k = strlen(program_invocation_name);
311 strncpy(program_invocation_name, name, k);
312 if (l > k)
313 truncated = true;
314 }
315
316 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
317 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
318 * the end. This is the best option for changing /proc/self/cmdline. */
319
320 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
321 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
322 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
323 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
324 * mmap() is not. */
325 if (geteuid() != 0)
326 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
327 else if (mm_size < l+1) {
328 size_t nn_size;
329 char *nn;
330
331 nn_size = PAGE_ALIGN(l+1);
332 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
333 if (nn == MAP_FAILED) {
334 log_debug_errno(errno, "mmap() failed: %m");
335 goto use_saved_argv;
336 }
337
338 strncpy(nn, name, nn_size);
339
340 /* Now, let's tell the kernel about this new memory */
341 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
342 log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
343 (void) munmap(nn, nn_size);
344 goto use_saved_argv;
345 }
346
347 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
348 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
349 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
350 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
351
352 if (mm)
353 (void) munmap(mm, mm_size);
354
355 mm = nn;
356 mm_size = nn_size;
357 } else {
358 strncpy(mm, name, mm_size);
359
360 /* Update the end pointer, continuing regardless of any failure. */
361 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
362 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
363 }
364
365 use_saved_argv:
366 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
367 * it still looks here */
368
369 if (saved_argc > 0) {
370 int i;
371
372 if (saved_argv[0]) {
373 size_t k;
374
375 k = strlen(saved_argv[0]);
376 strncpy(saved_argv[0], name, k);
377 if (l > k)
378 truncated = true;
379 }
380
381 for (i = 1; i < saved_argc; i++) {
382 if (!saved_argv[i])
383 break;
384
385 memzero(saved_argv[i], strlen(saved_argv[i]));
386 }
387 }
388
389 return !truncated;
390 }
391
/* Checks whether the given PID refers to a kernel thread, which is detected by an empty procfs "cmdline"
 * file.
 *
 * Returns 1 if the cmdline is empty, 0 if it is not (and also for PID 0, PID 1 and our own PID, which are
 * never treated as kernel threads), -ESRCH if the cmdline file does not exist, or another negative
 * errno-style value if opening or reading it fails. */
int is_kernel_thread(pid_t pid) {
        const char *p;
        size_t count;
        char c;
        bool eof;
        FILE *f;
        int read_error;

        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
                return 0;

        assert(pid > 1);

        p = procfs_file_alloca(pid, "cmdline");
        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        /* Reading a single byte is enough to tell an empty file from a non-empty one. */
        errno = 0;
        count = fread(&c, 1, 1, f);
        eof = feof(f);

        /* Record the read error *before* fclose(), which may overwrite errno. If the read failed without
         * setting errno, report a generic I/O error rather than returning 0 ("not a kernel thread"). */
        read_error = errno > 0 ? -errno : -EIO;

        fclose(f);

        /* Kernel threads have an empty cmdline */

        if (count <= 0)
                return eof ? 1 : read_error;

        return 0;
}
425
426 int get_process_capeff(pid_t pid, char **capeff) {
427 const char *p;
428 int r;
429
430 assert(capeff);
431 assert(pid >= 0);
432
433 p = procfs_file_alloca(pid, "status");
434
435 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
436 if (r == -ENOENT)
437 return -ESRCH;
438
439 return r;
440 }
441
/* Reads the target of the symlink proc_file into a newly allocated string stored in *name. Returns 0 on
 * success, -ESRCH if the link does not exist, or the negative error returned by readlink_malloc(). */
static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        assert(proc_file);
        assert(name);

        r = readlink_malloc(proc_file, name);
        if (r < 0)
                return r == -ENOENT ? -ESRCH : r;

        return 0;
}
456
/* Resolves the procfs "exe" symlink of the process into *name. A trailing " (deleted)" marker in the link
 * target is cut off. Returns 0 on success or a negative errno-style value (-ESRCH if the link is
 * missing). */
int get_process_exe(pid_t pid, char **name) {
        const char *link_path;
        char *marker;
        int r;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "exe");

        r = get_process_link_contents(link_path, name);
        if (r < 0)
                return r;

        marker = endswith(*name, " (deleted)");
        if (marker)
                *marker = '\0';

        return 0;
}
475
476 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
477 _cleanup_fclose_ FILE *f = NULL;
478 char line[LINE_MAX];
479 const char *p;
480
481 assert(field);
482 assert(uid);
483
484 if (pid < 0)
485 return -EINVAL;
486
487 p = procfs_file_alloca(pid, "status");
488 f = fopen(p, "re");
489 if (!f) {
490 if (errno == ENOENT)
491 return -ESRCH;
492 return -errno;
493 }
494
495 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
496
497 FOREACH_LINE(line, f, return -errno) {
498 char *l;
499
500 l = strstrip(line);
501
502 if (startswith(l, field)) {
503 l += strlen(field);
504 l += strspn(l, WHITESPACE);
505
506 l[strcspn(l, WHITESPACE)] = 0;
507
508 return parse_uid(l, uid);
509 }
510 }
511
512 return -EIO;
513 }
514
/* Stores the UID of the process in *uid. For PID 0 and our own PID this is answered via getuid();
 * otherwise the "Uid:" line of the process' status file is parsed. */
int get_process_uid(pid_t pid, uid_t *uid) {

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
524
/* Stores the GID of the process in *gid. For PID 0 and our own PID this is answered via getgid();
 * otherwise the "Gid:" line of the process' status file is parsed. */
int get_process_gid(pid_t pid, gid_t *gid) {

        /* get_process_id() takes a uid_t pointer, make sure handing it a gid_t pointer is sound. */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}
535
/* Resolves the procfs "cwd" symlink of the process into a newly allocated string stored in *cwd. */
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "cwd");
        return get_process_link_contents(link_path, cwd);
}
545
/* Resolves the procfs "root" symlink of the process into a newly allocated string stored in *root. */
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}
555
556 int get_process_environ(pid_t pid, char **env) {
557 _cleanup_fclose_ FILE *f = NULL;
558 _cleanup_free_ char *outcome = NULL;
559 int c;
560 const char *p;
561 size_t allocated = 0, sz = 0;
562
563 assert(pid >= 0);
564 assert(env);
565
566 p = procfs_file_alloca(pid, "environ");
567
568 f = fopen(p, "re");
569 if (!f) {
570 if (errno == ENOENT)
571 return -ESRCH;
572 return -errno;
573 }
574
575 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
576
577 while ((c = fgetc(f)) != EOF) {
578 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
579 return -ENOMEM;
580
581 if (c == '\0')
582 outcome[sz++] = '\n';
583 else
584 sz += cescape_char(c, outcome + sz);
585 }
586
587 if (!outcome) {
588 outcome = strdup("");
589 if (!outcome)
590 return -ENOMEM;
591 } else
592 outcome[sz] = '\0';
593
594 *env = outcome;
595 outcome = NULL;
596
597 return 0;
598 }
599
600 int get_process_ppid(pid_t pid, pid_t *_ppid) {
601 int r;
602 _cleanup_free_ char *line = NULL;
603 long unsigned ppid;
604 const char *p;
605
606 assert(pid >= 0);
607 assert(_ppid);
608
609 if (pid == 0 || pid == getpid_cached()) {
610 *_ppid = getppid();
611 return 0;
612 }
613
614 p = procfs_file_alloca(pid, "stat");
615 r = read_one_line_file(p, &line);
616 if (r == -ENOENT)
617 return -ESRCH;
618 if (r < 0)
619 return r;
620
621 /* Let's skip the pid and comm fields. The latter is enclosed
622 * in () but does not escape any () in its value, so let's
623 * skip over it manually */
624
625 p = strrchr(line, ')');
626 if (!p)
627 return -EIO;
628
629 p++;
630
631 if (sscanf(p, " "
632 "%*c " /* state */
633 "%lu ", /* ppid */
634 &ppid) != 1)
635 return -EIO;
636
637 if ((long unsigned) (pid_t) ppid != ppid)
638 return -ERANGE;
639
640 *_ppid = (pid_t) ppid;
641
642 return 0;
643 }
644
645 int wait_for_terminate(pid_t pid, siginfo_t *status) {
646 siginfo_t dummy;
647
648 assert(pid >= 1);
649
650 if (!status)
651 status = &dummy;
652
653 for (;;) {
654 zero(*status);
655
656 if (waitid(P_PID, pid, status, WEXITED) < 0) {
657
658 if (errno == EINTR)
659 continue;
660
661 return negative_errno();
662 }
663
664 return 0;
665 }
666 }
667
668 /*
669 * Return values:
670 * < 0 : wait_for_terminate() failed to get the state of the
671 * process, the process was terminated by a signal, or
672 * failed for an unknown reason.
673 * >=0 : The process terminated normally, and its exit code is
674 * returned.
675 *
676 * That is, success is indicated by a return value of zero, and an
677 * error is indicated by a non-zero value.
678 *
679 * A warning is emitted if the process terminates abnormally,
680 * and also if it returns non-zero unless check_exit_code is true.
681 */
682 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
683 int r;
684 siginfo_t status;
685
686 assert(name);
687 assert(pid > 1);
688
689 r = wait_for_terminate(pid, &status);
690 if (r < 0)
691 return log_warning_errno(r, "Failed to wait for %s: %m", name);
692
693 if (status.si_code == CLD_EXITED) {
694 if (status.si_status != 0)
695 log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
696 "%s failed with error code %i.", name, status.si_status);
697 else
698 log_debug("%s succeeded.", name);
699
700 return status.si_status;
701 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
702
703 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
704 return -EPROTO;
705 }
706
707 log_warning("%s failed due to unknown reason.", name);
708 return -EPROTO;
709 }
710
711 /*
712 * Return values:
713 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
714 * process, the process timed out, the process was terminated by a
715 * signal, or failed for an unknown reason.
716 * >=0 : The process terminated normally with no failures.
717 *
718 * Success is indicated by a return value of zero, a timeout is indicated
719 * by ETIMEDOUT, and all other child failure states are indicated by error
720 * is indicated by a non-zero value.
721 */
722 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
723 sigset_t mask;
724 int r;
725 usec_t until;
726
727 assert_se(sigemptyset(&mask) == 0);
728 assert_se(sigaddset(&mask, SIGCHLD) == 0);
729
730 /* Drop into a sigtimewait-based timeout. Waiting for the
731 * pid to exit. */
732 until = now(CLOCK_MONOTONIC) + timeout;
733 for (;;) {
734 usec_t n;
735 siginfo_t status = {};
736 struct timespec ts;
737
738 n = now(CLOCK_MONOTONIC);
739 if (n >= until)
740 break;
741
742 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
743 /* Assuming we woke due to the child exiting. */
744 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
745 if (status.si_pid == pid) {
746 /* This is the correct child.*/
747 if (status.si_code == CLD_EXITED)
748 return (status.si_status == 0) ? 0 : -EPROTO;
749 else
750 return -EPROTO;
751 }
752 }
753 /* Not the child, check for errors and proceed appropriately */
754 if (r < 0) {
755 switch (r) {
756 case -EAGAIN:
757 /* Timed out, child is likely hung. */
758 return -ETIMEDOUT;
759 case -EINTR:
760 /* Received a different signal and should retry */
761 continue;
762 default:
763 /* Return any unexpected errors */
764 return r;
765 }
766 }
767 }
768
769 return -EPROTO;
770 }
771
772 void sigkill_wait(pid_t pid) {
773 assert(pid > 1);
774
775 if (kill(pid, SIGKILL) > 0)
776 (void) wait_for_terminate(pid, NULL);
777 }
778
/* Pointer-taking variant of sigkill_wait(): does nothing if pid is NULL or refers to a PID <= 1,
 * otherwise kills and waits for *pid. */
void sigkill_waitp(pid_t *pid) {
        if (pid && *pid > 1)
                sigkill_wait(*pid);
}
787
/* Sends signal sig to pid and, if that worked, follows up with SIGCONT. Returns 0 on success or the
 * negative errno of the first kill(); the result of the SIGCONT delivery is ignored. */
int kill_and_sigcont(pid_t pid, int sig) {

        if (kill(pid, sig) < 0)
                return -errno;

        /* No follow-up needed if we just sent SIGCONT ourselves, or if SIGKILL was sent, which isn't
         * affected by a process being suspended anyway. */
        if (!IN_SET(sig, SIGCONT, SIGKILL))
                (void) kill(pid, SIGCONT);

        return 0;
}
800
801 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
802 _cleanup_fclose_ FILE *f = NULL;
803 char *value = NULL;
804 int r;
805 bool done = false;
806 size_t l;
807 const char *path;
808
809 assert(pid >= 0);
810 assert(field);
811 assert(_value);
812
813 path = procfs_file_alloca(pid, "environ");
814
815 f = fopen(path, "re");
816 if (!f) {
817 if (errno == ENOENT)
818 return -ESRCH;
819 return -errno;
820 }
821
822 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
823
824 l = strlen(field);
825 r = 0;
826
827 do {
828 char line[LINE_MAX];
829 unsigned i;
830
831 for (i = 0; i < sizeof(line)-1; i++) {
832 int c;
833
834 c = getc(f);
835 if (_unlikely_(c == EOF)) {
836 done = true;
837 break;
838 } else if (c == 0)
839 break;
840
841 line[i] = c;
842 }
843 line[i] = 0;
844
845 if (strneq(line, field, l) && line[l] == '=') {
846 value = strdup(line + l + 1);
847 if (!value)
848 return -ENOMEM;
849
850 r = 1;
851 break;
852 }
853
854 } while (!done);
855
856 *_value = value;
857 return r;
858 }
859
/* Checks whether a PID is still valid at all, including a zombie. Returns false for negative PIDs and
 * for PIDs for which a kill() with signal 0 fails with ESRCH. */
bool pid_is_unwaited(pid_t pid) {

        if (pid < 0)
                return false;

        /* If we or PID 1 would be dead and have been waited for, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Signal 0 only probes for existence. Any failure other than ESRCH still means the PID exists. */
        return kill(pid, 0) >= 0 || errno != ESRCH;
}
877
/* Checks whether a PID is still valid and not a zombie. Returns false for negative PIDs, and if
 * get_process_state() reports -ESRCH or the state 'Z'; any other result counts as alive. */
bool pid_is_alive(pid_t pid) {
        int state;

        if (pid < 0)
                return false;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        state = get_process_state(pid);
        return !IN_SET(state, -ESRCH, 'Z');
}
898
/* Compares the procfs "root" entry of the process against "/proc/1/root" via files_same() and returns
 * its result. Negative PIDs yield false; PID 0 and our own PID yield true without any comparison. */
int pid_from_same_root_fs(pid_t pid) {
        const char *root_path;

        if (pid < 0)
                return false;

        if (pid == 0)
                return true;
        if (pid == getpid_cached())
                return true;

        root_path = procfs_file_alloca(pid, "root");
        return files_same(root_path, "/proc/1/root", 0);
}
912
913 bool is_main_thread(void) {
914 static thread_local int cached = 0;
915
916 if (_unlikely_(cached == 0))
917 cached = getpid_cached() == gettid() ? 1 : -1;
918
919 return cached > 0;
920 }
921
922 noreturn void freeze(void) {
923
924 log_close();
925
926 /* Make sure nobody waits for us on a socket anymore */
927 close_all_fds(NULL, 0);
928
929 sync();
930
931 for (;;)
932 pause();
933 }
934
/* Returns true if oa lies within the inclusive range [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX]. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}
938
939 unsigned long personality_from_string(const char *p) {
940 int architecture;
941
942 if (!p)
943 return PERSONALITY_INVALID;
944
945 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
946 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
947 * the same register size. */
948
949 architecture = architecture_from_string(p);
950 if (architecture < 0)
951 return PERSONALITY_INVALID;
952
953 if (architecture == native_architecture())
954 return PER_LINUX;
955 #ifdef SECONDARY_ARCHITECTURE
956 if (architecture == SECONDARY_ARCHITECTURE)
957 return PER_LINUX32;
958 #endif
959
960 return PERSONALITY_INVALID;
961 }
962
963 const char* personality_to_string(unsigned long p) {
964 int architecture = _ARCHITECTURE_INVALID;
965
966 if (p == PER_LINUX)
967 architecture = native_architecture();
968 #ifdef SECONDARY_ARCHITECTURE
969 else if (p == PER_LINUX32)
970 architecture = SECONDARY_ARCHITECTURE;
971 #endif
972
973 if (architecture < 0)
974 return NULL;
975
976 return architecture_to_string(architecture);
977 }
978
/* Wrapper around personality() that papers over glibc's inconsistent error reporting: in some cases a
 * failure is reported via errno, in others as a negative return value containing an errno-like value.
 * We use errno if it is set and the return value otherwise, and make sure both errno and the return
 * value indicate the same issue when we return a failure.
 *
 * Returns the non-negative result of personality() on success, a negative errno-style value on failure.
 *
 * See https://github.com/systemd/systemd/issues/6737 */
int safe_personality(unsigned long p) {
        int ret;

        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* errno was set: that's the authoritative error. */
        if (errno != 0)
                return -errno;

        /* Otherwise the negative return value is the error: mirror it into errno. */
        errno = -ret;
        return ret;
}
1000
1001 int opinionated_personality(unsigned long *ret) {
1002 int current;
1003
1004 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1005 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1006 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1007
1008 current = safe_personality(PERSONALITY_INVALID);
1009 if (current < 0)
1010 return current;
1011
1012 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1013 *ret = PER_LINUX32;
1014 else
1015 *ret = PER_LINUX;
1016
1017 return 0;
1018 }
1019
/* If built with valgrind support, and we are PID 1 and running under valgrind: forks off a helper
 * child that exits right away with EXIT_SUCCESS, and waits for it. Without valgrind support compiled in
 * this is a no-op. */
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0)
                exit(EXIT_SUCCESS); /* child: nothing to do but exit */

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1036
/* Three-way comparison of two pid_t values passed by pointer; suitable for usage in qsort().
 * Returns -1, 0 or 1 if the first PID is smaller than, equal to or larger than the second. */
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a;
        const pid_t rhs = *(const pid_t*) b;

        return (lhs > rhs) - (lhs < rhs);
}
1048
/* Parses s as an integer I/O priority and stores it in *ret. Returns 0 on success, the negative error
 * of safe_atoi() if s is not a valid integer, or -EINVAL if ioprio_priority_is_valid() rejects the
 * value. *ret is only written on success. */
int ioprio_parse_priority(const char *s, int *ret) {
        int prio, r;

        assert(s);
        assert(ret);

        r = safe_atoi(s, &prio);
        if (r < 0)
                return r;

        if (!ioprio_priority_is_valid(prio))
                return -EINVAL;

        *ret = prio;
        return 0;
}
1065
/* State of the PID cache used by getpid_cached(). The variable holds one of:
 *
 *     CACHED_PID_UNSET [0]  → the cache has not been initialized yet
 *     CACHED_PID_BUSY  [-1] → some thread is initializing it at the moment
 *     anything else         → the cached PID
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

/* Invalidates the cache. Registered as at-fork child handler, i.e. it runs in the child after a fork(),
 * at the first moment the PID changed. */
static void reset_cached_pid(void) {
        cached_pid = CACHED_PID_UNSET;
}
1082
1083 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1084 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1085 * libpthread, as it is part of glibc anyway. */
1086 extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
1087 extern void* __dso_handle __attribute__ ((__weak__));
1088
1089 pid_t getpid_cached(void) {
1090 pid_t current_value;
1091
1092 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1093 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1094 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1095 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1096 *
1097 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1098 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1099 */
1100
1101 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1102
1103 switch (current_value) {
1104
1105 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1106 pid_t new_pid;
1107
1108 new_pid = getpid();
1109
1110 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1111 /* OOM? Let's try again later */
1112 cached_pid = CACHED_PID_UNSET;
1113 return new_pid;
1114 }
1115
1116 cached_pid = new_pid;
1117 return new_pid;
1118 }
1119
1120 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1121 return getpid();
1122
1123 default: /* Properly initialized */
1124 return current_value;
1125 }
1126 }
1127
/* Returns 0 if the effective UID is 0. Otherwise logs an error and returns -EPERM. */
int must_be_root(void) {

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1136
1137 static const char *const ioprio_class_table[] = {
1138 [IOPRIO_CLASS_NONE] = "none",
1139 [IOPRIO_CLASS_RT] = "realtime",
1140 [IOPRIO_CLASS_BE] = "best-effort",
1141 [IOPRIO_CLASS_IDLE] = "idle"
1142 };
1143
1144 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1145
1146 static const char *const sigchld_code_table[] = {
1147 [CLD_EXITED] = "exited",
1148 [CLD_KILLED] = "killed",
1149 [CLD_DUMPED] = "dumped",
1150 [CLD_TRAPPED] = "trapped",
1151 [CLD_STOPPED] = "stopped",
1152 [CLD_CONTINUED] = "continued",
1153 };
1154
1155 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1156
1157 static const char* const sched_policy_table[] = {
1158 [SCHED_OTHER] = "other",
1159 [SCHED_BATCH] = "batch",
1160 [SCHED_IDLE] = "idle",
1161 [SCHED_FIFO] = "fifo",
1162 [SCHED_RR] = "rr"
1163 };
1164
1165 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);