]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
Merge pull request #8042 from zx2c4-forks/jd/networkctl-type
[thirdparty/systemd.git] / src / basic / process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <linux/oom.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/mman.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <syslog.h>
38 #include <unistd.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
41 #endif
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "escape.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "fs-util.h"
49 #include "ioprio.h"
50 #include "log.h"
51 #include "macro.h"
52 #include "missing.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "terminal-util.h"
60 #include "user-util.h"
61 #include "util.h"
62
63 int get_process_state(pid_t pid) {
64 const char *p;
65 char state;
66 int r;
67 _cleanup_free_ char *line = NULL;
68
69 assert(pid >= 0);
70
71 p = procfs_file_alloca(pid, "stat");
72
73 r = read_one_line_file(p, &line);
74 if (r == -ENOENT)
75 return -ESRCH;
76 if (r < 0)
77 return r;
78
79 p = strrchr(line, ')');
80 if (!p)
81 return -EIO;
82
83 p++;
84
85 if (sscanf(p, " %c", &state) != 1)
86 return -EIO;
87
88 return (unsigned char) state;
89 }
90
/* Reads the first line of the process' procfs "comm" file into a newly allocated string stored in *name.
 *
 * Returns the result of read_one_line_file(), except that -ENOENT is translated to -ESRCH. */
int get_process_comm(pid_t pid, char **name) {
        const char *path;
        int k;

        assert(name);
        assert(pid >= 0);

        path = procfs_file_alloca(pid, "comm");

        k = read_one_line_file(path, name);

        return k == -ENOENT ? -ESRCH : k;
}
106
107 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
108 _cleanup_fclose_ FILE *f = NULL;
109 bool space = false;
110 char *k, *ans = NULL;
111 const char *p;
112 int c;
113
114 assert(line);
115 assert(pid >= 0);
116
117 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
118 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
119 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
120 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
121 * command line that resolves to the empty string will return the "comm" name of the process instead.
122 *
123 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
124 * comm_fallback is false). Returns 0 and sets *line otherwise. */
125
126 p = procfs_file_alloca(pid, "cmdline");
127
128 f = fopen(p, "re");
129 if (!f) {
130 if (errno == ENOENT)
131 return -ESRCH;
132 return -errno;
133 }
134
135 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
136
137 if (max_length == 1) {
138
139 /* If there's only room for one byte, return the empty string */
140 ans = new0(char, 1);
141 if (!ans)
142 return -ENOMEM;
143
144 *line = ans;
145 return 0;
146
147 } else if (max_length == 0) {
148 size_t len = 0, allocated = 0;
149
150 while ((c = getc(f)) != EOF) {
151
152 if (!GREEDY_REALLOC(ans, allocated, len+3)) {
153 free(ans);
154 return -ENOMEM;
155 }
156
157 if (isprint(c)) {
158 if (space) {
159 ans[len++] = ' ';
160 space = false;
161 }
162
163 ans[len++] = c;
164 } else if (len > 0)
165 space = true;
166 }
167
168 if (len > 0)
169 ans[len] = '\0';
170 else
171 ans = mfree(ans);
172
173 } else {
174 bool dotdotdot = false;
175 size_t left;
176
177 ans = new(char, max_length);
178 if (!ans)
179 return -ENOMEM;
180
181 k = ans;
182 left = max_length;
183 while ((c = getc(f)) != EOF) {
184
185 if (isprint(c)) {
186
187 if (space) {
188 if (left <= 2) {
189 dotdotdot = true;
190 break;
191 }
192
193 *(k++) = ' ';
194 left--;
195 space = false;
196 }
197
198 if (left <= 1) {
199 dotdotdot = true;
200 break;
201 }
202
203 *(k++) = (char) c;
204 left--;
205 } else if (k > ans)
206 space = true;
207 }
208
209 if (dotdotdot) {
210 if (max_length <= 4) {
211 k = ans;
212 left = max_length;
213 } else {
214 k = ans + max_length - 4;
215 left = 4;
216
217 /* Eat up final spaces */
218 while (k > ans && isspace(k[-1])) {
219 k--;
220 left++;
221 }
222 }
223
224 strncpy(k, "...", left-1);
225 k[left-1] = 0;
226 } else
227 *k = 0;
228 }
229
230 /* Kernel threads have no argv[] */
231 if (isempty(ans)) {
232 _cleanup_free_ char *t = NULL;
233 int h;
234
235 free(ans);
236
237 if (!comm_fallback)
238 return -ENOENT;
239
240 h = get_process_comm(pid, &t);
241 if (h < 0)
242 return h;
243
244 if (max_length == 0)
245 ans = strjoin("[", t, "]");
246 else {
247 size_t l;
248
249 l = strlen(t);
250
251 if (l + 3 <= max_length)
252 ans = strjoin("[", t, "]");
253 else if (max_length <= 6) {
254
255 ans = new(char, max_length);
256 if (!ans)
257 return -ENOMEM;
258
259 memcpy(ans, "[...]", max_length-1);
260 ans[max_length-1] = 0;
261 } else {
262 char *e;
263
264 t[max_length - 6] = 0;
265
266 /* Chop off final spaces */
267 e = strchr(t, 0);
268 while (e > t && isspace(e[-1]))
269 e--;
270 *e = 0;
271
272 ans = strjoin("[", t, "...]");
273 }
274 }
275 if (!ans)
276 return -ENOMEM;
277 }
278
279 *line = ans;
280 return 0;
281 }
282
283 int rename_process(const char name[]) {
284 static size_t mm_size = 0;
285 static char *mm = NULL;
286 bool truncated = false;
287 size_t l;
288
289 /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
290 * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
291 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
292 * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
293 * truncated.
294 *
295 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
296
297 if (isempty(name))
298 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
299
300 if (!is_main_thread())
301 return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
302 * cache things without locking, and we make assumptions that PR_SET_NAME sets the
303 * process name that isn't correct on any other threads */
304
305 l = strlen(name);
306
307 /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
308 * can use PR_SET_NAME, which sets the thread name for the calling thread. */
309 if (prctl(PR_SET_NAME, name) < 0)
310 log_debug_errno(errno, "PR_SET_NAME failed: %m");
311 if (l > 15) /* Linux process names can be 15 chars at max */
312 truncated = true;
313
314 /* Second step, change glibc's ID of the process name. */
315 if (program_invocation_name) {
316 size_t k;
317
318 k = strlen(program_invocation_name);
319 strncpy(program_invocation_name, name, k);
320 if (l > k)
321 truncated = true;
322 }
323
324 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
325 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
326 * the end. This is the best option for changing /proc/self/cmdline. */
327
328 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
329 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
330 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
331 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
332 * mmap() is not. */
333 if (geteuid() != 0)
334 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
335 else if (mm_size < l+1) {
336 size_t nn_size;
337 char *nn;
338
339 nn_size = PAGE_ALIGN(l+1);
340 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
341 if (nn == MAP_FAILED) {
342 log_debug_errno(errno, "mmap() failed: %m");
343 goto use_saved_argv;
344 }
345
346 strncpy(nn, name, nn_size);
347
348 /* Now, let's tell the kernel about this new memory */
349 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
350 log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
351 (void) munmap(nn, nn_size);
352 goto use_saved_argv;
353 }
354
355 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
356 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
357 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
358 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
359
360 if (mm)
361 (void) munmap(mm, mm_size);
362
363 mm = nn;
364 mm_size = nn_size;
365 } else {
366 strncpy(mm, name, mm_size);
367
368 /* Update the end pointer, continuing regardless of any failure. */
369 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
370 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
371 }
372
373 use_saved_argv:
374 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
375 * it still looks here */
376
377 if (saved_argc > 0) {
378 int i;
379
380 if (saved_argv[0]) {
381 size_t k;
382
383 k = strlen(saved_argv[0]);
384 strncpy(saved_argv[0], name, k);
385 if (l > k)
386 truncated = true;
387 }
388
389 for (i = 1; i < saved_argc; i++) {
390 if (!saved_argv[i])
391 break;
392
393 memzero(saved_argv[i], strlen(saved_argv[i]));
394 }
395 }
396
397 return !truncated;
398 }
399
/* Checks whether the specified process is a kernel thread, by testing whether its procfs "cmdline" file is
 * empty.
 *
 * Returns > 0 if the cmdline is empty, 0 if it is not (or if pid is 0, 1 or our own PID, which certainly
 * aren't kernel threads), -ESRCH if the cmdline file does not exist, and another negative errno-style value
 * if opening or reading the file fails. */
int is_kernel_thread(pid_t pid) {
        const char *p;
        size_t count;
        char c;
        bool eof;
        int saved_errno;
        FILE *f;

        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
                return 0;

        assert(pid > 1);

        p = procfs_file_alloca(pid, "cmdline");
        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        /* Remember the read error right away: fclose() below is free to modify errno, even when it succeeds. */
        errno = 0;
        count = fread(&c, 1, 1, f);
        saved_errno = errno;
        eof = feof(f);
        fclose(f);

        /* Kernel threads have an empty cmdline */

        if (count <= 0) {
                if (eof)
                        return 1;

                /* A read error. Never report it as 0 ("not a kernel thread"), even if errno was left unset. */
                return saved_errno > 0 ? -saved_errno : -EIO;
        }

        return 0;
}
433
434 int get_process_capeff(pid_t pid, char **capeff) {
435 const char *p;
436 int r;
437
438 assert(capeff);
439 assert(pid >= 0);
440
441 p = procfs_file_alloca(pid, "status");
442
443 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
444 if (r == -ENOENT)
445 return -ESRCH;
446
447 return r;
448 }
449
/* Resolves the symlink proc_file via readlink_malloc() and stores its target in *name.
 *
 * Returns 0 on success, -ESRCH if the link does not exist, or the negative error readlink_malloc() reported. */
static int get_process_link_contents(const char *proc_file, char **name) {
        int k;

        assert(proc_file);
        assert(name);

        k = readlink_malloc(proc_file, name);
        if (k >= 0)
                return 0;

        return k == -ENOENT ? -ESRCH : k;
}
464
/* Reads the target of the process' procfs "exe" symlink into *name, cutting off a trailing " (deleted)"
 * marker if there is one.
 *
 * Returns 0 on success, or the negative error get_process_link_contents() reported. */
int get_process_exe(pid_t pid, char **name) {
        const char *path;
        char *suffix;
        int k;

        assert(pid >= 0);

        path = procfs_file_alloca(pid, "exe");

        k = get_process_link_contents(path, name);
        if (k < 0)
                return k;

        /* Truncate the string in place right where the marker begins. */
        suffix = endswith(*name, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}
483
484 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
485 _cleanup_fclose_ FILE *f = NULL;
486 char line[LINE_MAX];
487 const char *p;
488
489 assert(field);
490 assert(uid);
491
492 if (pid < 0)
493 return -EINVAL;
494
495 p = procfs_file_alloca(pid, "status");
496 f = fopen(p, "re");
497 if (!f) {
498 if (errno == ENOENT)
499 return -ESRCH;
500 return -errno;
501 }
502
503 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
504
505 FOREACH_LINE(line, f, return -errno) {
506 char *l;
507
508 l = strstrip(line);
509
510 if (startswith(l, field)) {
511 l += strlen(field);
512 l += strspn(l, WHITESPACE);
513
514 l[strcspn(l, WHITESPACE)] = 0;
515
516 return parse_uid(l, uid);
517 }
518 }
519
520 return -EIO;
521 }
522
/* Determines the UID of a process. For pid 0 and for our own PID this is simply getuid(), for everything else
 * the "Uid:" line of the procfs status file is parsed via get_process_id().
 *
 * Returns 0 on success, or the negative error get_process_id() reported. */
int get_process_uid(pid_t pid, uid_t *uid) {

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
532
/* Determines the GID of a process. For pid 0 and for our own PID this is simply getgid(), for everything else
 * the "Gid:" line of the procfs status file is parsed via get_process_id().
 *
 * Returns 0 on success, or the negative error get_process_id() reported. */
int get_process_gid(pid_t pid, gid_t *gid) {

        if (pid != 0 && pid != getpid_cached()) {
                /* get_process_id() takes a uid_t pointer, make sure a gid_t can be passed in its place. */
                assert_cc(sizeof(uid_t) == sizeof(gid_t));
                return get_process_id(pid, "Gid:", gid);
        }

        *gid = getgid();
        return 0;
}
543
/* Reads the target of the process' procfs "cwd" symlink into *cwd.
 *
 * Returns whatever get_process_link_contents() returns: 0 on success, -ESRCH if the link does not exist, or
 * another negative error. */
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "cwd");
        return get_process_link_contents(link_path, cwd);
}
553
/* Reads the target of the process' procfs "root" symlink into *root.
 *
 * Returns whatever get_process_link_contents() returns: 0 on success, -ESRCH if the link does not exist, or
 * another negative error. */
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}
563
564 int get_process_environ(pid_t pid, char **env) {
565 _cleanup_fclose_ FILE *f = NULL;
566 _cleanup_free_ char *outcome = NULL;
567 int c;
568 const char *p;
569 size_t allocated = 0, sz = 0;
570
571 assert(pid >= 0);
572 assert(env);
573
574 p = procfs_file_alloca(pid, "environ");
575
576 f = fopen(p, "re");
577 if (!f) {
578 if (errno == ENOENT)
579 return -ESRCH;
580 return -errno;
581 }
582
583 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
584
585 while ((c = fgetc(f)) != EOF) {
586 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
587 return -ENOMEM;
588
589 if (c == '\0')
590 outcome[sz++] = '\n';
591 else
592 sz += cescape_char(c, outcome + sz);
593 }
594
595 if (!outcome) {
596 outcome = strdup("");
597 if (!outcome)
598 return -ENOMEM;
599 } else
600 outcome[sz] = '\0';
601
602 *env = outcome;
603 outcome = NULL;
604
605 return 0;
606 }
607
608 int get_process_ppid(pid_t pid, pid_t *_ppid) {
609 int r;
610 _cleanup_free_ char *line = NULL;
611 long unsigned ppid;
612 const char *p;
613
614 assert(pid >= 0);
615 assert(_ppid);
616
617 if (pid == 0 || pid == getpid_cached()) {
618 *_ppid = getppid();
619 return 0;
620 }
621
622 p = procfs_file_alloca(pid, "stat");
623 r = read_one_line_file(p, &line);
624 if (r == -ENOENT)
625 return -ESRCH;
626 if (r < 0)
627 return r;
628
629 /* Let's skip the pid and comm fields. The latter is enclosed
630 * in () but does not escape any () in its value, so let's
631 * skip over it manually */
632
633 p = strrchr(line, ')');
634 if (!p)
635 return -EIO;
636
637 p++;
638
639 if (sscanf(p, " "
640 "%*c " /* state */
641 "%lu ", /* ppid */
642 &ppid) != 1)
643 return -EIO;
644
645 if ((long unsigned) (pid_t) ppid != ppid)
646 return -ERANGE;
647
648 *_ppid = (pid_t) ppid;
649
650 return 0;
651 }
652
653 int wait_for_terminate(pid_t pid, siginfo_t *status) {
654 siginfo_t dummy;
655
656 assert(pid >= 1);
657
658 if (!status)
659 status = &dummy;
660
661 for (;;) {
662 zero(*status);
663
664 if (waitid(P_PID, pid, status, WEXITED) < 0) {
665
666 if (errno == EINTR)
667 continue;
668
669 return negative_errno();
670 }
671
672 return 0;
673 }
674 }
675
676 /*
677 * Return values:
678 * < 0 : wait_for_terminate() failed to get the state of the
679 * process, the process was terminated by a signal, or
680 * failed for an unknown reason.
681 * >=0 : The process terminated normally, and its exit code is
682 * returned.
683 *
684 * That is, success is indicated by a return value of zero, and an
685 * error is indicated by a non-zero value.
686 *
687 * A warning is emitted if the process terminates abnormally,
688 * and also if it returns non-zero unless check_exit_code is true.
689 */
690 int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
691 _cleanup_free_ char *buffer = NULL;
692 siginfo_t status;
693 int r, prio;
694
695 assert(pid > 1);
696
697 if (!name) {
698 r = get_process_comm(pid, &buffer);
699 if (r < 0)
700 log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
701 else
702 name = buffer;
703 }
704
705 prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
706
707 r = wait_for_terminate(pid, &status);
708 if (r < 0)
709 return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
710
711 if (status.si_code == CLD_EXITED) {
712 if (status.si_status != EXIT_SUCCESS)
713 log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
714 "%s failed with exit status %i.", strna(name), status.si_status);
715 else
716 log_debug("%s succeeded.", name);
717
718 return status.si_status;
719
720 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
721
722 log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
723 return -EPROTO;
724 }
725
726 log_full(prio, "%s failed due to unknown reason.", strna(name));
727 return -EPROTO;
728 }
729
730 /*
731 * Return values:
732 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
733 * process, the process timed out, the process was terminated by a
734 * signal, or failed for an unknown reason.
735 * >=0 : The process terminated normally with no failures.
736 *
737 * Success is indicated by a return value of zero, a timeout is indicated
738 * by ETIMEDOUT, and all other child failure states are indicated by error
739 * is indicated by a non-zero value.
740 */
741 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
742 sigset_t mask;
743 int r;
744 usec_t until;
745
746 assert_se(sigemptyset(&mask) == 0);
747 assert_se(sigaddset(&mask, SIGCHLD) == 0);
748
749 /* Drop into a sigtimewait-based timeout. Waiting for the
750 * pid to exit. */
751 until = now(CLOCK_MONOTONIC) + timeout;
752 for (;;) {
753 usec_t n;
754 siginfo_t status = {};
755 struct timespec ts;
756
757 n = now(CLOCK_MONOTONIC);
758 if (n >= until)
759 break;
760
761 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
762 /* Assuming we woke due to the child exiting. */
763 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
764 if (status.si_pid == pid) {
765 /* This is the correct child.*/
766 if (status.si_code == CLD_EXITED)
767 return (status.si_status == 0) ? 0 : -EPROTO;
768 else
769 return -EPROTO;
770 }
771 }
772 /* Not the child, check for errors and proceed appropriately */
773 if (r < 0) {
774 switch (r) {
775 case -EAGAIN:
776 /* Timed out, child is likely hung. */
777 return -ETIMEDOUT;
778 case -EINTR:
779 /* Received a different signal and should retry */
780 continue;
781 default:
782 /* Return any unexpected errors */
783 return r;
784 }
785 }
786 }
787
788 return -EPROTO;
789 }
790
791 void sigkill_wait(pid_t pid) {
792 assert(pid > 1);
793
794 if (kill(pid, SIGKILL) > 0)
795 (void) wait_for_terminate(pid, NULL);
796 }
797
798 void sigkill_waitp(pid_t *pid) {
799 PROTECT_ERRNO;
800
801 if (!pid)
802 return;
803 if (*pid <= 1)
804 return;
805
806 sigkill_wait(*pid);
807 }
808
/* Sends the specified signal to the process, and on success follows up with a SIGCONT.
 *
 * Returns 0 if the signal was sent, or a negative errno if kill() failed. The result of the additional
 * SIGCONT is ignored. */
int kill_and_sigcont(pid_t pid, int sig) {

        if (kill(pid, sig) < 0)
                return -errno;

        /* The follow-up is pointless if we just sent SIGCONT ourselves, or if we sent SIGKILL, which isn't
         * affected by a process being suspended anyway. */
        if (!IN_SET(sig, SIGCONT, SIGKILL))
                (void) kill(pid, SIGCONT);

        return 0;
}
821
822 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
823 _cleanup_fclose_ FILE *f = NULL;
824 char *value = NULL;
825 int r;
826 bool done = false;
827 size_t l;
828 const char *path;
829
830 assert(pid >= 0);
831 assert(field);
832 assert(_value);
833
834 path = procfs_file_alloca(pid, "environ");
835
836 f = fopen(path, "re");
837 if (!f) {
838 if (errno == ENOENT)
839 return -ESRCH;
840 return -errno;
841 }
842
843 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
844
845 l = strlen(field);
846 r = 0;
847
848 do {
849 char line[LINE_MAX];
850 unsigned i;
851
852 for (i = 0; i < sizeof(line)-1; i++) {
853 int c;
854
855 c = getc(f);
856 if (_unlikely_(c == EOF)) {
857 done = true;
858 break;
859 } else if (c == 0)
860 break;
861
862 line[i] = c;
863 }
864 line[i] = 0;
865
866 if (strneq(line, field, l) && line[l] == '=') {
867 value = strdup(line + l + 1);
868 if (!value)
869 return -ENOMEM;
870
871 r = 1;
872 break;
873 }
874
875 } while (!done);
876
877 *_value = value;
878 return r;
879 }
880
/* Checks whether a PID is still valid at all, including a zombie.
 *
 * Negative PIDs are never valid. PIDs 0 and 1 as well as our own PID are always considered valid: if we or
 * PID 1 were dead and had been waited for, this code would not be running. Everything else is probed with
 * kill(pid, 0), and only an ESRCH failure is taken as "gone". */
bool pid_is_unwaited(pid_t pid) {

        if (pid < 0)
                return false;

        if (pid <= 1 || pid == getpid_cached())
                return true;

        return kill(pid, 0) >= 0 || errno != ESRCH;
}
898
/* Checks whether a PID is still valid and not a zombie.
 *
 * Negative PIDs are never alive. PIDs 0 and 1 as well as our own PID are always considered alive: if we or
 * PID 1 were a zombie, this code would not be running. For everything else get_process_state() is consulted:
 * only -ESRCH or a state of 'Z' count as "not alive", any other result (including other errors) counts as
 * alive. */
bool pid_is_alive(pid_t pid) {
        int state;

        if (pid < 0)
                return false;

        if (pid <= 1 || pid == getpid_cached())
                return true;

        state = get_process_state(pid);

        return !IN_SET(state, -ESRCH, 'Z');
}
919
/* Compares the process' procfs "root" entry with "/proc/1/root" using files_same().
 *
 * Returns 0 for a negative pid, 1 for pid 0 and our own PID, and otherwise the result of files_same(). */
int pid_from_same_root_fs(pid_t pid) {
        const char *their_root;

        if (pid < 0)
                return false;

        if (pid != 0 && pid != getpid_cached()) {
                their_root = procfs_file_alloca(pid, "root");
                return files_same(their_root, "/proc/1/root", 0);
        }

        return true;
}
933
934 bool is_main_thread(void) {
935 static thread_local int cached = 0;
936
937 if (_unlikely_(cached == 0))
938 cached = getpid_cached() == gettid() ? 1 : -1;
939
940 return cached > 0;
941 }
942
943 noreturn void freeze(void) {
944
945 log_close();
946
947 /* Make sure nobody waits for us on a socket anymore */
948 close_all_fds(NULL, 0);
949
950 sync();
951
952 /* Let's not freeze right away, but keep reaping zombies. */
953 for (;;) {
954 int r;
955 siginfo_t si = {};
956
957 r = waitid(P_ALL, 0, &si, WEXITED);
958 if (r < 0 && errno != EINTR)
959 break;
960 }
961
962 /* waitid() failed with an unexpected error, things are really borked. Freeze now! */
963 for (;;)
964 pause();
965 }
966
/* Returns true if oa lies within the inclusive range OOM_SCORE_ADJ_MIN...OOM_SCORE_ADJ_MAX. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        if (oa > OOM_SCORE_ADJ_MAX)
                return false;

        return true;
}
970
971 unsigned long personality_from_string(const char *p) {
972 int architecture;
973
974 if (!p)
975 return PERSONALITY_INVALID;
976
977 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
978 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
979 * the same register size. */
980
981 architecture = architecture_from_string(p);
982 if (architecture < 0)
983 return PERSONALITY_INVALID;
984
985 if (architecture == native_architecture())
986 return PER_LINUX;
987 #ifdef SECONDARY_ARCHITECTURE
988 if (architecture == SECONDARY_ARCHITECTURE)
989 return PER_LINUX32;
990 #endif
991
992 return PERSONALITY_INVALID;
993 }
994
995 const char* personality_to_string(unsigned long p) {
996 int architecture = _ARCHITECTURE_INVALID;
997
998 if (p == PER_LINUX)
999 architecture = native_architecture();
1000 #ifdef SECONDARY_ARCHITECTURE
1001 else if (p == PER_LINUX32)
1002 architecture = SECONDARY_ARCHITECTURE;
1003 #endif
1004
1005 if (architecture < 0)
1006 return NULL;
1007
1008 return architecture_to_string(architecture);
1009 }
1010
/* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
 * and in others as negative return value containing an errno-like value. Let's work around this: this wrapper
 * uses errno if it is set, and the return value otherwise. Either way both errno and the return value indicate
 * the same issue afterwards, so that we are definitely on the safe side.
 *
 * Returns the non-negative result of personality() on success, or a negative errno-style value on failure.
 *
 * See https://github.com/systemd/systemd/issues/6737 */
int safe_personality(unsigned long p) {
        int ret;

        /* Clear errno first, so that we can tell afterwards whether the call touched it. */
        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure reported via errno */
        if (errno != 0)
                return -errno;

        /* Failure reported via the return value only: mirror it into errno, too */
        errno = -ret;
        return ret;
}
1032
1033 int opinionated_personality(unsigned long *ret) {
1034 int current;
1035
1036 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1037 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1038 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1039
1040 current = safe_personality(PERSONALITY_INVALID);
1041 if (current < 0)
1042 return current;
1043
1044 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1045 *ret = PER_LINUX32;
1046 else
1047 *ret = PER_LINUX;
1048
1049 return 0;
1050 }
1051
/* If we are PID 1 and running under valgrind, forks off a helper child via raw_clone() that exits right away,
 * and waits for it. Compiles to an empty function if the valgrind header is not available. */
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
                return;

        helper = raw_clone(SIGCHLD);
        if (helper == 0)
                /* In the child: nothing to do but to terminate */
                exit(EXIT_SUCCESS);

        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1068
/* Three-way comparison of two pid_t values passed by pointer, suitable for usage in qsort().
 *
 * Returns -1 if *a < *b, 1 if *a > *b, and 0 if both are equal. */
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a, rhs = *(const pid_t*) b;

        /* Each comparison yields 0 or 1, hence the difference is exactly -1, 0 or 1. */
        return (lhs > rhs) - (lhs < rhs);
}
1080
/* Parses an I/O priority value from a string.
 *
 * Returns 0 and stores the value in *ret on success, the negative error of safe_atoi() if the string cannot
 * be parsed as integer, and -EINVAL if ioprio_priority_is_valid() rejects the parsed value. *ret is only
 * written on success. */
int ioprio_parse_priority(const char *s, int *ret) {
        int prio, k;

        assert(s);
        assert(ret);

        k = safe_atoi(s, &prio);
        if (k < 0)
                return k;

        if (ioprio_priority_is_valid(prio)) {
                *ret = prio;
                return 0;
        }

        return -EINVAL;
}
1097
/* State of the PID cache used by getpid_cached(). The variable holds one of:
 *
 *     CACHED_PID_UNSET [0]  → the cache has not been initialized yet
 *     CACHED_PID_BUSY  [-1] → some thread is initializing the cache right now
 *     anything else         → the cached PID
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

/* Throws away the cached PID. getpid_cached() registers this as the child-side fork handler, i.e. it runs in
 * the child right after a fork(), at the first moment the PID has changed. */
void reset_cached_pid(void) {
        cached_pid = CACHED_PID_UNSET;
}
1114
1115 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1116 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1117 * libpthread, as it is part of glibc anyway. */
1118 extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
1119 extern void* __dso_handle __attribute__ ((__weak__));
1120
1121 pid_t getpid_cached(void) {
1122 pid_t current_value;
1123
1124 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1125 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1126 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1127 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1128 *
1129 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1130 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1131 */
1132
1133 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1134
1135 switch (current_value) {
1136
1137 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1138 pid_t new_pid;
1139
1140 new_pid = raw_getpid();
1141
1142 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1143 /* OOM? Let's try again later */
1144 cached_pid = CACHED_PID_UNSET;
1145 return new_pid;
1146 }
1147
1148 cached_pid = new_pid;
1149 return new_pid;
1150 }
1151
1152 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1153 return raw_getpid();
1154
1155 default: /* Properly initialized */
1156 return current_value;
1157 }
1158 }
1159
/* Checks whether we are running with an effective UID of 0.
 *
 * Returns 0 if so, otherwise logs an error message and returns -EPERM. */
int must_be_root(void) {

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1168
1169 int safe_fork_full(
1170 const char *name,
1171 const int except_fds[],
1172 size_t n_except_fds,
1173 ForkFlags flags,
1174 pid_t *ret_pid) {
1175
1176 pid_t original_pid, pid;
1177 sigset_t saved_ss, ss;
1178 bool block_signals = false;
1179 int prio, r;
1180
1181 /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1182 * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1183
1184 prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
1185
1186 original_pid = getpid_cached();
1187
1188 if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) {
1189
1190 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
1191 * be sure that SIGTERMs are not lost we might send to the child. */
1192
1193 if (sigfillset(&ss) < 0)
1194 return log_full_errno(prio, errno, "Failed to reset signal set: %m");
1195
1196 block_signals = true;
1197
1198 } else if (flags & FORK_WAIT) {
1199
1200 /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
1201
1202 if (sigemptyset(&ss) < 0)
1203 return log_full_errno(prio, errno, "Failed to clear signal set: %m");
1204
1205 if (sigaddset(&ss, SIGCHLD) < 0)
1206 return log_full_errno(prio, errno, "Failed to add SIGCHLD to signal set: %m");
1207
1208 block_signals = true;
1209 }
1210
1211 if (block_signals)
1212 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1213 return log_full_errno(prio, errno, "Failed to set signal mask: %m");
1214
1215 if (flags & FORK_NEW_MOUNTNS)
1216 pid = raw_clone(SIGCHLD|CLONE_NEWNS);
1217 else
1218 pid = fork();
1219 if (pid < 0) {
1220 r = -errno;
1221
1222 if (block_signals) /* undo what we did above */
1223 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1224
1225 return log_full_errno(prio, r, "Failed to fork: %m");
1226 }
1227 if (pid > 0) {
1228 /* We are in the parent process */
1229
1230 log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1231
1232 if (flags & FORK_WAIT) {
1233 r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
1234 if (r < 0)
1235 return r;
1236 if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
1237 return -EPROTO;
1238 }
1239
1240 if (block_signals) /* undo what we did above */
1241 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1242
1243 if (ret_pid)
1244 *ret_pid = pid;
1245
1246 return 1;
1247 }
1248
1249 /* We are in the child process */
1250
1251 if (flags & FORK_REOPEN_LOG) {
1252 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1253 log_close();
1254 log_set_open_when_needed(true);
1255 }
1256
1257 if (name) {
1258 r = rename_process(name);
1259 if (r < 0)
1260 log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
1261 r, "Failed to rename process, ignoring: %m");
1262 }
1263
1264 if (flags & FORK_DEATHSIG)
1265 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1266 log_full_errno(prio, errno, "Failed to set death signal: %m");
1267 _exit(EXIT_FAILURE);
1268 }
1269
1270 if (flags & FORK_RESET_SIGNALS) {
1271 r = reset_all_signal_handlers();
1272 if (r < 0) {
1273 log_full_errno(prio, r, "Failed to reset signal handlers: %m");
1274 _exit(EXIT_FAILURE);
1275 }
1276
1277 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1278 r = reset_signal_mask();
1279 if (r < 0) {
1280 log_full_errno(prio, r, "Failed to reset signal mask: %m");
1281 _exit(EXIT_FAILURE);
1282 }
1283 } else if (block_signals) { /* undo what we did above */
1284 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1285 log_full_errno(prio, errno, "Failed to restore signal mask: %m");
1286 _exit(EXIT_FAILURE);
1287 }
1288 }
1289
1290 if (flags & FORK_DEATHSIG) {
1291 pid_t ppid;
1292 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1293 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1294
1295 ppid = getppid();
1296 if (ppid == 0)
1297 /* Parent is in a different PID namespace. */;
1298 else if (ppid != original_pid) {
1299 log_debug("Parent died early, raising SIGTERM.");
1300 (void) raise(SIGTERM);
1301 _exit(EXIT_FAILURE);
1302 }
1303 }
1304
1305 if (flags & FORK_CLOSE_ALL_FDS) {
1306 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1307 log_close();
1308
1309 r = close_all_fds(except_fds, n_except_fds);
1310 if (r < 0) {
1311 log_full_errno(prio, r, "Failed to close all file descriptors: %m");
1312 _exit(EXIT_FAILURE);
1313 }
1314 }
1315
1316 /* When we were asked to reopen the logs, do so again now */
1317 if (flags & FORK_REOPEN_LOG) {
1318 log_open();
1319 log_set_open_when_needed(false);
1320 }
1321
1322 if (flags & FORK_NULL_STDIO) {
1323 r = make_null_stdio();
1324 if (r < 0) {
1325 log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
1326 _exit(EXIT_FAILURE);
1327 }
1328 }
1329
1330 if (ret_pid)
1331 *ret_pid = getpid_cached();
1332
1333 return 0;
1334 }
1335
1336 int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
1337 bool stdout_is_tty, stderr_is_tty;
1338 unsigned n, i;
1339 va_list ap;
1340 char **l;
1341 int r;
1342
1343 assert(path);
1344
1345 /* Spawns a temporary TTY agent, making sure it goes away when we go away */
1346
1347 r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
1348 if (r < 0)
1349 return r;
1350 if (r > 0)
1351 return 0;
1352
1353 /* In the child: */
1354
1355 stdout_is_tty = isatty(STDOUT_FILENO);
1356 stderr_is_tty = isatty(STDERR_FILENO);
1357
1358 if (!stdout_is_tty || !stderr_is_tty) {
1359 int fd;
1360
1361 /* Detach from stdout/stderr. and reopen
1362 * /dev/tty for them. This is important to
1363 * ensure that when systemctl is started via
1364 * popen() or a similar call that expects to
1365 * read EOF we actually do generate EOF and
1366 * not delay this indefinitely by because we
1367 * keep an unused copy of stdin around. */
1368 fd = open("/dev/tty", O_WRONLY);
1369 if (fd < 0) {
1370 log_error_errno(errno, "Failed to open /dev/tty: %m");
1371 _exit(EXIT_FAILURE);
1372 }
1373
1374 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
1375 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1376 _exit(EXIT_FAILURE);
1377 }
1378
1379 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
1380 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1381 _exit(EXIT_FAILURE);
1382 }
1383
1384 if (fd > STDERR_FILENO)
1385 close(fd);
1386 }
1387
1388 /* Count arguments */
1389 va_start(ap, path);
1390 for (n = 0; va_arg(ap, char*); n++)
1391 ;
1392 va_end(ap);
1393
1394 /* Allocate strv */
1395 l = alloca(sizeof(char *) * (n + 1));
1396
1397 /* Fill in arguments */
1398 va_start(ap, path);
1399 for (i = 0; i <= n; i++)
1400 l[i] = va_arg(ap, char*);
1401 va_end(ap);
1402
1403 execv(path, l);
1404 _exit(EXIT_FAILURE);
1405 }
1406
1407 static const char *const ioprio_class_table[] = {
1408 [IOPRIO_CLASS_NONE] = "none",
1409 [IOPRIO_CLASS_RT] = "realtime",
1410 [IOPRIO_CLASS_BE] = "best-effort",
1411 [IOPRIO_CLASS_IDLE] = "idle"
1412 };
1413
1414 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1415
1416 static const char *const sigchld_code_table[] = {
1417 [CLD_EXITED] = "exited",
1418 [CLD_KILLED] = "killed",
1419 [CLD_DUMPED] = "dumped",
1420 [CLD_TRAPPED] = "trapped",
1421 [CLD_STOPPED] = "stopped",
1422 [CLD_CONTINUED] = "continued",
1423 };
1424
1425 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1426
1427 static const char* const sched_policy_table[] = {
1428 [SCHED_OTHER] = "other",
1429 [SCHED_BATCH] = "batch",
1430 [SCHED_IDLE] = "idle",
1431 [SCHED_FIFO] = "fifo",
1432 [SCHED_RR] = "rr"
1433 };
1434
1435 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);