]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
process-util: rework wait_for_terminate_and_warn() to take a flags parameter
[thirdparty/systemd.git] / src / basic / process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <linux/oom.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/mman.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <syslog.h>
38 #include <unistd.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
41 #endif
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "escape.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "fs-util.h"
49 #include "ioprio.h"
50 #include "log.h"
51 #include "macro.h"
52 #include "missing.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "terminal-util.h"
60 #include "user-util.h"
61 #include "util.h"
62
63 int get_process_state(pid_t pid) {
64 const char *p;
65 char state;
66 int r;
67 _cleanup_free_ char *line = NULL;
68
69 assert(pid >= 0);
70
71 p = procfs_file_alloca(pid, "stat");
72
73 r = read_one_line_file(p, &line);
74 if (r == -ENOENT)
75 return -ESRCH;
76 if (r < 0)
77 return r;
78
79 p = strrchr(line, ')');
80 if (!p)
81 return -EIO;
82
83 p++;
84
85 if (sscanf(p, " %c", &state) != 1)
86 return -EIO;
87
88 return (unsigned char) state;
89 }
90
/* Reads the first line of /proc/$PID/comm into a newly allocated string stored in *name.
 *
 * Returns the result of read_one_line_file(), except that -ENOENT is mapped to -ESRCH. */
int get_process_comm(pid_t pid, char **name) {
        const char *path;
        int r;

        assert(name);
        assert(pid >= 0);

        path = procfs_file_alloca(pid, "comm");

        r = read_one_line_file(path, name);
        return r == -ENOENT ? -ESRCH : r;
}
106
int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
        _cleanup_fclose_ FILE *f = NULL;
        bool space = false; /* set when unprintable bytes were seen and a separating ' ' is still owed */
        char *k, *ans = NULL;
        const char *p;
        int c;

        assert(line);
        assert(pid >= 0);

        /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
         * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
         * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
         * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
         * command line that resolves to the empty string will return the "comm" name of the process instead.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * comm_fallback is false). Returns 0 and sets *line otherwise. On success the caller owns the string
         * stored in *line. */

        p = procfs_file_alloca(pid, "cmdline");

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        if (max_length == 1) {

                /* If there's only room for one byte, return the empty string */
                ans = new0(char, 1);
                if (!ans)
                        return -ENOMEM;

                *line = ans;
                return 0;

        } else if (max_length == 0) {
                /* Unlimited mode: grow the buffer as we go */
                size_t len = 0, allocated = 0;

                while ((c = getc(f)) != EOF) {

                        /* Up to three more bytes are needed per iteration: a pending space, the character itself,
                         * and the final NUL. */
                        if (!GREEDY_REALLOC(ans, allocated, len+3)) {
                                free(ans);
                                return -ENOMEM;
                        }

                        if (isprint(c)) {
                                if (space) {
                                        ans[len++] = ' ';
                                        space = false;
                                }

                                ans[len++] = c;
                        } else if (len > 0) /* unprintable bytes before the first printable one are dropped */
                                space = true;
                }

                if (len > 0)
                        ans[len] = '\0';
                else
                        ans = mfree(ans); /* nothing printable read: fall through to the "empty" handling below */

        } else {
                /* Bounded mode: fixed buffer of max_length bytes, truncated with "..." when it overflows */
                bool dotdotdot = false;
                size_t left; /* bytes still free in ans, including the one needed for the trailing NUL */

                ans = new(char, max_length);
                if (!ans)
                        return -ENOMEM;

                k = ans;
                left = max_length;
                while ((c = getc(f)) != EOF) {

                        if (isprint(c)) {

                                if (space) {
                                        /* Need room for the space, the character and the NUL */
                                        if (left <= 2) {
                                                dotdotdot = true;
                                                break;
                                        }

                                        *(k++) = ' ';
                                        left--;
                                        space = false;
                                }

                                /* Need room for the character and the NUL */
                                if (left <= 1) {
                                        dotdotdot = true;
                                        break;
                                }

                                *(k++) = (char) c;
                                left--;
                        } else if (k > ans) /* unprintable bytes before the first printable one are dropped */
                                space = true;
                }

                if (dotdotdot) {
                        if (max_length <= 4) {
                                /* Too short to keep any text next to the ellipsis: overwrite from the start */
                                k = ans;
                                left = max_length;
                        } else {
                                /* Place the ellipsis in the last four bytes ("..." plus NUL) */
                                k = ans + max_length - 4;
                                left = 4;

                                /* Eat up final spaces */
                                while (k > ans && isspace(k[-1])) {
                                        k--;
                                        left++;
                                }
                        }

                        /* Copies at most left-1 bytes of "..." and then terminates explicitly */
                        strncpy(k, "...", left-1);
                        k[left-1] = 0;
                } else
                        *k = 0;
        }

        /* Kernel threads have no argv[] */
        if (isempty(ans)) {
                _cleanup_free_ char *t = NULL;
                int h;

                free(ans);

                if (!comm_fallback)
                        return -ENOENT;

                h = get_process_comm(pid, &t);
                if (h < 0)
                        return h;

                /* Format the comm name as "[name]", abbreviating it to fit max_length if necessary */
                if (max_length == 0)
                        ans = strjoin("[", t, "]");
                else {
                        size_t l;

                        l = strlen(t);

                        if (l + 3 <= max_length) /* "[" + name + "]" + NUL fits */
                                ans = strjoin("[", t, "]");
                        else if (max_length <= 6) {

                                /* Not even "[...]" plus NUL fits entirely: return a prefix of it */
                                ans = new(char, max_length);
                                if (!ans)
                                        return -ENOMEM;

                                memcpy(ans, "[...]", max_length-1);
                                ans[max_length-1] = 0;
                        } else {
                                char *e;

                                /* Keep max_length-6 bytes of the name, leaving room for "[", "...]" and NUL */
                                t[max_length - 6] = 0;

                                /* Chop off final spaces */
                                e = strchr(t, 0);
                                while (e > t && isspace(e[-1]))
                                        e--;
                                *e = 0;

                                ans = strjoin("[", t, "...]");
                        }
                }
                if (!ans)
                        return -ENOMEM;
        }

        *line = ans;
        return 0;
}
282
int rename_process(const char name[]) {
        static size_t mm_size = 0; /* size of the mapping in 'mm' */
        static char *mm = NULL;    /* anonymous mapping currently registered as our argv[] area, if any */
        bool truncated = false;
        size_t l;

        /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also glibc's
         * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
         * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
         * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
         * truncated.
         *
         * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. Returns -EINVAL for an
         * empty name and -EPERM when called from a thread other than the main one. */

        if (isempty(name))
                return -EINVAL; /* let's not confuse users unnecessarily with an empty name */

        if (!is_main_thread())
                return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
                                * cache things without locking, and we make assumptions that PR_SET_NAME sets the
                                * process name that isn't correct on any other threads */

        l = strlen(name);

        /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
         * can use PR_SET_NAME, which sets the thread name for the calling thread. */
        if (prctl(PR_SET_NAME, name) < 0)
                log_debug_errno(errno, "PR_SET_NAME failed: %m");
        if (l > 15) /* Linux process names can be 15 chars at max */
                truncated = true;

        /* Second step, change glibc's ID of the process name. */
        if (program_invocation_name) {
                size_t k;

                /* Overwrite in place, never writing past the length of the existing string */
                k = strlen(program_invocation_name);
                strncpy(program_invocation_name, name, k);
                if (l > k)
                        truncated = true;
        }

        /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
         * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
         * the end. This is the best option for changing /proc/self/cmdline. */

        /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
         * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
         * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
         * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
         * mmap() is not. */
        if (geteuid() != 0)
                log_debug("Skipping PR_SET_MM, as we don't have privileges.");
        else if (mm_size < l+1) {
                /* The cached mapping (if any) is too small for the new name: allocate a fresh one */
                size_t nn_size;
                char *nn;

                nn_size = PAGE_ALIGN(l+1);
                nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                if (nn == MAP_FAILED) {
                        log_debug_errno(errno, "mmap() failed: %m");
                        goto use_saved_argv;
                }

                strncpy(nn, name, nn_size);

                /* Now, let's tell the kernel about this new memory */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
                        log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
                        (void) munmap(nn, nn_size);
                        goto use_saved_argv;
                }

                /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
                 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");

                /* Release the previous mapping and remember the new one for later calls */
                if (mm)
                        (void) munmap(mm, mm_size);

                mm = nn;
                mm_size = nn_size;
        } else {
                /* The existing mapping is large enough: reuse it */
                strncpy(mm, name, mm_size);

                /* Update the end pointer, continuing regardless of any failure. */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
        }

use_saved_argv:
        /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
         * it still looks here */

        if (saved_argc > 0) {
                int i;

                if (saved_argv[0]) {
                        size_t k;

                        /* Overwrite argv[0] in place, limited to its existing length */
                        k = strlen(saved_argv[0]);
                        strncpy(saved_argv[0], name, k);
                        if (l > k)
                                truncated = true;
                }

                /* Blank out all further arguments, stopping at the first NULL entry */
                for (i = 1; i < saved_argc; i++) {
                        if (!saved_argv[i])
                                break;

                        memzero(saved_argv[i], strlen(saved_argv[i]));
                }
        }

        return !truncated;
}
399
/* Checks whether the specified process is a kernel thread, by testing whether /proc/$PID/cmdline is empty.
 *
 * Returns 1 if the cmdline file is empty, 0 if it is not (or if pid is 0, 1 or our own PID), -ESRCH if the
 * file does not exist, and another negative errno-style value if opening or reading the file fails. */
int is_kernel_thread(pid_t pid) {
        const char *p;
        size_t count;
        char c;
        bool eof;
        int saved_errno;
        FILE *f;

        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
                return 0;

        assert(pid > 1);

        p = procfs_file_alloca(pid, "cmdline");
        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        /* Capture errno right after the read: fclose() below may overwrite it. */
        errno = 0;
        count = fread(&c, 1, 1, f);
        saved_errno = errno;
        eof = feof(f);
        fclose(f);

        /* Kernel threads have an empty cmdline */

        if (count <= 0) {
                if (eof)
                        return 1;

                /* Read error: never report it as "not a kernel thread" (0), even if errno wasn't set */
                return saved_errno > 0 ? -saved_errno : -EIO;
        }

        return 0;
}
433
434 int get_process_capeff(pid_t pid, char **capeff) {
435 const char *p;
436 int r;
437
438 assert(capeff);
439 assert(pid >= 0);
440
441 p = procfs_file_alloca(pid, "status");
442
443 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
444 if (r == -ENOENT)
445 return -ESRCH;
446
447 return r;
448 }
449
/* Resolves the symlink proc_file via readlink_malloc() and stores the target in *name.
 *
 * Returns 0 on success, -ESRCH if the link does not exist, or any other negative value readlink_malloc()
 * reported. */
static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        assert(proc_file);
        assert(name);

        r = readlink_malloc(proc_file, name);
        if (r >= 0)
                return 0;

        return r == -ENOENT ? -ESRCH : r;
}
464
/* Reads the target of the /proc/$PID/exe symlink into *name, cutting off a trailing " (deleted)" marker if
 * there is one.
 *
 * Returns 0 on success, or the negative error reported by get_process_link_contents(). */
int get_process_exe(pid_t pid, char **name) {
        const char *link_path;
        char *marker;
        int r;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "exe");
        r = get_process_link_contents(link_path, name);
        if (r < 0)
                return r;

        marker = endswith(*name, " (deleted)");
        if (marker)
                *marker = '\0';

        return 0;
}
483
484 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
485 _cleanup_fclose_ FILE *f = NULL;
486 char line[LINE_MAX];
487 const char *p;
488
489 assert(field);
490 assert(uid);
491
492 if (pid < 0)
493 return -EINVAL;
494
495 p = procfs_file_alloca(pid, "status");
496 f = fopen(p, "re");
497 if (!f) {
498 if (errno == ENOENT)
499 return -ESRCH;
500 return -errno;
501 }
502
503 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
504
505 FOREACH_LINE(line, f, return -errno) {
506 char *l;
507
508 l = strstrip(line);
509
510 if (startswith(l, field)) {
511 l += strlen(field);
512 l += strspn(l, WHITESPACE);
513
514 l[strcspn(l, WHITESPACE)] = 0;
515
516 return parse_uid(l, uid);
517 }
518 }
519
520 return -EIO;
521 }
522
/* Determines the UID of a process. For pid 0 or our own PID getuid() is used directly; for any other process
 * the "Uid:" line of its status file is parsed. Returns 0 on success, negative on failure. */
int get_process_uid(pid_t pid, uid_t *uid) {

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
532
/* Determines the GID of a process. For pid 0 or our own PID getgid() is used directly; for any other process
 * the "Gid:" line of its status file is parsed. Returns 0 on success, negative on failure. */
int get_process_gid(pid_t pid, gid_t *gid) {

        /* get_process_id() stores its result through a uid_t pointer, hence both types must have the same size */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}
543
/* Reads the target of the /proc/$PID/cwd symlink into *cwd. Returns 0 on success, -ESRCH if the link does not
 * exist, or another negative error. */
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "cwd");
        return get_process_link_contents(link_path, cwd);
}
553
/* Reads the target of the /proc/$PID/root symlink into *root. Returns 0 on success, -ESRCH if the link does
 * not exist, or another negative error. */
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}
563
564 int get_process_environ(pid_t pid, char **env) {
565 _cleanup_fclose_ FILE *f = NULL;
566 _cleanup_free_ char *outcome = NULL;
567 int c;
568 const char *p;
569 size_t allocated = 0, sz = 0;
570
571 assert(pid >= 0);
572 assert(env);
573
574 p = procfs_file_alloca(pid, "environ");
575
576 f = fopen(p, "re");
577 if (!f) {
578 if (errno == ENOENT)
579 return -ESRCH;
580 return -errno;
581 }
582
583 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
584
585 while ((c = fgetc(f)) != EOF) {
586 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
587 return -ENOMEM;
588
589 if (c == '\0')
590 outcome[sz++] = '\n';
591 else
592 sz += cescape_char(c, outcome + sz);
593 }
594
595 if (!outcome) {
596 outcome = strdup("");
597 if (!outcome)
598 return -ENOMEM;
599 } else
600 outcome[sz] = '\0';
601
602 *env = outcome;
603 outcome = NULL;
604
605 return 0;
606 }
607
608 int get_process_ppid(pid_t pid, pid_t *_ppid) {
609 int r;
610 _cleanup_free_ char *line = NULL;
611 long unsigned ppid;
612 const char *p;
613
614 assert(pid >= 0);
615 assert(_ppid);
616
617 if (pid == 0 || pid == getpid_cached()) {
618 *_ppid = getppid();
619 return 0;
620 }
621
622 p = procfs_file_alloca(pid, "stat");
623 r = read_one_line_file(p, &line);
624 if (r == -ENOENT)
625 return -ESRCH;
626 if (r < 0)
627 return r;
628
629 /* Let's skip the pid and comm fields. The latter is enclosed
630 * in () but does not escape any () in its value, so let's
631 * skip over it manually */
632
633 p = strrchr(line, ')');
634 if (!p)
635 return -EIO;
636
637 p++;
638
639 if (sscanf(p, " "
640 "%*c " /* state */
641 "%lu ", /* ppid */
642 &ppid) != 1)
643 return -EIO;
644
645 if ((long unsigned) (pid_t) ppid != ppid)
646 return -ERANGE;
647
648 *_ppid = (pid_t) ppid;
649
650 return 0;
651 }
652
653 int wait_for_terminate(pid_t pid, siginfo_t *status) {
654 siginfo_t dummy;
655
656 assert(pid >= 1);
657
658 if (!status)
659 status = &dummy;
660
661 for (;;) {
662 zero(*status);
663
664 if (waitid(P_PID, pid, status, WEXITED) < 0) {
665
666 if (errno == EINTR)
667 continue;
668
669 return negative_errno();
670 }
671
672 return 0;
673 }
674 }
675
676 /*
677 * Return values:
678 * < 0 : wait_for_terminate() failed to get the state of the
679 * process, the process was terminated by a signal, or
680 * failed for an unknown reason.
681 * >=0 : The process terminated normally, and its exit code is
682 * returned.
683 *
684 * That is, success is indicated by a return value of zero, and an
685 * error is indicated by a non-zero value.
686 *
687 * A warning is emitted if the process terminates abnormally,
688 * and also if it returns non-zero unless check_exit_code is true.
689 */
690 int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
691 _cleanup_free_ char *buffer = NULL;
692 siginfo_t status;
693 int r, prio;
694
695 assert(pid > 1);
696
697 if (!name) {
698 r = get_process_comm(pid, &buffer);
699 if (r < 0)
700 log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
701 else
702 name = buffer;
703 }
704
705 prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
706
707 r = wait_for_terminate(pid, &status);
708 if (r < 0)
709 return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
710
711 if (status.si_code == CLD_EXITED) {
712 if (status.si_status != EXIT_SUCCESS)
713 log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
714 "%s failed with exit status %i.", strna(name), status.si_status);
715 else
716 log_debug("%s succeeded.", name);
717
718 return status.si_status;
719
720 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
721
722 log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
723 return -EPROTO;
724 }
725
726 log_full(prio, "%s failed due to unknown reason.", strna(name));
727 return -EPROTO;
728 }
729
/*
 * Waits for the specified process to exit, for at most 'timeout'.
 *
 * Return values:
 * < 0 : waiting failed, the process timed out, the process was
 *       terminated by a signal, or it exited with a non-zero status.
 *   0 : The process exited normally with exit status zero.
 *
 * A timeout reported by sigtimedwait() is indicated by -ETIMEDOUT. A child
 * that did not exit with status zero, and a deadline found to have passed at
 * the top of the loop, are indicated by -EPROTO. Any other sigtimedwait()
 * failure (except EINTR, which is retried) is returned as negative errno.
 */
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
        sigset_t mask;
        int r;
        usec_t until;

        assert_se(sigemptyset(&mask) == 0);
        assert_se(sigaddset(&mask, SIGCHLD) == 0);

        /* Drop into a sigtimedwait-based timeout. Waiting for the
         * pid to exit. */
        until = now(CLOCK_MONOTONIC) + timeout;
        for (;;) {
                usec_t n;
                siginfo_t status = {};
                struct timespec ts;

                n = now(CLOCK_MONOTONIC);
                if (n >= until)
                        break;

                /* Wait for SIGCHLD for the time remaining until the deadline */
                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
                /* Assuming we woke due to the child exiting. */
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
                        if (status.si_pid == pid) {
                                /* This is the correct child.*/
                                if (status.si_code == CLD_EXITED)
                                        return (status.si_status == 0) ? 0 : -EPROTO;
                                else
                                        return -EPROTO;
                        }
                }
                /* Not the child, check for errors and proceed appropriately */
                if (r < 0) {
                        switch (r) {
                        case -EAGAIN:
                                /* Timed out, child is likely hung. */
                                return -ETIMEDOUT;
                        case -EINTR:
                                /* Received a different signal and should retry */
                                continue;
                        default:
                                /* Return any unexpected errors */
                                return r;
                        }
                }
        }

        /* The deadline passed without the child having been reaped */
        return -EPROTO;
}
790
791 void sigkill_wait(pid_t pid) {
792 assert(pid > 1);
793
794 if (kill(pid, SIGKILL) > 0)
795 (void) wait_for_terminate(pid, NULL);
796 }
797
/* Pointer variant of sigkill_wait(): does nothing if the pointer is NULL or the PID it points to is <= 1. */
void sigkill_waitp(pid_t *pid) {
        if (pid && *pid > 1)
                sigkill_wait(*pid);
}
806
/* Sends the specified signal to a process, followed by SIGCONT.
 *
 * Returns 0 if the first kill() succeeded, negative errno otherwise. The result of sending SIGCONT is
 * ignored. */
int kill_and_sigcont(pid_t pid, int sig) {

        if (kill(pid, sig) < 0)
                return -errno;

        /* The signal went out. Follow up with SIGCONT, unless SIGCONT is what we just sent, or we sent SIGKILL
         * which isn't affected by the process being suspended anyway. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return 0;
}
819
820 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
821 _cleanup_fclose_ FILE *f = NULL;
822 char *value = NULL;
823 int r;
824 bool done = false;
825 size_t l;
826 const char *path;
827
828 assert(pid >= 0);
829 assert(field);
830 assert(_value);
831
832 path = procfs_file_alloca(pid, "environ");
833
834 f = fopen(path, "re");
835 if (!f) {
836 if (errno == ENOENT)
837 return -ESRCH;
838 return -errno;
839 }
840
841 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
842
843 l = strlen(field);
844 r = 0;
845
846 do {
847 char line[LINE_MAX];
848 unsigned i;
849
850 for (i = 0; i < sizeof(line)-1; i++) {
851 int c;
852
853 c = getc(f);
854 if (_unlikely_(c == EOF)) {
855 done = true;
856 break;
857 } else if (c == 0)
858 break;
859
860 line[i] = c;
861 }
862 line[i] = 0;
863
864 if (strneq(line, field, l) && line[l] == '=') {
865 value = strdup(line + l + 1);
866 if (!value)
867 return -ENOMEM;
868
869 r = 1;
870 break;
871 }
872
873 } while (!done);
874
875 *_value = value;
876 return r;
877 }
878
/* Tells whether a PID still refers to a process that has not been waited for yet, zombies included. */
bool pid_is_unwaited(pid_t pid) {

        if (pid < 0)
                return false;

        /* Neither we (0 or our own PID) nor PID 1 can be dead and reaped while this code is running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Probe with the null signal: only ESRCH is taken to mean that the PID is gone */
        return kill(pid, 0) >= 0 || errno != ESRCH;
}
896
/* Tells whether a PID refers to an existing process that is not a zombie. */
bool pid_is_alive(pid_t pid) {
        int state;

        if (pid < 0)
                return false;

        /* Neither we (0 or our own PID) nor PID 1 can be a zombie while this code is running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Only "no such process" and the zombie state count as not alive; any other result, including other
         * errors, is treated as alive. */
        state = get_process_state(pid);
        return state != -ESRCH && state != 'Z';
}
917
/* Compares /proc/$PID/root of the specified process with /proc/1/root using files_same(), and returns its
 * result. Returns false for negative PIDs, and true for pid 0 or our own PID without checking anything. */
int pid_from_same_root_fs(pid_t pid) {
        const char *their_root;

        if (pid < 0)
                return false;

        if (pid == 0 || pid == getpid_cached())
                return true;

        their_root = procfs_file_alloca(pid, "root");
        return files_same(their_root, "/proc/1/root", 0);
}
931
932 bool is_main_thread(void) {
933 static thread_local int cached = 0;
934
935 if (_unlikely_(cached == 0))
936 cached = getpid_cached() == gettid() ? 1 : -1;
937
938 return cached > 0;
939 }
940
/* Closes logging and all file descriptors, syncs, and then sleeps in pause() forever. Never returns. */
noreturn void freeze(void) {

        log_close();

        /* Nobody shall be waiting for us on a socket anymore */
        close_all_fds(NULL, 0);

        sync();

        while (true)
                pause();
}
953
/* Checks that an OOM score adjustment value lies within [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX]. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        if (oa > OOM_SCORE_ADJ_MAX)
                return false;

        return true;
}
957
958 unsigned long personality_from_string(const char *p) {
959 int architecture;
960
961 if (!p)
962 return PERSONALITY_INVALID;
963
964 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
965 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
966 * the same register size. */
967
968 architecture = architecture_from_string(p);
969 if (architecture < 0)
970 return PERSONALITY_INVALID;
971
972 if (architecture == native_architecture())
973 return PER_LINUX;
974 #ifdef SECONDARY_ARCHITECTURE
975 if (architecture == SECONDARY_ARCHITECTURE)
976 return PER_LINUX32;
977 #endif
978
979 return PERSONALITY_INVALID;
980 }
981
982 const char* personality_to_string(unsigned long p) {
983 int architecture = _ARCHITECTURE_INVALID;
984
985 if (p == PER_LINUX)
986 architecture = native_architecture();
987 #ifdef SECONDARY_ARCHITECTURE
988 else if (p == PER_LINUX32)
989 architecture = SECONDARY_ARCHITECTURE;
990 #endif
991
992 if (architecture < 0)
993 return NULL;
994
995 return architecture_to_string(architecture);
996 }
997
/* Wrapper around personality() that papers over glibc's inconsistent error reporting: depending on the case a
 * failure is either reported via errno, or as a negative return value carrying an errno-like code. We use errno
 * if it is set and the return value otherwise, and on failure make sure both errno and the return value
 * describe the same problem.
 *
 * See https://github.com/systemd/systemd/issues/6737
 *
 * Returns the non-negative result of personality() on success, and a negative errno-style value on failure. */
int safe_personality(unsigned long p) {
        int ret;

        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure was reported through errno */
        if (errno != 0)
                return -errno;

        /* Failure was reported through the return value only: mirror it into errno */
        errno = -ret;
        return ret;
}
1019
1020 int opinionated_personality(unsigned long *ret) {
1021 int current;
1022
1023 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1024 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1025 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1026
1027 current = safe_personality(PERSONALITY_INVALID);
1028 if (current < 0)
1029 return current;
1030
1031 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1032 *ret = PER_LINUX32;
1033 else
1034 *ret = PER_LINUX;
1035
1036 return 0;
1037 }
1038
/* If built with valgrind support, and running as PID 1 under valgrind, forks off a helper child that exits
 * immediately, and waits for it. A no-op otherwise. */
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0)
                exit(EXIT_SUCCESS); /* child */

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1055
/* Three-way comparison of two pid_t values passed by pointer, suitable for usage in qsort().
 * Returns -1, 0 or 1. */
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a, rhs = *(const pid_t*) b;

        return (lhs > rhs) - (lhs < rhs);
}
1067
/* Parses an I/O priority from a string.
 *
 * Returns 0 and stores the value in *ret on success, the negative error from safe_atoi() if the string is not
 * an integer, or -EINVAL if ioprio_priority_is_valid() rejects the number. */
int ioprio_parse_priority(const char *s, int *ret) {
        int value, r;

        assert(s);
        assert(ret);

        r = safe_atoi(s, &value);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(value)) {
                *ret = value;
                return 0;
        }

        return -EINVAL;
}
1084
/* The cached PID, possible values:
 *
 *     == UNSET [0]  → cache not initialized yet
 *     == BUSY [-1]  → some thread is initializing it at the moment
 *     any other     → the cached PID
 *
 * The variable is read and updated by getpid_cached(), and reset by reset_cached_pid().
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;
1096
/* Marks the PID cache as uninitialized, so that the next getpid_cached() call queries the PID anew. */
void reset_cached_pid(void) {
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
        cached_pid = CACHED_PID_UNSET;
}
1101
/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
 * libpthread, as it is part of glibc anyway. __dso_handle is declared weak below. */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
extern void* __dso_handle __attribute__ ((__weak__));
1107
1108 pid_t getpid_cached(void) {
1109 pid_t current_value;
1110
1111 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1112 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1113 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1114 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1115 *
1116 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1117 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1118 */
1119
1120 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1121
1122 switch (current_value) {
1123
1124 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1125 pid_t new_pid;
1126
1127 new_pid = getpid();
1128
1129 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1130 /* OOM? Let's try again later */
1131 cached_pid = CACHED_PID_UNSET;
1132 return new_pid;
1133 }
1134
1135 cached_pid = new_pid;
1136 return new_pid;
1137 }
1138
1139 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1140 return getpid();
1141
1142 default: /* Properly initialized */
1143 return current_value;
1144 }
1145 }
1146
/* Returns 0 if the effective UID is 0. Otherwise logs an error and returns -EPERM. */
int must_be_root(void) {

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1155
1156 int safe_fork_full(
1157 const char *name,
1158 const int except_fds[],
1159 size_t n_except_fds,
1160 ForkFlags flags,
1161 pid_t *ret_pid) {
1162
1163 pid_t original_pid, pid;
1164 sigset_t saved_ss;
1165 bool block_signals;
1166 int prio, r;
1167
1168 /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1169 * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1170
1171 prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
1172
1173 original_pid = getpid_cached();
1174
1175 block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1176
1177 if (block_signals) {
1178 sigset_t ss;
1179
1180 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1181 * that SIGTERMs are not lost we might send to the child. */
1182 if (sigfillset(&ss) < 0)
1183 return log_full_errno(prio, errno, "Failed to reset signal set: %m");
1184
1185 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1186 return log_full_errno(prio, errno, "Failed to reset signal mask: %m");
1187 }
1188
1189 pid = fork();
1190 if (pid < 0) {
1191 r = -errno;
1192
1193 if (block_signals) /* undo what we did above */
1194 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1195
1196 return log_full_errno(prio, r, "Failed to fork: %m");
1197 }
1198 if (pid > 0) {
1199 /* We are in the parent process */
1200
1201 if (block_signals) /* undo what we did above */
1202 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1203
1204 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1205
1206 if (ret_pid)
1207 *ret_pid = pid;
1208
1209 return 1;
1210 }
1211
1212 /* We are in the child process */
1213
1214 if (flags & FORK_REOPEN_LOG) {
1215 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1216 log_close();
1217 log_set_open_when_needed(true);
1218 }
1219
1220 if (name) {
1221 r = rename_process(name);
1222 if (r < 0)
1223 log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
1224 r, "Failed to rename process, ignoring: %m");
1225 }
1226
1227 if (flags & FORK_DEATHSIG)
1228 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1229 log_full_errno(prio, errno, "Failed to set death signal: %m");
1230 _exit(EXIT_FAILURE);
1231 }
1232
1233 if (flags & FORK_RESET_SIGNALS) {
1234 r = reset_all_signal_handlers();
1235 if (r < 0) {
1236 log_full_errno(prio, r, "Failed to reset signal handlers: %m");
1237 _exit(EXIT_FAILURE);
1238 }
1239
1240 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1241 r = reset_signal_mask();
1242 if (r < 0) {
1243 log_full_errno(prio, r, "Failed to reset signal mask: %m");
1244 _exit(EXIT_FAILURE);
1245 }
1246 } else if (block_signals) { /* undo what we did above */
1247 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1248 log_full_errno(prio, errno, "Failed to restore signal mask: %m");
1249 _exit(EXIT_FAILURE);
1250 }
1251 }
1252
1253 if (flags & FORK_DEATHSIG) {
1254 pid_t ppid;
1255 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1256 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1257
1258 ppid = getppid();
1259 if (ppid == 0)
1260 /* Parent is in a different PID namespace. */;
1261 else if (ppid != original_pid) {
1262 log_debug("Parent died early, raising SIGTERM.");
1263 (void) raise(SIGTERM);
1264 _exit(EXIT_FAILURE);
1265 }
1266 }
1267
1268 if (flags & FORK_CLOSE_ALL_FDS) {
1269 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1270 log_close();
1271
1272 r = close_all_fds(except_fds, n_except_fds);
1273 if (r < 0) {
1274 log_full_errno(prio, r, "Failed to close all file descriptors: %m");
1275 _exit(EXIT_FAILURE);
1276 }
1277 }
1278
1279 /* When we were asked to reopen the logs, do so again now */
1280 if (flags & FORK_REOPEN_LOG) {
1281 log_open();
1282 log_set_open_when_needed(false);
1283 }
1284
1285 if (flags & FORK_NULL_STDIO) {
1286 r = make_null_stdio();
1287 if (r < 0) {
1288 log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
1289 _exit(EXIT_FAILURE);
1290 }
1291 }
1292
1293 if (ret_pid)
1294 *ret_pid = getpid_cached();
1295
1296 return 0;
1297 }
1298
1299 int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
1300 bool stdout_is_tty, stderr_is_tty;
1301 unsigned n, i;
1302 va_list ap;
1303 char **l;
1304 int r;
1305
1306 assert(path);
1307
1308 /* Spawns a temporary TTY agent, making sure it goes away when we go away */
1309
1310 r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
1311 if (r < 0)
1312 return r;
1313 if (r > 0)
1314 return 0;
1315
1316 /* In the child: */
1317
1318 stdout_is_tty = isatty(STDOUT_FILENO);
1319 stderr_is_tty = isatty(STDERR_FILENO);
1320
1321 if (!stdout_is_tty || !stderr_is_tty) {
1322 int fd;
1323
1324 /* Detach from stdout/stderr. and reopen
1325 * /dev/tty for them. This is important to
1326 * ensure that when systemctl is started via
1327 * popen() or a similar call that expects to
1328 * read EOF we actually do generate EOF and
1329 * not delay this indefinitely by because we
1330 * keep an unused copy of stdin around. */
1331 fd = open("/dev/tty", O_WRONLY);
1332 if (fd < 0) {
1333 log_error_errno(errno, "Failed to open /dev/tty: %m");
1334 _exit(EXIT_FAILURE);
1335 }
1336
1337 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
1338 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1339 _exit(EXIT_FAILURE);
1340 }
1341
1342 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
1343 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1344 _exit(EXIT_FAILURE);
1345 }
1346
1347 if (fd > STDERR_FILENO)
1348 close(fd);
1349 }
1350
1351 /* Count arguments */
1352 va_start(ap, path);
1353 for (n = 0; va_arg(ap, char*); n++)
1354 ;
1355 va_end(ap);
1356
1357 /* Allocate strv */
1358 l = alloca(sizeof(char *) * (n + 1));
1359
1360 /* Fill in arguments */
1361 va_start(ap, path);
1362 for (i = 0; i <= n; i++)
1363 l[i] = va_arg(ap, char*);
1364 va_end(ap);
1365
1366 execv(path, l);
1367 _exit(EXIT_FAILURE);
1368 }
1369
1370 static const char *const ioprio_class_table[] = {
1371 [IOPRIO_CLASS_NONE] = "none",
1372 [IOPRIO_CLASS_RT] = "realtime",
1373 [IOPRIO_CLASS_BE] = "best-effort",
1374 [IOPRIO_CLASS_IDLE] = "idle"
1375 };
1376
1377 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1378
1379 static const char *const sigchld_code_table[] = {
1380 [CLD_EXITED] = "exited",
1381 [CLD_KILLED] = "killed",
1382 [CLD_DUMPED] = "dumped",
1383 [CLD_TRAPPED] = "trapped",
1384 [CLD_STOPPED] = "stopped",
1385 [CLD_CONTINUED] = "continued",
1386 };
1387
1388 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1389
1390 static const char* const sched_policy_table[] = {
1391 [SCHED_OTHER] = "other",
1392 [SCHED_BATCH] = "batch",
1393 [SCHED_IDLE] = "idle",
1394 [SCHED_FIFO] = "fifo",
1395 [SCHED_RR] = "rr"
1396 };
1397
1398 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);