]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/process-util.c
tree-wide: properly name all threads we fork off
[thirdparty/systemd.git] / src / basic / process-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
0b452006
RC
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
4f5dd394 21#include <ctype.h>
0b452006 22#include <errno.h>
11c3a366
TA
23#include <limits.h>
24#include <linux/oom.h>
7b3e062c 25#include <sched.h>
0b452006 26#include <signal.h>
4f5dd394
LP
27#include <stdbool.h>
28#include <stdio.h>
35bbbf85 29#include <stdio_ext.h>
11c3a366 30#include <stdlib.h>
4f5dd394 31#include <string.h>
9bfaffd5 32#include <sys/mman.h>
7b3e062c 33#include <sys/personality.h>
405f8907 34#include <sys/prctl.h>
4f5dd394
LP
35#include <sys/types.h>
36#include <sys/wait.h>
11c3a366 37#include <syslog.h>
4f5dd394 38#include <unistd.h>
349cc4a5 39#if HAVE_VALGRIND_VALGRIND_H
dcadc967
EV
40#include <valgrind/valgrind.h>
41#endif
0b452006 42
b5efdb8a 43#include "alloc-util.h"
6e5f1b57 44#include "architecture.h"
4f5dd394 45#include "escape.h"
3ffd4af2 46#include "fd-util.h"
0b452006 47#include "fileio.h"
f4f15635 48#include "fs-util.h"
7b3e062c 49#include "ioprio.h"
0b452006 50#include "log.h"
11c3a366
TA
51#include "macro.h"
52#include "missing.h"
93cc7779 53#include "process-util.h"
8869a0b4 54#include "raw-clone.h"
93cc7779 55#include "signal-util.h"
1359fffa 56#include "stat-util.h"
7b3e062c 57#include "string-table.h"
07630cea 58#include "string-util.h"
4c253ed1 59#include "terminal-util.h"
b1d4f8e1 60#include "user-util.h"
4f5dd394 61#include "util.h"
0b452006
RC
62
63int get_process_state(pid_t pid) {
64 const char *p;
65 char state;
66 int r;
67 _cleanup_free_ char *line = NULL;
68
69 assert(pid >= 0);
70
71 p = procfs_file_alloca(pid, "stat");
a644184a 72
0b452006 73 r = read_one_line_file(p, &line);
a644184a
LP
74 if (r == -ENOENT)
75 return -ESRCH;
0b452006
RC
76 if (r < 0)
77 return r;
78
79 p = strrchr(line, ')');
80 if (!p)
81 return -EIO;
82
83 p++;
84
85 if (sscanf(p, " %c", &state) != 1)
86 return -EIO;
87
88 return (unsigned char) state;
89}
90
int get_process_comm(pid_t pid, char **name) {
        const char *comm_path;
        int r;

        /* Reads the first line of /proc/<pid>/comm into *name. A missing file is reported as -ESRCH,
         * every other result of read_one_line_file() is passed through unchanged. */

        assert(name);
        assert(pid >= 0);

        comm_path = procfs_file_alloca(pid, "comm");

        r = read_one_line_file(comm_path, name);
        return r == -ENOENT ? -ESRCH : r;
}
106
107int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
108 _cleanup_fclose_ FILE *f = NULL;
ba4cd7e2 109 bool space = false;
c0534780 110 char *k, *ans = NULL;
0b452006
RC
111 const char *p;
112 int c;
113
114 assert(line);
115 assert(pid >= 0);
116
69281c49
LP
117 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
118 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
119 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
120 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
121 * command line that resolves to the empty string will return the "comm" name of the process instead.
122 *
123 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
c0534780 124 * comm_fallback is false). Returns 0 and sets *line otherwise. */
69281c49 125
0b452006
RC
126 p = procfs_file_alloca(pid, "cmdline");
127
128 f = fopen(p, "re");
a644184a
LP
129 if (!f) {
130 if (errno == ENOENT)
131 return -ESRCH;
0b452006 132 return -errno;
a644184a 133 }
0b452006 134
35bbbf85
LP
135 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
136
69281c49
LP
137 if (max_length == 1) {
138
139 /* If there's only room for one byte, return the empty string */
c0534780
ZJS
140 ans = new0(char, 1);
141 if (!ans)
69281c49
LP
142 return -ENOMEM;
143
c0534780 144 *line = ans;
69281c49
LP
145 return 0;
146
147 } else if (max_length == 0) {
0b452006
RC
148 size_t len = 0, allocated = 0;
149
150 while ((c = getc(f)) != EOF) {
151
c0534780
ZJS
152 if (!GREEDY_REALLOC(ans, allocated, len+3)) {
153 free(ans);
0b452006
RC
154 return -ENOMEM;
155 }
156
ba4cd7e2
MP
157 if (isprint(c)) {
158 if (space) {
c0534780 159 ans[len++] = ' ';
ba4cd7e2
MP
160 space = false;
161 }
162
c0534780 163 ans[len++] = c;
69281c49 164 } else if (len > 0)
ba4cd7e2
MP
165 space = true;
166 }
0b452006
RC
167
168 if (len > 0)
c0534780 169 ans[len] = '\0';
b09df4e2 170 else
c0534780 171 ans = mfree(ans);
0b452006
RC
172
173 } else {
69281c49 174 bool dotdotdot = false;
0b452006
RC
175 size_t left;
176
c0534780
ZJS
177 ans = new(char, max_length);
178 if (!ans)
0b452006
RC
179 return -ENOMEM;
180
c0534780 181 k = ans;
0b452006
RC
182 left = max_length;
183 while ((c = getc(f)) != EOF) {
184
185 if (isprint(c)) {
69281c49 186
0b452006 187 if (space) {
69281c49
LP
188 if (left <= 2) {
189 dotdotdot = true;
0b452006 190 break;
69281c49 191 }
0b452006
RC
192
193 *(k++) = ' ';
194 left--;
195 space = false;
196 }
197
69281c49
LP
198 if (left <= 1) {
199 dotdotdot = true;
0b452006 200 break;
69281c49 201 }
0b452006
RC
202
203 *(k++) = (char) c;
204 left--;
c0534780 205 } else if (k > ans)
0b452006
RC
206 space = true;
207 }
208
69281c49
LP
209 if (dotdotdot) {
210 if (max_length <= 4) {
c0534780 211 k = ans;
69281c49
LP
212 left = max_length;
213 } else {
c0534780 214 k = ans + max_length - 4;
69281c49
LP
215 left = 4;
216
217 /* Eat up final spaces */
c0534780 218 while (k > ans && isspace(k[-1])) {
69281c49
LP
219 k--;
220 left++;
221 }
222 }
223
224 strncpy(k, "...", left-1);
b09df4e2 225 k[left-1] = 0;
0b452006
RC
226 } else
227 *k = 0;
228 }
229
230 /* Kernel threads have no argv[] */
c0534780 231 if (isempty(ans)) {
0b452006
RC
232 _cleanup_free_ char *t = NULL;
233 int h;
234
c0534780 235 free(ans);
0b452006
RC
236
237 if (!comm_fallback)
238 return -ENOENT;
239
240 h = get_process_comm(pid, &t);
241 if (h < 0)
242 return h;
243
69281c49 244 if (max_length == 0)
c0534780 245 ans = strjoin("[", t, "]");
69281c49
LP
246 else {
247 size_t l;
248
249 l = strlen(t);
250
251 if (l + 3 <= max_length)
c0534780 252 ans = strjoin("[", t, "]");
69281c49
LP
253 else if (max_length <= 6) {
254
c0534780
ZJS
255 ans = new(char, max_length);
256 if (!ans)
69281c49
LP
257 return -ENOMEM;
258
c0534780
ZJS
259 memcpy(ans, "[...]", max_length-1);
260 ans[max_length-1] = 0;
69281c49
LP
261 } else {
262 char *e;
263
264 t[max_length - 6] = 0;
265
266 /* Chop off final spaces */
267 e = strchr(t, 0);
268 while (e > t && isspace(e[-1]))
269 e--;
270 *e = 0;
271
c0534780 272 ans = strjoin("[", t, "...]");
69281c49
LP
273 }
274 }
c0534780 275 if (!ans)
0b452006
RC
276 return -ENOMEM;
277 }
278
c0534780 279 *line = ans;
0b452006
RC
280 return 0;
281}
282
9bfaffd5
LP
283int rename_process(const char name[]) {
284 static size_t mm_size = 0;
285 static char *mm = NULL;
286 bool truncated = false;
287 size_t l;
288
289 /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
290 * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
291 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
292 * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
293 * truncated.
294 *
295 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
296
297 if (isempty(name))
298 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
405f8907 299
9bfaffd5 300 l = strlen(name);
405f8907 301
9bfaffd5 302 /* First step, change the comm field. */
79d62972 303 (void) prctl(PR_SET_NAME, name);
9bfaffd5
LP
304 if (l > 15) /* Linux process names can be 15 chars at max */
305 truncated = true;
306
307 /* Second step, change glibc's ID of the process name. */
308 if (program_invocation_name) {
309 size_t k;
310
311 k = strlen(program_invocation_name);
312 strncpy(program_invocation_name, name, k);
313 if (l > k)
314 truncated = true;
315 }
316
317 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
318 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
13e785f7 319 * the end. This is the best option for changing /proc/self/cmdline. */
01f989c6
JW
320
321 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
322 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
323 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
324 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
325 * mmap() is not. */
326 if (geteuid() != 0)
327 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
328 else if (mm_size < l+1) {
9bfaffd5
LP
329 size_t nn_size;
330 char *nn;
331
9bfaffd5
LP
332 nn_size = PAGE_ALIGN(l+1);
333 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
334 if (nn == MAP_FAILED) {
335 log_debug_errno(errno, "mmap() failed: %m");
336 goto use_saved_argv;
337 }
405f8907 338
9bfaffd5
LP
339 strncpy(nn, name, nn_size);
340
341 /* Now, let's tell the kernel about this new memory */
342 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
343 log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
344 (void) munmap(nn, nn_size);
345 goto use_saved_argv;
346 }
347
348 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
349 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
350 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
351 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
352
353 if (mm)
354 (void) munmap(mm, mm_size);
355
356 mm = nn;
357 mm_size = nn_size;
01f989c6 358 } else {
9bfaffd5
LP
359 strncpy(mm, name, mm_size);
360
01f989c6
JW
361 /* Update the end pointer, continuing regardless of any failure. */
362 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
363 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
364 }
365
9bfaffd5
LP
366use_saved_argv:
367 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
368 * it still looks here */
405f8907
LP
369
370 if (saved_argc > 0) {
371 int i;
372
9bfaffd5
LP
373 if (saved_argv[0]) {
374 size_t k;
375
376 k = strlen(saved_argv[0]);
377 strncpy(saved_argv[0], name, k);
378 if (l > k)
379 truncated = true;
380 }
405f8907
LP
381
382 for (i = 1; i < saved_argc; i++) {
383 if (!saved_argv[i])
384 break;
385
386 memzero(saved_argv[i], strlen(saved_argv[i]));
387 }
388 }
9bfaffd5
LP
389
390 return !truncated;
405f8907
LP
391}
392
0b452006
RC
int is_kernel_thread(pid_t pid) {
        const char *p;
        size_t count;
        char c;
        bool eof;
        int saved_errno;
        FILE *f;

        /* Checks whether the specified process is a kernel thread, by testing whether its /proc/<pid>/cmdline
         * is empty. Returns > 0 if so, 0 if not, -ESRCH if the cmdline file does not exist, and another
         * negative errno-style value on failure. */

        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
                return 0;

        assert(pid > 1);

        p = procfs_file_alloca(pid, "cmdline");
        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        /* Remember errno right after the read: the fclose() below may overwrite it */
        errno = 0;
        count = fread(&c, 1, 1, f);
        saved_errno = errno;
        eof = feof(f);
        fclose(f);

        /* Kernel threads have an empty cmdline */

        if (count <= 0) {
                if (eof)
                        return 1;

                /* Never return 0 here, that would be misread as "not a kernel thread" */
                return saved_errno > 0 ? -saved_errno : -EIO;
        }

        return 0;
}
426
427int get_process_capeff(pid_t pid, char **capeff) {
428 const char *p;
a644184a 429 int r;
0b452006
RC
430
431 assert(capeff);
432 assert(pid >= 0);
433
434 p = procfs_file_alloca(pid, "status");
435
c4cd1d4d 436 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
a644184a
LP
437 if (r == -ENOENT)
438 return -ESRCH;
439
440 return r;
0b452006
RC
441}
442
static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        /* Resolves the symlink proc_file into *name. Returns 0 on success, -ESRCH if the link does not
         * exist, or the negative error from readlink_malloc() otherwise. */

        assert(proc_file);
        assert(name);

        r = readlink_malloc(proc_file, name);
        if (r >= 0)
                return 0;

        return r == -ENOENT ? -ESRCH : r;
}
457
int get_process_exe(pid_t pid, char **name) {
        const char *exe_path;
        char *suffix;
        int r;

        /* Resolves /proc/<pid>/exe into *name, cutting off a trailing " (deleted)" marker if there is one.
         * Returns 0 on success, a negative error otherwise. */

        assert(pid >= 0);

        exe_path = procfs_file_alloca(pid, "exe");

        r = get_process_link_contents(exe_path, name);
        if (r < 0)
                return r;

        suffix = endswith(*name, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}
476
477static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
478 _cleanup_fclose_ FILE *f = NULL;
479 char line[LINE_MAX];
480 const char *p;
481
482 assert(field);
483 assert(uid);
484
07b38ba5 485 if (pid < 0)
6f8cbcdb
LP
486 return -EINVAL;
487
0b452006
RC
488 p = procfs_file_alloca(pid, "status");
489 f = fopen(p, "re");
a644184a
LP
490 if (!f) {
491 if (errno == ENOENT)
492 return -ESRCH;
0b452006 493 return -errno;
a644184a 494 }
0b452006 495
35bbbf85
LP
496 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
497
0b452006
RC
498 FOREACH_LINE(line, f, return -errno) {
499 char *l;
500
501 l = strstrip(line);
502
503 if (startswith(l, field)) {
504 l += strlen(field);
505 l += strspn(l, WHITESPACE);
506
507 l[strcspn(l, WHITESPACE)] = 0;
508
509 return parse_uid(l, uid);
510 }
511 }
512
513 return -EIO;
514}
515
int get_process_uid(pid_t pid, uid_t *uid) {

        /* Determines the UID of the specified process. For ourselves (pid 0 or our own PID) getuid() is
         * used directly, for everything else the "Uid:" line of the process' status file is consulted. */

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}
525
int get_process_gid(pid_t pid, gid_t *gid) {

        /* Determines the GID of the specified process. For ourselves (pid 0 or our own PID) getgid() is
         * used directly, for everything else the "Gid:" line of the process' status file is consulted. */

        /* get_process_id() takes a uid_t pointer, make sure it is safe to pass a gid_t pointer to it */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}
536
int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        /* Resolves the /proc/<pid>/cwd symlink into *cwd */

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "cwd");
        return get_process_link_contents(link_path, cwd);
}
546
int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        /* Resolves the /proc/<pid>/root symlink into *root */

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}
556
557int get_process_environ(pid_t pid, char **env) {
558 _cleanup_fclose_ FILE *f = NULL;
559 _cleanup_free_ char *outcome = NULL;
560 int c;
561 const char *p;
562 size_t allocated = 0, sz = 0;
563
564 assert(pid >= 0);
565 assert(env);
566
567 p = procfs_file_alloca(pid, "environ");
568
569 f = fopen(p, "re");
a644184a
LP
570 if (!f) {
571 if (errno == ENOENT)
572 return -ESRCH;
0b452006 573 return -errno;
a644184a 574 }
0b452006 575
35bbbf85
LP
576 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
577
0b452006
RC
578 while ((c = fgetc(f)) != EOF) {
579 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
580 return -ENOMEM;
581
582 if (c == '\0')
583 outcome[sz++] = '\n';
584 else
585 sz += cescape_char(c, outcome + sz);
586 }
587
03c55bc0
LP
588 if (!outcome) {
589 outcome = strdup("");
590 if (!outcome)
591 return -ENOMEM;
592 } else
593 outcome[sz] = '\0';
de8763b6 594
0b452006
RC
595 *env = outcome;
596 outcome = NULL;
597
598 return 0;
599}
600
6bc73acb 601int get_process_ppid(pid_t pid, pid_t *_ppid) {
0b452006
RC
602 int r;
603 _cleanup_free_ char *line = NULL;
604 long unsigned ppid;
605 const char *p;
606
607 assert(pid >= 0);
608 assert(_ppid);
609
6f8cbcdb 610 if (pid == 0 || pid == getpid_cached()) {
0b452006
RC
611 *_ppid = getppid();
612 return 0;
613 }
614
615 p = procfs_file_alloca(pid, "stat");
616 r = read_one_line_file(p, &line);
a644184a
LP
617 if (r == -ENOENT)
618 return -ESRCH;
0b452006
RC
619 if (r < 0)
620 return r;
621
622 /* Let's skip the pid and comm fields. The latter is enclosed
623 * in () but does not escape any () in its value, so let's
624 * skip over it manually */
625
626 p = strrchr(line, ')');
627 if (!p)
628 return -EIO;
629
630 p++;
631
632 if (sscanf(p, " "
633 "%*c " /* state */
634 "%lu ", /* ppid */
635 &ppid) != 1)
636 return -EIO;
637
638 if ((long unsigned) (pid_t) ppid != ppid)
639 return -ERANGE;
640
641 *_ppid = (pid_t) ppid;
642
643 return 0;
644}
645
646int wait_for_terminate(pid_t pid, siginfo_t *status) {
647 siginfo_t dummy;
648
649 assert(pid >= 1);
650
651 if (!status)
652 status = &dummy;
653
654 for (;;) {
655 zero(*status);
656
657 if (waitid(P_PID, pid, status, WEXITED) < 0) {
658
659 if (errno == EINTR)
660 continue;
661
3f0083a2 662 return negative_errno();
0b452006
RC
663 }
664
665 return 0;
666 }
667}
668
/*
 * Return values:
 * < 0 : wait_for_terminate() failed to get the state of the
 *       process, the process was terminated by a signal, or
 *       failed for an unknown reason.
 * >=0 : The process terminated normally, and its exit code is
 *       returned.
 *
 * That is, success is indicated by a return value of zero, and an
 * error is indicated by a non-zero value.
 *
 * A warning is emitted if the process terminates abnormally. A non-zero
 * exit code is logged as a warning if check_exit_code is true, and only
 * at debug level otherwise.
 */
683int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
684 int r;
685 siginfo_t status;
686
687 assert(name);
688 assert(pid > 1);
689
690 r = wait_for_terminate(pid, &status);
691 if (r < 0)
692 return log_warning_errno(r, "Failed to wait for %s: %m", name);
693
694 if (status.si_code == CLD_EXITED) {
695 if (status.si_status != 0)
696 log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
697 "%s failed with error code %i.", name, status.si_status);
698 else
699 log_debug("%s succeeded.", name);
700
701 return status.si_status;
3742095b 702 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
0b452006
RC
703
704 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
705 return -EPROTO;
706 }
707
708 log_warning("%s failed due to unknown reason.", name);
709 return -EPROTO;
710}
711
d5641e0d
KW
/*
 * Return values:
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
 *       process, the process timed out, the process was terminated by a
 *       signal, or failed for an unknown reason.
 * >=0 : The process terminated normally with no failures.
 *
 * Success is indicated by a return value of zero, a timeout is indicated
 * by -ETIMEDOUT, and all other child failure states are indicated by a
 * negative error value.
 */
723int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
724 sigset_t mask;
725 int r;
726 usec_t until;
727
728 assert_se(sigemptyset(&mask) == 0);
729 assert_se(sigaddset(&mask, SIGCHLD) == 0);
730
731 /* Drop into a sigtimewait-based timeout. Waiting for the
732 * pid to exit. */
733 until = now(CLOCK_MONOTONIC) + timeout;
734 for (;;) {
735 usec_t n;
736 siginfo_t status = {};
737 struct timespec ts;
738
739 n = now(CLOCK_MONOTONIC);
740 if (n >= until)
741 break;
742
743 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
744 /* Assuming we woke due to the child exiting. */
745 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
746 if (status.si_pid == pid) {
747 /* This is the correct child.*/
748 if (status.si_code == CLD_EXITED)
749 return (status.si_status == 0) ? 0 : -EPROTO;
750 else
751 return -EPROTO;
752 }
753 }
754 /* Not the child, check for errors and proceed appropriately */
755 if (r < 0) {
756 switch (r) {
757 case -EAGAIN:
758 /* Timed out, child is likely hung. */
759 return -ETIMEDOUT;
760 case -EINTR:
761 /* Received a different signal and should retry */
762 continue;
763 default:
764 /* Return any unexpected errors */
765 return r;
766 }
767 }
768 }
769
770 return -EPROTO;
771}
772
89c9030d
LP
773void sigkill_wait(pid_t pid) {
774 assert(pid > 1);
775
776 if (kill(pid, SIGKILL) > 0)
777 (void) wait_for_terminate(pid, NULL);
778}
779
void sigkill_waitp(pid_t *pid) {
        /* Pointer-taking variant of sigkill_wait(): does nothing if the pointer is NULL or the PID it
         * points to is <= 1. */

        if (!pid || *pid <= 1)
                return;

        sigkill_wait(*pid);
}
788
0b452006
RC
int kill_and_sigcont(pid_t pid, int sig) {

        /* Sends the specified signal to the process. Returns 0 on success, -errno on failure. */

        if (kill(pid, sig) < 0)
                return -errno;

        /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
         * affected by a process being suspended anyway. */
        if (!IN_SET(sig, SIGCONT, SIGKILL))
                (void) kill(pid, SIGCONT);

        return 0;
}
801
802int getenv_for_pid(pid_t pid, const char *field, char **_value) {
803 _cleanup_fclose_ FILE *f = NULL;
804 char *value = NULL;
805 int r;
806 bool done = false;
807 size_t l;
808 const char *path;
809
810 assert(pid >= 0);
811 assert(field);
812 assert(_value);
813
814 path = procfs_file_alloca(pid, "environ");
815
816 f = fopen(path, "re");
a644184a
LP
817 if (!f) {
818 if (errno == ENOENT)
819 return -ESRCH;
0b452006 820 return -errno;
a644184a 821 }
0b452006 822
35bbbf85
LP
823 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
824
0b452006
RC
825 l = strlen(field);
826 r = 0;
827
828 do {
829 char line[LINE_MAX];
830 unsigned i;
831
832 for (i = 0; i < sizeof(line)-1; i++) {
833 int c;
834
835 c = getc(f);
836 if (_unlikely_(c == EOF)) {
837 done = true;
838 break;
839 } else if (c == 0)
840 break;
841
842 line[i] = c;
843 }
844 line[i] = 0;
845
041b5ae1 846 if (strneq(line, field, l) && line[l] == '=') {
0b452006
RC
847 value = strdup(line + l + 1);
848 if (!value)
849 return -ENOMEM;
850
851 r = 1;
852 break;
853 }
854
855 } while (!done);
856
857 *_value = value;
858 return r;
859}
860
bool pid_is_unwaited(pid_t pid) {
        /* Checks whether a PID is still valid at all, including a zombie */

        if (pid < 0)
                return false;

        /* If we or PID 1 would be dead and have been waited for, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Probe with the null signal: any failure other than ESRCH still counts as "exists" */
        return kill(pid, 0) >= 0 || errno != ESRCH;
}
878
bool pid_is_alive(pid_t pid) {
        /* Checks whether a PID is still valid and not a zombie */

        if (pid < 0)
                return false;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Only a vanished process or one in state 'Z' counts as not alive */
        return !IN_SET(get_process_state(pid), -ESRCH, 'Z');
}
d4510856 899
1359fffa
MS
int pid_from_same_root_fs(pid_t pid) {
        const char *root_link;

        /* Compares /proc/<pid>/root with /proc/1/root by means of files_same() and returns its result.
         * Negative PIDs yield false, pid 0 and our own PID yield true without any check. */

        if (pid < 0)
                return false;

        if (pid == 0 || pid == getpid_cached())
                return true;

        root_link = procfs_file_alloca(pid, "root");
        return files_same(root_link, "/proc/1/root", 0);
}
913
d4510856
LP
914bool is_main_thread(void) {
915 static thread_local int cached = 0;
916
917 if (_unlikely_(cached == 0))
df0ff127 918 cached = getpid_cached() == gettid() ? 1 : -1;
d4510856
LP
919
920 return cached > 0;
921}
7b3e062c
LP
922
noreturn void freeze(void) {

        /* Closes logging and all file descriptors, syncs, and then pauses forever. Never returns. */

        log_close();

        /* Make sure nobody waits for us on a socket anymore */
        close_all_fds(NULL, 0);

        sync();

        while (true)
                pause();
}
935
bool oom_score_adjust_is_valid(int oa) {
        /* Valid values lie within [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX], both ends included */
        return !(oa < OOM_SCORE_ADJ_MIN || oa > OOM_SCORE_ADJ_MAX);
}
939
940unsigned long personality_from_string(const char *p) {
6e5f1b57 941 int architecture;
7b3e062c 942
0c0fea07
LP
943 if (!p)
944 return PERSONALITY_INVALID;
945
6e5f1b57
LP
946 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
947 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
948 * the same register size. */
949
950 architecture = architecture_from_string(p);
951 if (architecture < 0)
952 return PERSONALITY_INVALID;
7b3e062c 953
0c0fea07 954 if (architecture == native_architecture())
7b3e062c 955 return PER_LINUX;
0c0fea07
LP
956#ifdef SECONDARY_ARCHITECTURE
957 if (architecture == SECONDARY_ARCHITECTURE)
f2d1736c 958 return PER_LINUX32;
7b3e062c
LP
959#endif
960
961 return PERSONALITY_INVALID;
962}
963
964const char* personality_to_string(unsigned long p) {
6e5f1b57 965 int architecture = _ARCHITECTURE_INVALID;
7b3e062c 966
7b3e062c 967 if (p == PER_LINUX)
0c0fea07
LP
968 architecture = native_architecture();
969#ifdef SECONDARY_ARCHITECTURE
6e5f1b57 970 else if (p == PER_LINUX32)
0c0fea07 971 architecture = SECONDARY_ARCHITECTURE;
7b3e062c
LP
972#endif
973
6e5f1b57
LP
974 if (architecture < 0)
975 return NULL;
976
977 return architecture_to_string(architecture);
7b3e062c
LP
978}
979
21022b9d
LP
int safe_personality(unsigned long p) {
        int rc;

        /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
         * and in others as negative return value containing an errno-like value. Let's work around this: this is a
         * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
         * the return value indicating the same issue, so that we are definitely on the safe side.
         *
         * See https://github.com/systemd/systemd/issues/6737 */

        errno = 0;
        rc = personality(p);
        if (rc >= 0)
                return rc;

        if (errno == 0) {
                /* Failure was reported through the return value only, mirror it into errno */
                errno = -rc;
                return rc;
        }

        return -errno;
}
1001
e8132d63
LP
1002int opinionated_personality(unsigned long *ret) {
1003 int current;
1004
1005 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1006 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1007 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1008
21022b9d 1009 current = safe_personality(PERSONALITY_INVALID);
e8132d63 1010 if (current < 0)
21022b9d 1011 return current;
e8132d63
LP
1012
1013 if (((unsigned long) current & 0xffff) == PER_LINUX32)
1014 *ret = PER_LINUX32;
1015 else
1016 *ret = PER_LINUX;
1017
1018 return 0;
1019}
1020
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only relevant when we are PID 1 and run under valgrind: fork off a helper child that exits
         * right away, and wait for it. */
        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper == 0)
                exit(EXIT_SUCCESS);

        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1037
291d565a
LP
int pid_compare_func(const void *a, const void *b) {
        const pid_t *left = a, *right = b;

        /* Three-way comparison of two PIDs, suitable for usage in qsort(): -1, 0 or 1 */

        return (*left > *right) - (*left < *right);
}
1049
7f452159
LP
int ioprio_parse_priority(const char *s, int *ret) {
        int value, r;

        /* Parses an I/O priority from a string. Returns 0 and stores the value in *ret on success, the
         * error from safe_atoi() if the string can't be parsed, or -EINVAL if ioprio_priority_is_valid()
         * rejects the value. */

        assert(s);
        assert(ret);

        r = safe_atoi(s, &value);
        if (r < 0)
                return r;

        if (!ioprio_priority_is_valid(value))
                return -EINVAL;

        *ret = value;
        return 0;
}
1066
5c30a6d2
LP
1067/* The cached PID, possible values:
1068 *
1069 * == UNSET [0] → cache not initialized yet
1070 * == BUSY [-1] → some thread is initializing it at the moment
1071 * any other → the cached PID
1072 */
1073
#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

/* Backing store for getpid_cached(); starts out unset */
static pid_t cached_pid = CACHED_PID_UNSET;

static void reset_cached_pid(void) {
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed: drop the cached value */
        cached_pid = CACHED_PID_UNSET;
}

/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
 * libpthread, as it is part of glibc anyway. */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
extern void* __dso_handle __attribute__ ((__weak__));
1089
1090pid_t getpid_cached(void) {
1091 pid_t current_value;
1092
1093 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1094 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1095 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1096 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1097 *
1098 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
a4041e4f 1099 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
5c30a6d2
LP
1100 */
1101
1102 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1103
1104 switch (current_value) {
1105
1106 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1107 pid_t new_pid;
1108
1109 new_pid = getpid();
1110
1111 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1112 /* OOM? Let's try again later */
1113 cached_pid = CACHED_PID_UNSET;
1114 return new_pid;
1115 }
1116
1117 cached_pid = new_pid;
1118 return new_pid;
1119 }
1120
1121 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1122 return getpid();
1123
1124 default: /* Properly initialized */
1125 return current_value;
1126 }
1127}
1128
fba868fa
LP
int must_be_root(void) {

        /* Returns 0 if our effective UID is 0; otherwise logs an error and returns -EPERM */

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1137
4c253ed1
LP
1138int safe_fork_full(
1139 const char *name,
1140 const int except_fds[],
1141 size_t n_except_fds,
1142 ForkFlags flags,
1143 pid_t *ret_pid) {
1144
1145 pid_t original_pid, pid;
1146 sigset_t saved_ss;
1147 bool block_signals;
1148 int r;
1149
1150 /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1151 * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1152
1153 original_pid = getpid_cached();
1154
1155 block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1156
1157 if (block_signals) {
1158 sigset_t ss;
1159
1160 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1161 * that SIGTERMs are not lost we might send to the child. */
1162 if (sigfillset(&ss) < 0)
1163 return log_debug_errno(errno, "Failed to reset signal set: %m");
1164
1165 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1166 return log_debug_errno(errno, "Failed to reset signal mask: %m");
1167 }
1168
1169 pid = fork();
1170 if (pid < 0) {
1171 r = -errno;
1172
1173 if (block_signals) /* undo what we did above */
1174 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1175
1176 return log_debug_errno(r, "Failed to fork: %m");
1177 }
1178 if (pid > 0) {
1179 /* We are in the parent process */
1180
1181 if (block_signals) /* undo what we did above */
1182 (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1183
1184 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1185
1186 if (ret_pid)
1187 *ret_pid = pid;
1188
1189 return 1;
1190 }
1191
1192 /* We are in the child process */
1193
1194 if (flags & FORK_REOPEN_LOG) {
1195 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1196 log_close();
1197 log_set_open_when_needed(true);
1198 }
1199
1200 if (name) {
1201 r = rename_process(name);
1202 if (r < 0)
1203 log_debug_errno(r, "Failed to rename process, ignoring: %m");
1204 }
1205
1206 if (flags & FORK_DEATHSIG)
1207 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1208 log_debug_errno(errno, "Failed to set death signal: %m");
1209 _exit(EXIT_FAILURE);
1210 }
1211
1212 if (flags & FORK_RESET_SIGNALS) {
1213 r = reset_all_signal_handlers();
1214 if (r < 0) {
1215 log_debug_errno(r, "Failed to reset signal handlers: %m");
1216 _exit(EXIT_FAILURE);
1217 }
1218
1219 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1220 r = reset_signal_mask();
1221 if (r < 0) {
1222 log_debug_errno(r, "Failed to reset signal mask: %m");
1223 _exit(EXIT_FAILURE);
1224 }
1225 } else if (block_signals) { /* undo what we did above */
1226 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1227 log_debug_errno(errno, "Failed to restore signal mask: %m");
1228 _exit(EXIT_FAILURE);
1229 }
1230 }
1231
1232 if (flags & FORK_DEATHSIG) {
1233 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1234 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1235
1236 if (getppid() != original_pid) {
1237 log_debug("Parent died early, raising SIGTERM.");
1238 (void) raise(SIGTERM);
1239 _exit(EXIT_FAILURE);
1240 }
1241 }
1242
1243 if (flags & FORK_CLOSE_ALL_FDS) {
1244 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1245 log_close();
1246
1247 r = close_all_fds(except_fds, n_except_fds);
1248 if (r < 0) {
1249 log_debug_errno(r, "Failed to close all file descriptors: %m");
1250 _exit(EXIT_FAILURE);
1251 }
1252 }
1253
1254 /* When we were asked to reopen the logs, do so again now */
1255 if (flags & FORK_REOPEN_LOG) {
1256 log_open();
1257 log_set_open_when_needed(false);
1258 }
1259
1260 if (flags & FORK_NULL_STDIO) {
1261 r = make_null_stdio();
1262 if (r < 0) {
1263 log_debug_errno(r, "Failed to connect stdin/stdout to /dev/null: %m");
1264 _exit(EXIT_FAILURE);
1265 }
1266 }
1267
1268 if (ret_pid)
1269 *ret_pid = getpid_cached();
1270
1271 return 0;
1272}
1273
7b3e062c
LP
1274static const char *const ioprio_class_table[] = {
1275 [IOPRIO_CLASS_NONE] = "none",
1276 [IOPRIO_CLASS_RT] = "realtime",
1277 [IOPRIO_CLASS_BE] = "best-effort",
1278 [IOPRIO_CLASS_IDLE] = "idle"
1279};
1280
1281DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1282
1283static const char *const sigchld_code_table[] = {
1284 [CLD_EXITED] = "exited",
1285 [CLD_KILLED] = "killed",
1286 [CLD_DUMPED] = "dumped",
1287 [CLD_TRAPPED] = "trapped",
1288 [CLD_STOPPED] = "stopped",
1289 [CLD_CONTINUED] = "continued",
1290};
1291
1292DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1293
1294static const char* const sched_policy_table[] = {
1295 [SCHED_OTHER] = "other",
1296 [SCHED_BATCH] = "batch",
1297 [SCHED_IDLE] = "idle",
1298 [SCHED_FIFO] = "fifo",
1299 [SCHED_RR] = "rr"
1300};
1301
1302DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);