]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <linux/oom.h>
32 #include <sys/personality.h>
33 #include <sys/prctl.h>
34 #include <sys/types.h>
38 #if HAVE_VALGRIND_VALGRIND_H
39 #include <valgrind/valgrind.h>
42 #include "alloc-util.h"
43 #include "architecture.h"
52 #include "process-util.h"
53 #include "raw-clone.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-table.h"
57 #include "string-util.h"
58 #include "user-util.h"
61 int get_process_state(pid_t pid
) {
65 _cleanup_free_
char *line
= NULL
;
69 p
= procfs_file_alloca(pid
, "stat");
71 r
= read_one_line_file(p
, &line
);
77 p
= strrchr(line
, ')');
83 if (sscanf(p
, " %c", &state
) != 1)
86 return (unsigned char) state
;
89 int get_process_comm(pid_t pid
, char **name
) {
96 p
= procfs_file_alloca(pid
, "comm");
98 r
= read_one_line_file(p
, name
);
105 int get_process_cmdline(pid_t pid
, size_t max_length
, bool comm_fallback
, char **line
) {
106 _cleanup_fclose_
FILE *f
= NULL
;
108 char *k
, *ans
= NULL
;
115 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
116 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
117 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
118 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
119 * command line that resolves to the empty string will return the "comm" name of the process instead.
121 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
122 * comm_fallback is false). Returns 0 and sets *line otherwise. */
124 p
= procfs_file_alloca(pid
, "cmdline");
133 if (max_length
== 1) {
135 /* If there's only room for one byte, return the empty string */
143 } else if (max_length
== 0) {
144 size_t len
= 0, allocated
= 0;
146 while ((c
= getc(f
)) != EOF
) {
148 if (!GREEDY_REALLOC(ans
, allocated
, len
+3)) {
170 bool dotdotdot
= false;
173 ans
= new(char, max_length
);
179 while ((c
= getc(f
)) != EOF
) {
206 if (max_length
<= 4) {
210 k
= ans
+ max_length
- 4;
213 /* Eat up final spaces */
214 while (k
> ans
&& isspace(k
[-1])) {
220 strncpy(k
, "...", left
-1);
226 /* Kernel threads have no argv[] */
228 _cleanup_free_
char *t
= NULL
;
236 h
= get_process_comm(pid
, &t
);
241 ans
= strjoin("[", t
, "]");
247 if (l
+ 3 <= max_length
)
248 ans
= strjoin("[", t
, "]");
249 else if (max_length
<= 6) {
251 ans
= new(char, max_length
);
255 memcpy(ans
, "[...]", max_length
-1);
256 ans
[max_length
-1] = 0;
260 t
[max_length
- 6] = 0;
262 /* Chop off final spaces */
264 while (e
> t
&& isspace(e
[-1]))
268 ans
= strjoin("[", t
, "...]");
279 int rename_process(const char name
[]) {
280 static size_t mm_size
= 0;
281 static char *mm
= NULL
;
282 bool truncated
= false;
285 /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also glibc's
286 * internally used name of the process. For the first one a limit of 16 bytes (15 chars plus the trailing NUL)
287 * applies; to the second one in many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
288 * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
291 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
294 return -EINVAL
; /* let's not confuse users unnecessarily with an empty name */
298 /* First step, change the comm field. */
299 (void) prctl(PR_SET_NAME
, name
);
300 if (l
> 15) /* Linux process names can be 15 chars at max */
303 /* Second step, change glibc's ID of the process name. */
304 if (program_invocation_name
) {
307 k
= strlen(program_invocation_name
);
308 strncpy(program_invocation_name
, name
, k
);
313 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
314 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
315 * the end. This is the best option for changing /proc/self/cmdline. */
317 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
318 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
319 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
320 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
323 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
324 else if (mm_size
< l
+1) {
328 nn_size
= PAGE_ALIGN(l
+1);
329 nn
= mmap(NULL
, nn_size
, PROT_READ
|PROT_WRITE
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1, 0);
330 if (nn
== MAP_FAILED
) {
331 log_debug_errno(errno
, "mmap() failed: %m");
335 strncpy(nn
, name
, nn_size
);
337 /* Now, let's tell the kernel about this new memory */
338 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_START
, (unsigned long) nn
, 0, 0) < 0) {
339 log_debug_errno(errno
, "PR_SET_MM_ARG_START failed, proceeding without: %m");
340 (void) munmap(nn
, nn_size
);
344 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
345 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
346 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, (unsigned long) nn
+ l
+ 1, 0, 0) < 0)
347 log_debug_errno(errno
, "PR_SET_MM_ARG_END failed, proceeding without: %m");
350 (void) munmap(mm
, mm_size
);
355 strncpy(mm
, name
, mm_size
);
357 /* Update the end pointer, continuing regardless of any failure. */
358 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, (unsigned long) mm
+ l
+ 1, 0, 0) < 0)
359 log_debug_errno(errno
, "PR_SET_MM_ARG_END failed, proceeding without: %m");
363 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
364 * it still looks here */
366 if (saved_argc
> 0) {
372 k
= strlen(saved_argv
[0]);
373 strncpy(saved_argv
[0], name
, k
);
378 for (i
= 1; i
< saved_argc
; i
++) {
382 memzero(saved_argv
[i
], strlen(saved_argv
[i
]));
389 int is_kernel_thread(pid_t pid
) {
396 if (IN_SET(pid
, 0, 1) || pid
== getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
401 p
= procfs_file_alloca(pid
, "cmdline");
409 count
= fread(&c
, 1, 1, f
);
413 /* Kernel threads have an empty cmdline */
416 return eof
? 1 : -errno
;
421 int get_process_capeff(pid_t pid
, char **capeff
) {
428 p
= procfs_file_alloca(pid
, "status");
430 r
= get_proc_field(p
, "CapEff", WHITESPACE
, capeff
);
437 static int get_process_link_contents(const char *proc_file
, char **name
) {
443 r
= readlink_malloc(proc_file
, name
);
452 int get_process_exe(pid_t pid
, char **name
) {
459 p
= procfs_file_alloca(pid
, "exe");
460 r
= get_process_link_contents(p
, name
);
464 d
= endswith(*name
, " (deleted)");
471 static int get_process_id(pid_t pid
, const char *field
, uid_t
*uid
) {
472 _cleanup_fclose_
FILE *f
= NULL
;
482 p
= procfs_file_alloca(pid
, "status");
490 FOREACH_LINE(line
, f
, return -errno
) {
495 if (startswith(l
, field
)) {
497 l
+= strspn(l
, WHITESPACE
);
499 l
[strcspn(l
, WHITESPACE
)] = 0;
501 return parse_uid(l
, uid
);
508 int get_process_uid(pid_t pid
, uid_t
*uid
) {
510 if (pid
== 0 || pid
== getpid_cached()) {
515 return get_process_id(pid
, "Uid:", uid
);
518 int get_process_gid(pid_t pid
, gid_t
*gid
) {
520 if (pid
== 0 || pid
== getpid_cached()) {
525 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
526 return get_process_id(pid
, "Gid:", gid
);
529 int get_process_cwd(pid_t pid
, char **cwd
) {
534 p
= procfs_file_alloca(pid
, "cwd");
536 return get_process_link_contents(p
, cwd
);
539 int get_process_root(pid_t pid
, char **root
) {
544 p
= procfs_file_alloca(pid
, "root");
546 return get_process_link_contents(p
, root
);
549 int get_process_environ(pid_t pid
, char **env
) {
550 _cleanup_fclose_
FILE *f
= NULL
;
551 _cleanup_free_
char *outcome
= NULL
;
554 size_t allocated
= 0, sz
= 0;
559 p
= procfs_file_alloca(pid
, "environ");
568 while ((c
= fgetc(f
)) != EOF
) {
569 if (!GREEDY_REALLOC(outcome
, allocated
, sz
+ 5))
573 outcome
[sz
++] = '\n';
575 sz
+= cescape_char(c
, outcome
+ sz
);
579 outcome
= strdup("");
591 int get_process_ppid(pid_t pid
, pid_t
*_ppid
) {
593 _cleanup_free_
char *line
= NULL
;
600 if (pid
== 0 || pid
== getpid_cached()) {
605 p
= procfs_file_alloca(pid
, "stat");
606 r
= read_one_line_file(p
, &line
);
612 /* Let's skip the pid and comm fields. The latter is enclosed
613 * in () but does not escape any () in its value, so let's
614 * skip over it manually */
616 p
= strrchr(line
, ')');
628 if ((long unsigned) (pid_t
) ppid
!= ppid
)
631 *_ppid
= (pid_t
) ppid
;
636 int wait_for_terminate(pid_t pid
, siginfo_t
*status
) {
647 if (waitid(P_PID
, pid
, status
, WEXITED
) < 0) {
652 return negative_errno();
661 * < 0 : wait_for_terminate() failed to get the state of the
662 * process, the process was terminated by a signal, or
663 * failed for an unknown reason.
664 * >=0 : The process terminated normally, and its exit code is
667 * That is, success is indicated by a return value of zero, and an
668 * error is indicated by a non-zero value.
670 * A warning is emitted if the process terminates abnormally,
671 * and also if it returns non-zero and check_exit_code is true (otherwise this is only logged at debug level).
673 int wait_for_terminate_and_warn(const char *name
, pid_t pid
, bool check_exit_code
) {
680 r
= wait_for_terminate(pid
, &status
);
682 return log_warning_errno(r
, "Failed to wait for %s: %m", name
);
684 if (status
.si_code
== CLD_EXITED
) {
685 if (status
.si_status
!= 0)
686 log_full(check_exit_code
? LOG_WARNING
: LOG_DEBUG
,
687 "%s failed with error code %i.", name
, status
.si_status
);
689 log_debug("%s succeeded.", name
);
691 return status
.si_status
;
692 } else if (IN_SET(status
.si_code
, CLD_KILLED
, CLD_DUMPED
)) {
694 log_warning("%s terminated by signal %s.", name
, signal_to_string(status
.si_status
));
698 log_warning("%s failed due to unknown reason.", name
);
702 void sigkill_wait(pid_t pid
) {
705 if (kill(pid
, SIGKILL
) > 0)
706 (void) wait_for_terminate(pid
, NULL
);
709 void sigkill_waitp(pid_t
*pid
) {
718 int kill_and_sigcont(pid_t pid
, int sig
) {
721 r
= kill(pid
, sig
) < 0 ? -errno
: 0;
723 /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
724 * affected by a process being suspended anyway. */
725 if (r
>= 0 && !IN_SET(sig
, SIGCONT
, SIGKILL
))
726 (void) kill(pid
, SIGCONT
);
731 int getenv_for_pid(pid_t pid
, const char *field
, char **_value
) {
732 _cleanup_fclose_
FILE *f
= NULL
;
743 path
= procfs_file_alloca(pid
, "environ");
745 f
= fopen(path
, "re");
759 for (i
= 0; i
< sizeof(line
)-1; i
++) {
763 if (_unlikely_(c
== EOF
)) {
773 if (strneq(line
, field
, l
) && line
[l
] == '=') {
774 value
= strdup(line
+ l
+ 1);
788 bool pid_is_unwaited(pid_t pid
) {
789 /* Checks whether a PID is still valid at all, including a zombie */
794 if (pid
<= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
797 if (pid
== getpid_cached())
800 if (kill(pid
, 0) >= 0)
803 return errno
!= ESRCH
;
806 bool pid_is_alive(pid_t pid
) {
809 /* Checks whether a PID is still valid and not a zombie */
814 if (pid
<= 1) /* If we or PID 1 would be a zombie, this code would not be running */
817 if (pid
== getpid_cached())
820 r
= get_process_state(pid
);
821 if (IN_SET(r
, -ESRCH
, 'Z'))
827 int pid_from_same_root_fs(pid_t pid
) {
833 if (pid
== 0 || pid
== getpid_cached())
836 root
= procfs_file_alloca(pid
, "root");
838 return files_same(root
, "/proc/1/root", 0);
841 bool is_main_thread(void) {
842 static thread_local
int cached
= 0;
844 if (_unlikely_(cached
== 0))
845 cached
= getpid_cached() == gettid() ? 1 : -1;
850 noreturn
void freeze(void) {
854 /* Make sure nobody waits for us on a socket anymore */
855 close_all_fds(NULL
, 0);
863 bool oom_score_adjust_is_valid(int oa
) {
864 return oa
>= OOM_SCORE_ADJ_MIN
&& oa
<= OOM_SCORE_ADJ_MAX
;
867 unsigned long personality_from_string(const char *p
) {
871 return PERSONALITY_INVALID
;
873 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
874 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
875 * the same register size. */
877 architecture
= architecture_from_string(p
);
878 if (architecture
< 0)
879 return PERSONALITY_INVALID
;
881 if (architecture
== native_architecture())
883 #ifdef SECONDARY_ARCHITECTURE
884 if (architecture
== SECONDARY_ARCHITECTURE
)
888 return PERSONALITY_INVALID
;
891 const char* personality_to_string(unsigned long p
) {
892 int architecture
= _ARCHITECTURE_INVALID
;
895 architecture
= native_architecture();
896 #ifdef SECONDARY_ARCHITECTURE
897 else if (p
== PER_LINUX32
)
898 architecture
= SECONDARY_ARCHITECTURE
;
901 if (architecture
< 0)
904 return architecture_to_string(architecture
);
907 int safe_personality(unsigned long p
) {
910 /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
911 * and in others as negative return value containing an errno-like value. Let's work around this: this is a
912 * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
913 * the return value indicating the same issue, so that we are definitely on the safe side.
915 * See https://github.com/systemd/systemd/issues/6737 */
918 ret
= personality(p
);
929 int opinionated_personality(unsigned long *ret
) {
932 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
933 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
934 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
936 current
= safe_personality(PERSONALITY_INVALID
);
940 if (((unsigned long) current
& 0xffff) == PER_LINUX32
)
948 void valgrind_summary_hack(void) {
949 #if HAVE_VALGRIND_VALGRIND_H
950 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
952 pid
= raw_clone(SIGCHLD
);
954 log_emergency_errno(errno
, "Failed to fork off valgrind helper: %m");
958 log_info("Spawned valgrind helper as PID "PID_FMT
".", pid
);
959 (void) wait_for_terminate(pid
, NULL
);
965 int pid_compare_func(const void *a
, const void *b
) {
966 const pid_t
*p
= a
, *q
= b
;
968 /* Suitable for usage in qsort() */
977 int ioprio_parse_priority(const char *s
, int *ret
) {
983 r
= safe_atoi(s
, &i
);
987 if (!ioprio_priority_is_valid(i
))
994 /* The cached PID, possible values:
996 * == UNSET [0] → cache not initialized yet
997 * == BUSY [-1] → some thread is initializing it at the moment
998 * any other → the cached PID
1001 #define CACHED_PID_UNSET ((pid_t) 0)
1002 #define CACHED_PID_BUSY ((pid_t) -1)
1004 static pid_t cached_pid
= CACHED_PID_UNSET
;
1006 static void reset_cached_pid(void) {
1007 /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1008 cached_pid
= CACHED_PID_UNSET
;
1011 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1012 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1013 * libpthread, as it is part of glibc anyway. */
1014 extern int __register_atfork(void (*prepare
) (void), void (*parent
) (void), void (*child
) (void), void * __dso_handle
);
1015 extern void* __dso_handle
__attribute__ ((__weak__
));
1017 pid_t
getpid_cached(void) {
1018 pid_t current_value
;
1020 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1021 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1022 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1023 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1025 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1026 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1029 current_value
= __sync_val_compare_and_swap(&cached_pid
, CACHED_PID_UNSET
, CACHED_PID_BUSY
);
1031 switch (current_value
) {
1033 case CACHED_PID_UNSET
: { /* Not initialized yet, then do so now */
1038 if (__register_atfork(NULL
, NULL
, reset_cached_pid
, __dso_handle
) != 0) {
1039 /* OOM? Let's try again later */
1040 cached_pid
= CACHED_PID_UNSET
;
1044 cached_pid
= new_pid
;
1048 case CACHED_PID_BUSY
: /* Somebody else is currently initializing */
1051 default: /* Properly initialized */
1052 return current_value
;
1056 static const char *const ioprio_class_table
[] = {
1057 [IOPRIO_CLASS_NONE
] = "none",
1058 [IOPRIO_CLASS_RT
] = "realtime",
1059 [IOPRIO_CLASS_BE
] = "best-effort",
1060 [IOPRIO_CLASS_IDLE
] = "idle"
1063 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class
, int, INT_MAX
);
1065 static const char *const sigchld_code_table
[] = {
1066 [CLD_EXITED
] = "exited",
1067 [CLD_KILLED
] = "killed",
1068 [CLD_DUMPED
] = "dumped",
1069 [CLD_TRAPPED
] = "trapped",
1070 [CLD_STOPPED
] = "stopped",
1071 [CLD_CONTINUED
] = "continued",
1074 DEFINE_STRING_TABLE_LOOKUP(sigchld_code
, int);
1076 static const char* const sched_policy_table
[] = {
1077 [SCHED_OTHER
] = "other",
1078 [SCHED_BATCH
] = "batch",
1079 [SCHED_IDLE
] = "idle",
1080 [SCHED_FIFO
] = "fifo",
1084 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy
, int, INT_MAX
);