src/basic/process-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <linux/oom.h>
  25 #include <sched.h>
  26 #include <signal.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdio_ext.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <sys/mman.h>
  33 #include <sys/personality.h>
  34 #include <sys/prctl.h>
  35 #include <sys/types.h>
  36 #include <sys/wait.h>
  37 #include <syslog.h>
  38 #include <unistd.h>
  39 #if HAVE_VALGRIND_VALGRIND_H
  40 #include <valgrind/valgrind.h>
  41 #endif
  42
  43 #include "alloc-util.h"
  44 #include "architecture.h"
  45 #include "escape.h"
  46 #include "fd-util.h"
  47 #include "fileio.h"
  48 #include "fs-util.h"
  49 #include "ioprio.h"
  50 #include "log.h"
  51 #include "macro.h"
  52 #include "missing.h"
  53 #include "process-util.h"
  54 #include "raw-clone.h"
  55 #include "signal-util.h"
  56 #include "stat-util.h"
  57 #include "string-table.h"
  58 #include "string-util.h"
  59 #include "terminal-util.h"
  60 #include "user-util.h"
  61 #include "util.h"
  62
  63 int get_process_state(pid_t pid) {
  64         const char *p;
  65         char state;
  66         int r;
  67         _cleanup_free_ char *line = NULL;
  68
  69         assert(pid >= 0);
  70
  71         p = procfs_file_alloca(pid, "stat");
  72
  73         r = read_one_line_file(p, &line);
  74         if (r == -ENOENT)
  75                 return -ESRCH;
  76         if (r < 0)
  77                 return r;
  78
  79         p = strrchr(line, ')');
  80         if (!p)
  81                 return -EIO;
  82
  83         p++;
  84
  85         if (sscanf(p, " %c", &state) != 1)
  86                 return -EIO;
  87
  88         return (unsigned char) state;
  89 }
  90
  91 int get_process_comm(pid_t pid, char **name) {
  92         const char *p;
  93         int r;
  94
  95         assert(name);
  96         assert(pid >= 0);
  97
  98         p = procfs_file_alloca(pid, "comm");
  99
 100         r = read_one_line_file(p, name);
 101         if (r == -ENOENT)
 102                 return -ESRCH;
 103
 104         return r;
 105 }
 106
 107 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
 108         _cleanup_fclose_ FILE *f = NULL;
 109         bool space = false;
 110         char *k, *ans = NULL;
 111         const char *p;
 112         int c;
 113
 114         assert(line);
 115         assert(pid >= 0);
 116
 117         /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
 118          * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
 119          * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
 120          * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
 121          * command line that resolves to the empty string will return the "comm" name of the process instead.
 122          *
 123          * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
 124          * comm_fallback is false). Returns 0 and sets *line otherwise. */
 125
 126         p = procfs_file_alloca(pid, "cmdline");
 127
 128         f = fopen(p, "re");
 129         if (!f) {
 130                 if (errno == ENOENT)
 131                         return -ESRCH;
 132                 return -errno;
 133         }
 134
 135         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 136
 137         if (max_length == 1) {
 138
 139                 /* If there's only room for one byte, return the empty string */
 140                 ans = new0(char, 1);
 141                 if (!ans)
 142                         return -ENOMEM;
 143
 144                 *line = ans;
 145                 return 0;
 146
 147         } else if (max_length == 0) {
 148                 size_t len = 0, allocated = 0;
 149
 150                 while ((c = getc(f)) != EOF) {
 151
 152                         if (!GREEDY_REALLOC(ans, allocated, len+3)) {
 153                                 free(ans);
 154                                 return -ENOMEM;
 155                         }
 156
 157                         if (isprint(c)) {
 158                                 if (space) {
 159                                         ans[len++] = ' ';
 160                                         space = false;
 161                                 }
 162
 163                                 ans[len++] = c;
 164                         } else if (len > 0)
 165                                 space = true;
 166                }
 167
 168                 if (len > 0)
 169                         ans[len] = '\0';
 170                 else
 171                         ans = mfree(ans);
 172
 173         } else {
 174                 bool dotdotdot = false;
 175                 size_t left;
 176
 177                 ans = new(char, max_length);
 178                 if (!ans)
 179                         return -ENOMEM;
 180
 181                 k = ans;
 182                 left = max_length;
 183                 while ((c = getc(f)) != EOF) {
 184
 185                         if (isprint(c)) {
 186
 187                                 if (space) {
 188                                         if (left <= 2) {
 189                                                 dotdotdot = true;
 190                                                 break;
 191                                         }
 192
 193                                         *(k++) = ' ';
 194                                         left--;
 195                                         space = false;
 196                                 }
 197
 198                                 if (left <= 1) {
 199                                         dotdotdot = true;
 200                                         break;
 201                                 }
 202
 203                                 *(k++) = (char) c;
 204                                 left--;
 205                         } else if (k > ans)
 206                                 space = true;
 207                 }
 208
 209                 if (dotdotdot) {
 210                         if (max_length <= 4) {
 211                                 k = ans;
 212                                 left = max_length;
 213                         } else {
 214                                 k = ans + max_length - 4;
 215                                 left = 4;
 216
 217                                 /* Eat up final spaces */
 218                                 while (k > ans && isspace(k[-1])) {
 219                                         k--;
 220                                         left++;
 221                                 }
 222                         }
 223
 224                         strncpy(k, "...", left-1);
 225                         k[left-1] = 0;
 226                 } else
 227                         *k = 0;
 228         }
 229
 230         /* Kernel threads have no argv[] */
 231         if (isempty(ans)) {
 232                 _cleanup_free_ char *t = NULL;
 233                 int h;
 234
 235                 free(ans);
 236
 237                 if (!comm_fallback)
 238                         return -ENOENT;
 239
 240                 h = get_process_comm(pid, &t);
 241                 if (h < 0)
 242                         return h;
 243
 244                 if (max_length == 0)
 245                         ans = strjoin("[", t, "]");
 246                 else {
 247                         size_t l;
 248
 249                         l = strlen(t);
 250
 251                         if (l + 3 <= max_length)
 252                                 ans = strjoin("[", t, "]");
 253                         else if (max_length <= 6) {
 254
 255                                 ans = new(char, max_length);
 256                                 if (!ans)
 257                                         return -ENOMEM;
 258
 259                                 memcpy(ans, "[...]", max_length-1);
 260                                 ans[max_length-1] = 0;
 261                         } else {
 262                                 char *e;
 263
 264                                 t[max_length - 6] = 0;
 265
 266                                 /* Chop off final spaces */
 267                                 e = strchr(t, 0);
 268                                 while (e > t && isspace(e[-1]))
 269                                         e--;
 270                                 *e = 0;
 271
 272                                 ans = strjoin("[", t, "...]");
 273                         }
 274                 }
 275                 if (!ans)
 276                         return -ENOMEM;
 277         }
 278
 279         *line = ans;
 280         return 0;
 281 }
 282
 283 int rename_process(const char name[]) {
 284         static size_t mm_size = 0;
 285         static char *mm = NULL;
 286         bool truncated = false;
 287         size_t l;
 288
 289         /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
 290          * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
 291          * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
 292          * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
 293          * truncated.
 294          *
 295          * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
 296
 297         if (isempty(name))
 298                 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
 299
 300         if (!is_main_thread())
 301                 return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
 302                                 * cache things without locking, and we make assumptions that PR_SET_NAME sets the
 303                                 * process name that isn't correct on any other threads */
 304
 305         l = strlen(name);
 306
 307         /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
 308          * can use PR_SET_NAME, which sets the thread name for the calling thread. */
 309         if (prctl(PR_SET_NAME, name) < 0)
 310                 log_debug_errno(errno, "PR_SET_NAME failed: %m");
 311         if (l > 15) /* Linux process names can be 15 chars at max */
 312                 truncated = true;
 313
 314         /* Second step, change glibc's ID of the process name. */
 315         if (program_invocation_name) {
 316                 size_t k;
 317
 318                 k = strlen(program_invocation_name);
 319                 strncpy(program_invocation_name, name, k);
 320                 if (l > k)
 321                         truncated = true;
 322         }
 323
 324         /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
 325          * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
 326          * the end. This is the best option for changing /proc/self/cmdline. */
 327
 328         /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
 329          * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
 330          * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
 331          * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
 332          * mmap() is not. */
 333         if (geteuid() != 0)
 334                 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
 335         else if (mm_size < l+1) {
 336                 size_t nn_size;
 337                 char *nn;
 338
 339                 nn_size = PAGE_ALIGN(l+1);
 340                 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 341                 if (nn == MAP_FAILED) {
 342                         log_debug_errno(errno, "mmap() failed: %m");
 343                         goto use_saved_argv;
 344                 }
 345
 346                 strncpy(nn, name, nn_size);
 347
 348                 /* Now, let's tell the kernel about this new memory */
 349                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
 350                         log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
 351                         (void) munmap(nn, nn_size);
 352                         goto use_saved_argv;
 353                 }
 354
 355                 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
 356                  * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
 357                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
 358                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 359
 360                 if (mm)
 361                         (void) munmap(mm, mm_size);
 362
 363                 mm = nn;
 364                 mm_size = nn_size;
 365         } else {
 366                 strncpy(mm, name, mm_size);
 367
 368                 /* Update the end pointer, continuing regardless of any failure. */
 369                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
 370                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 371         }
 372
 373 use_saved_argv:
 374         /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
 375          * it still looks here */
 376
 377         if (saved_argc > 0) {
 378                 int i;
 379
 380                 if (saved_argv[0]) {
 381                         size_t k;
 382
 383                         k = strlen(saved_argv[0]);
 384                         strncpy(saved_argv[0], name, k);
 385                         if (l > k)
 386                                 truncated = true;
 387                 }
 388
 389                 for (i = 1; i < saved_argc; i++) {
 390                         if (!saved_argv[i])
 391                                 break;
 392
 393                         memzero(saved_argv[i], strlen(saved_argv[i]));
 394                 }
 395         }
 396
 397         return !truncated;
 398 }
 399
 400 int is_kernel_thread(pid_t pid) {
 401         const char *p;
 402         size_t count;
 403         char c;
 404         bool eof;
 405         FILE *f;
 406
 407         if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
 408                 return 0;
 409
 410         assert(pid > 1);
 411
 412         p = procfs_file_alloca(pid, "cmdline");
 413         f = fopen(p, "re");
 414         if (!f) {
 415                 if (errno == ENOENT)
 416                         return -ESRCH;
 417                 return -errno;
 418         }
 419
 420         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 421
 422         count = fread(&c, 1, 1, f);
 423         eof = feof(f);
 424         fclose(f);
 425
 426         /* Kernel threads have an empty cmdline */
 427
 428         if (count <= 0)
 429                 return eof ? 1 : -errno;
 430
 431         return 0;
 432 }
 433
 434 int get_process_capeff(pid_t pid, char **capeff) {
 435         const char *p;
 436         int r;
 437
 438         assert(capeff);
 439         assert(pid >= 0);
 440
 441         p = procfs_file_alloca(pid, "status");
 442
 443         r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
 444         if (r == -ENOENT)
 445                 return -ESRCH;
 446
 447         return r;
 448 }
 449
 450 static int get_process_link_contents(const char *proc_file, char **name) {
 451         int r;
 452
 453         assert(proc_file);
 454         assert(name);
 455
 456         r = readlink_malloc(proc_file, name);
 457         if (r == -ENOENT)
 458                 return -ESRCH;
 459         if (r < 0)
 460                 return r;
 461
 462         return 0;
 463 }
 464
 465 int get_process_exe(pid_t pid, char **name) {
 466         const char *p;
 467         char *d;
 468         int r;
 469
 470         assert(pid >= 0);
 471
 472         p = procfs_file_alloca(pid, "exe");
 473         r = get_process_link_contents(p, name);
 474         if (r < 0)
 475                 return r;
 476
 477         d = endswith(*name, " (deleted)");
 478         if (d)
 479                 *d = '\0';
 480
 481         return 0;
 482 }
 483
 484 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
 485         _cleanup_fclose_ FILE *f = NULL;
 486         char line[LINE_MAX];
 487         const char *p;
 488
 489         assert(field);
 490         assert(uid);
 491
 492         if (pid < 0)
 493                 return -EINVAL;
 494
 495         p = procfs_file_alloca(pid, "status");
 496         f = fopen(p, "re");
 497         if (!f) {
 498                 if (errno == ENOENT)
 499                         return -ESRCH;
 500                 return -errno;
 501         }
 502
 503         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 504
 505         FOREACH_LINE(line, f, return -errno) {
 506                 char *l;
 507
 508                 l = strstrip(line);
 509
 510                 if (startswith(l, field)) {
 511                         l += strlen(field);
 512                         l += strspn(l, WHITESPACE);
 513
 514                         l[strcspn(l, WHITESPACE)] = 0;
 515
 516                         return parse_uid(l, uid);
 517                 }
 518         }
 519
 520         return -EIO;
 521 }
 522
 523 int get_process_uid(pid_t pid, uid_t *uid) {
 524
 525         if (pid == 0 || pid == getpid_cached()) {
 526                 *uid = getuid();
 527                 return 0;
 528         }
 529
 530         return get_process_id(pid, "Uid:", uid);
 531 }
 532
 533 int get_process_gid(pid_t pid, gid_t *gid) {
 534
 535         if (pid == 0 || pid == getpid_cached()) {
 536                 *gid = getgid();
 537                 return 0;
 538         }
 539
 540         assert_cc(sizeof(uid_t) == sizeof(gid_t));
 541         return get_process_id(pid, "Gid:", gid);
 542 }
 543
 544 int get_process_cwd(pid_t pid, char **cwd) {
 545         const char *p;
 546
 547         assert(pid >= 0);
 548
 549         p = procfs_file_alloca(pid, "cwd");
 550
 551         return get_process_link_contents(p, cwd);
 552 }
 553
 554 int get_process_root(pid_t pid, char **root) {
 555         const char *p;
 556
 557         assert(pid >= 0);
 558
 559         p = procfs_file_alloca(pid, "root");
 560
 561         return get_process_link_contents(p, root);
 562 }
 563
 564 int get_process_environ(pid_t pid, char **env) {
 565         _cleanup_fclose_ FILE *f = NULL;
 566         _cleanup_free_ char *outcome = NULL;
 567         int c;
 568         const char *p;
 569         size_t allocated = 0, sz = 0;
 570
 571         assert(pid >= 0);
 572         assert(env);
 573
 574         p = procfs_file_alloca(pid, "environ");
 575
 576         f = fopen(p, "re");
 577         if (!f) {
 578                 if (errno == ENOENT)
 579                         return -ESRCH;
 580                 return -errno;
 581         }
 582
 583         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 584
 585         while ((c = fgetc(f)) != EOF) {
 586                 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
 587                         return -ENOMEM;
 588
 589                 if (c == '\0')
 590                         outcome[sz++] = '\n';
 591                 else
 592                         sz += cescape_char(c, outcome + sz);
 593         }
 594
 595         if (!outcome) {
 596                 outcome = strdup("");
 597                 if (!outcome)
 598                         return -ENOMEM;
 599         } else
 600                 outcome[sz] = '\0';
 601
 602         *env = outcome;
 603         outcome = NULL;
 604
 605         return 0;
 606 }
 607
 608 int get_process_ppid(pid_t pid, pid_t *_ppid) {
 609         int r;
 610         _cleanup_free_ char *line = NULL;
 611         long unsigned ppid;
 612         const char *p;
 613
 614         assert(pid >= 0);
 615         assert(_ppid);
 616
 617         if (pid == 0 || pid == getpid_cached()) {
 618                 *_ppid = getppid();
 619                 return 0;
 620         }
 621
 622         p = procfs_file_alloca(pid, "stat");
 623         r = read_one_line_file(p, &line);
 624         if (r == -ENOENT)
 625                 return -ESRCH;
 626         if (r < 0)
 627                 return r;
 628
 629         /* Let's skip the pid and comm fields. The latter is enclosed
 630          * in () but does not escape any () in its value, so let's
 631          * skip over it manually */
 632
 633         p = strrchr(line, ')');
 634         if (!p)
 635                 return -EIO;
 636
 637         p++;
 638
 639         if (sscanf(p, " "
 640                    "%*c "  /* state */
 641                    "%lu ", /* ppid */
 642                    &ppid) != 1)
 643                 return -EIO;
 644
 645         if ((long unsigned) (pid_t) ppid != ppid)
 646                 return -ERANGE;
 647
 648         *_ppid = (pid_t) ppid;
 649
 650         return 0;
 651 }
 652
 653 int wait_for_terminate(pid_t pid, siginfo_t *status) {
 654         siginfo_t dummy;
 655
 656         assert(pid >= 1);
 657
 658         if (!status)
 659                 status = &dummy;
 660
 661         for (;;) {
 662                 zero(*status);
 663
 664                 if (waitid(P_PID, pid, status, WEXITED) < 0) {
 665
 666                         if (errno == EINTR)
 667                                 continue;
 668
 669                         return negative_errno();
 670                 }
 671
 672                 return 0;
 673         }
 674 }
 675
 676 /*
 677  * Return values:
 678  * < 0 : wait_for_terminate() failed to get the state of the
 679  *       process, the process was terminated by a signal, or
 680  *       failed for an unknown reason.
 681  * >=0 : The process terminated normally, and its exit code is
 682  *       returned.
 683  *
 684  * That is, success is indicated by a return value of zero, and an
 685  * error is indicated by a non-zero value.
 686  *
 687  * A warning is emitted if the process terminates abnormally,
 688  * and also if it returns non-zero unless check_exit_code is true.
 689  */
 690 int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
 691         _cleanup_free_ char *buffer = NULL;
 692         siginfo_t status;
 693         int r, prio;
 694
 695         assert(pid > 1);
 696
 697         if (!name) {
 698                 r = get_process_comm(pid, &buffer);
 699                 if (r < 0)
 700                         log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
 701                 else
 702                         name = buffer;
 703         }
 704
 705         prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
 706
 707         r = wait_for_terminate(pid, &status);
 708         if (r < 0)
 709                 return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
 710
 711         if (status.si_code == CLD_EXITED) {
 712                 if (status.si_status != EXIT_SUCCESS)
 713                         log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
 714                                  "%s failed with exit status %i.", strna(name), status.si_status);
 715                 else
 716                         log_debug("%s succeeded.", name);
 717
 718                 return status.si_status;
 719
 720         } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
 721
 722                 log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
 723                 return -EPROTO;
 724         }
 725
 726         log_full(prio, "%s failed due to unknown reason.", strna(name));
 727         return -EPROTO;
 728 }
 729
 730 /*
 731  * Return values:
 732  * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
 733  *       process, the process timed out, the process was terminated by a
 734  *       signal, or failed for an unknown reason.
 735  * >=0 : The process terminated normally with no failures.
 736  *
 737  * Success is indicated by a return value of zero, a timeout is indicated
 738  * by ETIMEDOUT, and all other child failure states are indicated by error
 739  * is indicated by a non-zero value.
 740  */
 741 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
 742         sigset_t mask;
 743         int r;
 744         usec_t until;
 745
 746         assert_se(sigemptyset(&mask) == 0);
 747         assert_se(sigaddset(&mask, SIGCHLD) == 0);
 748
 749         /* Drop into a sigtimewait-based timeout. Waiting for the
 750          * pid to exit. */
 751         until = now(CLOCK_MONOTONIC) + timeout;
 752         for (;;) {
 753                 usec_t n;
 754                 siginfo_t status = {};
 755                 struct timespec ts;
 756
 757                 n = now(CLOCK_MONOTONIC);
 758                 if (n >= until)
 759                         break;
 760
 761                 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
 762                 /* Assuming we woke due to the child exiting. */
 763                 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
 764                         if (status.si_pid == pid) {
 765                                 /* This is the correct child.*/
 766                                 if (status.si_code == CLD_EXITED)
 767                                         return (status.si_status == 0) ? 0 : -EPROTO;
 768                                 else
 769                                         return -EPROTO;
 770                         }
 771                 }
 772                 /* Not the child, check for errors and proceed appropriately */
 773                 if (r < 0) {
 774                         switch (r) {
 775                         case -EAGAIN:
 776                                 /* Timed out, child is likely hung. */
 777                                 return -ETIMEDOUT;
 778                         case -EINTR:
 779                                 /* Received a different signal and should retry */
 780                                 continue;
 781                         default:
 782                                 /* Return any unexpected errors */
 783                                 return r;
 784                         }
 785                 }
 786         }
 787
 788         return -EPROTO;
 789 }
 790
 791 void sigkill_wait(pid_t pid) {
 792         assert(pid > 1);
 793
 794         if (kill(pid, SIGKILL) > 0)
 795                 (void) wait_for_terminate(pid, NULL);
 796 }
 797
 798 void sigkill_waitp(pid_t *pid) {
 799         if (!pid)
 800                 return;
 801         if (*pid <= 1)
 802                 return;
 803
 804         sigkill_wait(*pid);
 805 }
 806
 807 int kill_and_sigcont(pid_t pid, int sig) {
 808         int r;
 809
 810         r = kill(pid, sig) < 0 ? -errno : 0;
 811
 812         /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
 813          * affected by a process being suspended anyway. */
 814         if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL))
 815                 (void) kill(pid, SIGCONT);
 816
 817         return r;
 818 }
 819
 820 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
 821         _cleanup_fclose_ FILE *f = NULL;
 822         char *value = NULL;
 823         int r;
 824         bool done = false;
 825         size_t l;
 826         const char *path;
 827
 828         assert(pid >= 0);
 829         assert(field);
 830         assert(_value);
 831
 832         path = procfs_file_alloca(pid, "environ");
 833
 834         f = fopen(path, "re");
 835         if (!f) {
 836                 if (errno == ENOENT)
 837                         return -ESRCH;
 838                 return -errno;
 839         }
 840
 841         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 842
 843         l = strlen(field);
 844         r = 0;
 845
 846         do {
 847                 char line[LINE_MAX];
 848                 unsigned i;
 849
 850                 for (i = 0; i < sizeof(line)-1; i++) {
 851                         int c;
 852
 853                         c = getc(f);
 854                         if (_unlikely_(c == EOF)) {
 855                                 done = true;
 856                                 break;
 857                         } else if (c == 0)
 858                                 break;
 859
 860                         line[i] = c;
 861                 }
 862                 line[i] = 0;
 863
 864                 if (strneq(line, field, l) && line[l] == '=') {
 865                         value = strdup(line + l + 1);
 866                         if (!value)
 867                                 return -ENOMEM;
 868
 869                         r = 1;
 870                         break;
 871                 }
 872
 873         } while (!done);
 874
 875         *_value = value;
 876         return r;
 877 }
 878
 879 bool pid_is_unwaited(pid_t pid) {
 880         /* Checks whether a PID is still valid at all, including a zombie */
 881
 882         if (pid < 0)
 883                 return false;
 884
 885         if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
 886                 return true;
 887
 888         if (pid == getpid_cached())
 889                 return true;
 890
 891         if (kill(pid, 0) >= 0)
 892                 return true;
 893
 894         return errno != ESRCH;
 895 }
 896
 897 bool pid_is_alive(pid_t pid) {
 898         int r;
 899
 900         /* Checks whether a PID is still valid and not a zombie */
 901
 902         if (pid < 0)
 903                 return false;
 904
 905         if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
 906                 return true;
 907
 908         if (pid == getpid_cached())
 909                 return true;
 910
 911         r = get_process_state(pid);
 912         if (IN_SET(r, -ESRCH, 'Z'))
 913                 return false;
 914
 915         return true;
 916 }
 917
 918 int pid_from_same_root_fs(pid_t pid) {
 919         const char *root;
 920
 921         if (pid < 0)
 922                 return false;
 923
 924         if (pid == 0 || pid == getpid_cached())
 925                 return true;
 926
 927         root = procfs_file_alloca(pid, "root");
 928
 929         return files_same(root, "/proc/1/root", 0);
 930 }
 931
 932 bool is_main_thread(void) {
 933         static thread_local int cached = 0;
 934
 935         if (_unlikely_(cached == 0))
 936                 cached = getpid_cached() == gettid() ? 1 : -1;
 937
 938         return cached > 0;
 939 }
 940
 941 noreturn void freeze(void) {
 942
 943         log_close();
 944
 945         /* Make sure nobody waits for us on a socket anymore */
 946         close_all_fds(NULL, 0);
 947
 948         sync();
 949
 950         for (;;)
 951                 pause();
 952 }
 953
 954 bool oom_score_adjust_is_valid(int oa) {
 955         return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
 956 }
 957
 958 unsigned long personality_from_string(const char *p) {
 959         int architecture;
 960
 961         if (!p)
 962                 return PERSONALITY_INVALID;
 963
 964         /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
 965          * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
 966          * the same register size. */
 967
 968         architecture = architecture_from_string(p);
 969         if (architecture < 0)
 970                 return PERSONALITY_INVALID;
 971
 972         if (architecture == native_architecture())
 973                 return PER_LINUX;
 974 #ifdef SECONDARY_ARCHITECTURE
 975         if (architecture == SECONDARY_ARCHITECTURE)
 976                 return PER_LINUX32;
 977 #endif
 978
 979         return PERSONALITY_INVALID;
 980 }
 981
 982 const char* personality_to_string(unsigned long p) {
 983         int architecture = _ARCHITECTURE_INVALID;
 984
 985         if (p == PER_LINUX)
 986                 architecture = native_architecture();
 987 #ifdef SECONDARY_ARCHITECTURE
 988         else if (p == PER_LINUX32)
 989                 architecture = SECONDARY_ARCHITECTURE;
 990 #endif
 991
 992         if (architecture < 0)
 993                 return NULL;
 994
 995         return architecture_to_string(architecture);
 996 }
 997
 998 int safe_personality(unsigned long p) {
 999         int ret;
1000
1001         /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
1002          * and in others as negative return value containing an errno-like value. Let's work around this: this is a
1003          * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
1004          * the return value indicating the same issue, so that we are definitely on the safe side.
1005          *
1006          * See https://github.com/systemd/systemd/issues/6737 */
1007
1008         errno = 0;
1009         ret = personality(p);
1010         if (ret < 0) {
1011                 if (errno != 0)
1012                         return -errno;
1013
1014                 errno = -ret;
1015         }
1016
1017         return ret;
1018 }
1019
1020 int opinionated_personality(unsigned long *ret) {
1021         int current;
1022
1023         /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1024          * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1025          * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1026
1027         current = safe_personality(PERSONALITY_INVALID);
1028         if (current < 0)
1029                 return current;
1030
1031         if (((unsigned long) current & 0xffff) == PER_LINUX32)
1032                 *ret = PER_LINUX32;
1033         else
1034                 *ret = PER_LINUX;
1035
1036         return 0;
1037 }
1038
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only does something when we are PID 1 and run under valgrind: a helper child is forked off that exits
         * right away, and we wait for it to terminate. NOTE(review): presumably this makes valgrind emit its
         * summary when the helper exits — confirm against the valgrind documentation. */

        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0) /* In the child: leave immediately */
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1055
int pid_compare_func(const void *a, const void *b) {
        const pid_t *lhs = a, *rhs = b;

        /* Three-way comparison of two PIDs, with a signature suitable for qsort(): yields -1, 0 or 1 depending
         * on whether the first PID is smaller than, equal to or larger than the second. */

        return (*lhs > *rhs) - (*lhs < *rhs);
}
1067
int ioprio_parse_priority(const char *s, int *ret) {
        int priority, r;

        /* Parses an I/O priority from its decimal string form. On success the value is stored in *ret and 0 is
         * returned. Returns the error of safe_atoi() if the string cannot be parsed, and -EINVAL if the number is
         * not a valid I/O priority; *ret is left untouched in both cases. */

        assert(s);
        assert(ret);

        r = safe_atoi(s, &priority);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(priority)) {
                *ret = priority;
                return 0;
        }

        return -EINVAL;
}
1084
/* State of the PID cache used by getpid_cached(). The variable holds one of:
 *
 *     CACHED_PID_UNSET [0]  → nothing cached yet
 *     CACHED_PID_BUSY [-1]  → a thread is in the middle of initializing the cache
 *     anything else         → the cached PID itself
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

void reset_cached_pid(void) {
        /* Called in the child right after a fork(), i.e. at the first moment the PID is different, so that a
         * stale parent PID is never handed out */
        cached_pid = CACHED_PID_UNSET;
}
1101
/* glibc's __register_atfork() and __dso_handle are declared by hand here, since the glibc headers do not
 * provide them. __register_atfork() is mostly equivalent to pthread_atfork(), but as it is part of glibc proper
 * it spares us from linking against libpthread. */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
extern void* __dso_handle __attribute__ ((__weak__));
1107
1108 pid_t getpid_cached(void) {
1109         pid_t current_value;
1110
1111         /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1112          * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1113          * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1114          * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1115          *
1116          * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1117          * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1118          */
1119
1120         current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1121
1122         switch (current_value) {
1123
1124         case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1125                 pid_t new_pid;
1126
1127                 new_pid = getpid();
1128
1129                 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1130                         /* OOM? Let's try again later */
1131                         cached_pid = CACHED_PID_UNSET;
1132                         return new_pid;
1133                 }
1134
1135                 cached_pid = new_pid;
1136                 return new_pid;
1137         }
1138
1139         case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1140                 return getpid();
1141
1142         default: /* Properly initialized */
1143                 return current_value;
1144         }
1145 }
1146
int must_be_root(void) {

        /* Returns 0 if the effective UID is root; otherwise logs an error and returns -EPERM */

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1155
1156 int safe_fork_full(
1157                 const char *name,
1158                 const int except_fds[],
1159                 size_t n_except_fds,
1160                 ForkFlags flags,
1161                 pid_t *ret_pid) {
1162
1163         pid_t original_pid, pid;
1164         sigset_t saved_ss;
1165         bool block_signals;
1166         int prio, r;
1167
1168         /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1169          * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1170
1171         prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
1172
1173         original_pid = getpid_cached();
1174
1175         block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1176
1177         if (block_signals) {
1178                 sigset_t ss;
1179
1180                 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1181                  * that SIGTERMs are not lost we might send to the child. */
1182                 if (sigfillset(&ss) < 0)
1183                         return log_full_errno(prio, errno, "Failed to reset signal set: %m");
1184
1185                 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1186                         return log_full_errno(prio, errno, "Failed to reset signal mask: %m");
1187         }
1188
1189         pid = fork();
1190         if (pid < 0) {
1191                 r = -errno;
1192
1193                 if (block_signals) /* undo what we did above */
1194                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1195
1196                 return log_full_errno(prio, r, "Failed to fork: %m");
1197         }
1198         if (pid > 0) {
1199                 /* We are in the parent process */
1200
1201                 if (block_signals) /* undo what we did above */
1202                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1203
1204                 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1205
1206                 if (ret_pid)
1207                         *ret_pid = pid;
1208
1209                 return 1;
1210         }
1211
1212         /* We are in the child process */
1213
1214         if (flags & FORK_REOPEN_LOG) {
1215                 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1216                 log_close();
1217                 log_set_open_when_needed(true);
1218         }
1219
1220         if (name) {
1221                 r = rename_process(name);
1222                 if (r < 0)
1223                         log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
1224                                        r, "Failed to rename process, ignoring: %m");
1225         }
1226
1227         if (flags & FORK_DEATHSIG)
1228                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1229                         log_full_errno(prio, errno, "Failed to set death signal: %m");
1230                         _exit(EXIT_FAILURE);
1231                 }
1232
1233         if (flags & FORK_RESET_SIGNALS) {
1234                 r = reset_all_signal_handlers();
1235                 if (r < 0) {
1236                         log_full_errno(prio, r, "Failed to reset signal handlers: %m");
1237                         _exit(EXIT_FAILURE);
1238                 }
1239
1240                 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1241                 r = reset_signal_mask();
1242                 if (r < 0) {
1243                         log_full_errno(prio, r, "Failed to reset signal mask: %m");
1244                         _exit(EXIT_FAILURE);
1245                 }
1246         } else if (block_signals) { /* undo what we did above */
1247                 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1248                         log_full_errno(prio, errno, "Failed to restore signal mask: %m");
1249                         _exit(EXIT_FAILURE);
1250                 }
1251         }
1252
1253         if (flags & FORK_DEATHSIG) {
1254                 pid_t ppid;
1255                 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1256                  * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1257
1258                 ppid = getppid();
1259                 if (ppid == 0)
1260                         /* Parent is in a differn't PID namespace. */;
1261                 else if (ppid != original_pid) {
1262                         log_debug("Parent died early, raising SIGTERM.");
1263                         (void) raise(SIGTERM);
1264                         _exit(EXIT_FAILURE);
1265                 }
1266         }
1267
1268         if (flags & FORK_CLOSE_ALL_FDS) {
1269                 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1270                 log_close();
1271
1272                 r = close_all_fds(except_fds, n_except_fds);
1273                 if (r < 0) {
1274                         log_full_errno(prio, r, "Failed to close all file descriptors: %m");
1275                         _exit(EXIT_FAILURE);
1276                 }
1277         }
1278
1279         /* When we were asked to reopen the logs, do so again now */
1280         if (flags & FORK_REOPEN_LOG) {
1281                 log_open();
1282                 log_set_open_when_needed(false);
1283         }
1284
1285         if (flags & FORK_NULL_STDIO) {
1286                 r = make_null_stdio();
1287                 if (r < 0) {
1288                         log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
1289                         _exit(EXIT_FAILURE);
1290                 }
1291         }
1292
1293         if (ret_pid)
1294                 *ret_pid = getpid_cached();
1295
1296         return 0;
1297 }
1298
1299 int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
1300         bool stdout_is_tty, stderr_is_tty;
1301         unsigned n, i;
1302         va_list ap;
1303         char **l;
1304         int r;
1305
1306         assert(path);
1307
1308         /* Spawns a temporary TTY agent, making sure it goes away when we go away */
1309
1310         r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
1311         if (r < 0)
1312                 return r;
1313         if (r > 0)
1314                 return 0;
1315
1316         /* In the child: */
1317
1318         stdout_is_tty = isatty(STDOUT_FILENO);
1319         stderr_is_tty = isatty(STDERR_FILENO);
1320
1321         if (!stdout_is_tty || !stderr_is_tty) {
1322                 int fd;
1323
1324                 /* Detach from stdout/stderr. and reopen
1325                  * /dev/tty for them. This is important to
1326                  * ensure that when systemctl is started via
1327                  * popen() or a similar call that expects to
1328                  * read EOF we actually do generate EOF and
1329                  * not delay this indefinitely by because we
1330                  * keep an unused copy of stdin around. */
1331                 fd = open("/dev/tty", O_WRONLY);
1332                 if (fd < 0) {
1333                         log_error_errno(errno, "Failed to open /dev/tty: %m");
1334                         _exit(EXIT_FAILURE);
1335                 }
1336
1337                 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
1338                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1339                         _exit(EXIT_FAILURE);
1340                 }
1341
1342                 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
1343                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1344                         _exit(EXIT_FAILURE);
1345                 }
1346
1347                 if (fd > STDERR_FILENO)
1348                         close(fd);
1349         }
1350
1351         /* Count arguments */
1352         va_start(ap, path);
1353         for (n = 0; va_arg(ap, char*); n++)
1354                 ;
1355         va_end(ap);
1356
1357         /* Allocate strv */
1358         l = alloca(sizeof(char *) * (n + 1));
1359
1360         /* Fill in arguments */
1361         va_start(ap, path);
1362         for (i = 0; i <= n; i++)
1363                 l[i] = va_arg(ap, char*);
1364         va_end(ap);
1365
1366         execv(path, l);
1367         _exit(EXIT_FAILURE);
1368 }
1369
1370 static const char *const ioprio_class_table[] = {
1371         [IOPRIO_CLASS_NONE] = "none",
1372         [IOPRIO_CLASS_RT] = "realtime",
1373         [IOPRIO_CLASS_BE] = "best-effort",
1374         [IOPRIO_CLASS_IDLE] = "idle"
1375 };
1376
1377 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1378
1379 static const char *const sigchld_code_table[] = {
1380         [CLD_EXITED] = "exited",
1381         [CLD_KILLED] = "killed",
1382         [CLD_DUMPED] = "dumped",
1383         [CLD_TRAPPED] = "trapped",
1384         [CLD_STOPPED] = "stopped",
1385         [CLD_CONTINUED] = "continued",
1386 };
1387
1388 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1389
1390 static const char* const sched_policy_table[] = {
1391         [SCHED_OTHER] = "other",
1392         [SCHED_BATCH] = "batch",
1393         [SCHED_IDLE] = "idle",
1394         [SCHED_FIFO] = "fifo",
1395         [SCHED_RR] = "rr"
1396 };
1397
1398 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);