src/basic/process-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <linux/oom.h>
  25 #include <sched.h>
  26 #include <signal.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdio_ext.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <sys/mman.h>
  33 #include <sys/personality.h>
  34 #include <sys/prctl.h>
  35 #include <sys/types.h>
  36 #include <sys/wait.h>
  37 #include <syslog.h>
  38 #include <unistd.h>
  39 #if HAVE_VALGRIND_VALGRIND_H
  40 #include <valgrind/valgrind.h>
  41 #endif
  42
  43 #include "alloc-util.h"
  44 #include "architecture.h"
  45 #include "escape.h"
  46 #include "fd-util.h"
  47 #include "fileio.h"
  48 #include "fs-util.h"
  49 #include "ioprio.h"
  50 #include "log.h"
  51 #include "macro.h"
  52 #include "missing.h"
  53 #include "process-util.h"
  54 #include "raw-clone.h"
  55 #include "signal-util.h"
  56 #include "stat-util.h"
  57 #include "string-table.h"
  58 #include "string-util.h"
  59 #include "terminal-util.h"
  60 #include "user-util.h"
  61 #include "util.h"
  62
  63 int get_process_state(pid_t pid) {
  64         const char *p;
  65         char state;
  66         int r;
  67         _cleanup_free_ char *line = NULL;
  68
  69         assert(pid >= 0);
  70
  71         p = procfs_file_alloca(pid, "stat");
  72
  73         r = read_one_line_file(p, &line);
  74         if (r == -ENOENT)
  75                 return -ESRCH;
  76         if (r < 0)
  77                 return r;
  78
  79         p = strrchr(line, ')');
  80         if (!p)
  81                 return -EIO;
  82
  83         p++;
  84
  85         if (sscanf(p, " %c", &state) != 1)
  86                 return -EIO;
  87
  88         return (unsigned char) state;
  89 }
  90
  91 int get_process_comm(pid_t pid, char **name) {
  92         const char *p;
  93         int r;
  94
  95         assert(name);
  96         assert(pid >= 0);
  97
  98         p = procfs_file_alloca(pid, "comm");
  99
 100         r = read_one_line_file(p, name);
 101         if (r == -ENOENT)
 102                 return -ESRCH;
 103
 104         return r;
 105 }
 106
 107 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
 108         _cleanup_fclose_ FILE *f = NULL;
 109         bool space = false;
 110         char *k, *ans = NULL;
 111         const char *p;
 112         int c;
 113
 114         assert(line);
 115         assert(pid >= 0);
 116
 117         /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
 118          * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
 119          * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
 120          * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
 121          * command line that resolves to the empty string will return the "comm" name of the process instead.
 122          *
 123          * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
 124          * comm_fallback is false). Returns 0 and sets *line otherwise. */
 125
 126         p = procfs_file_alloca(pid, "cmdline");
 127
 128         f = fopen(p, "re");
 129         if (!f) {
 130                 if (errno == ENOENT)
 131                         return -ESRCH;
 132                 return -errno;
 133         }
 134
 135         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 136
 137         if (max_length == 1) {
 138
 139                 /* If there's only room for one byte, return the empty string */
 140                 ans = new0(char, 1);
 141                 if (!ans)
 142                         return -ENOMEM;
 143
 144                 *line = ans;
 145                 return 0;
 146
 147         } else if (max_length == 0) {
 148                 size_t len = 0, allocated = 0;
 149
 150                 while ((c = getc(f)) != EOF) {
 151
 152                         if (!GREEDY_REALLOC(ans, allocated, len+3)) {
 153                                 free(ans);
 154                                 return -ENOMEM;
 155                         }
 156
 157                         if (isprint(c)) {
 158                                 if (space) {
 159                                         ans[len++] = ' ';
 160                                         space = false;
 161                                 }
 162
 163                                 ans[len++] = c;
 164                         } else if (len > 0)
 165                                 space = true;
 166                }
 167
 168                 if (len > 0)
 169                         ans[len] = '\0';
 170                 else
 171                         ans = mfree(ans);
 172
 173         } else {
 174                 bool dotdotdot = false;
 175                 size_t left;
 176
 177                 ans = new(char, max_length);
 178                 if (!ans)
 179                         return -ENOMEM;
 180
 181                 k = ans;
 182                 left = max_length;
 183                 while ((c = getc(f)) != EOF) {
 184
 185                         if (isprint(c)) {
 186
 187                                 if (space) {
 188                                         if (left <= 2) {
 189                                                 dotdotdot = true;
 190                                                 break;
 191                                         }
 192
 193                                         *(k++) = ' ';
 194                                         left--;
 195                                         space = false;
 196                                 }
 197
 198                                 if (left <= 1) {
 199                                         dotdotdot = true;
 200                                         break;
 201                                 }
 202
 203                                 *(k++) = (char) c;
 204                                 left--;
 205                         } else if (k > ans)
 206                                 space = true;
 207                 }
 208
 209                 if (dotdotdot) {
 210                         if (max_length <= 4) {
 211                                 k = ans;
 212                                 left = max_length;
 213                         } else {
 214                                 k = ans + max_length - 4;
 215                                 left = 4;
 216
 217                                 /* Eat up final spaces */
 218                                 while (k > ans && isspace(k[-1])) {
 219                                         k--;
 220                                         left++;
 221                                 }
 222                         }
 223
 224                         strncpy(k, "...", left-1);
 225                         k[left-1] = 0;
 226                 } else
 227                         *k = 0;
 228         }
 229
 230         /* Kernel threads have no argv[] */
 231         if (isempty(ans)) {
 232                 _cleanup_free_ char *t = NULL;
 233                 int h;
 234
 235                 free(ans);
 236
 237                 if (!comm_fallback)
 238                         return -ENOENT;
 239
 240                 h = get_process_comm(pid, &t);
 241                 if (h < 0)
 242                         return h;
 243
 244                 if (max_length == 0)
 245                         ans = strjoin("[", t, "]");
 246                 else {
 247                         size_t l;
 248
 249                         l = strlen(t);
 250
 251                         if (l + 3 <= max_length)
 252                                 ans = strjoin("[", t, "]");
 253                         else if (max_length <= 6) {
 254
 255                                 ans = new(char, max_length);
 256                                 if (!ans)
 257                                         return -ENOMEM;
 258
 259                                 memcpy(ans, "[...]", max_length-1);
 260                                 ans[max_length-1] = 0;
 261                         } else {
 262                                 char *e;
 263
 264                                 t[max_length - 6] = 0;
 265
 266                                 /* Chop off final spaces */
 267                                 e = strchr(t, 0);
 268                                 while (e > t && isspace(e[-1]))
 269                                         e--;
 270                                 *e = 0;
 271
 272                                 ans = strjoin("[", t, "...]");
 273                         }
 274                 }
 275                 if (!ans)
 276                         return -ENOMEM;
 277         }
 278
 279         *line = ans;
 280         return 0;
 281 }
 282
 283 int rename_process(const char name[]) {
 284         static size_t mm_size = 0;
 285         static char *mm = NULL;
 286         bool truncated = false;
 287         size_t l;
 288
 289         /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
 290          * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
 291          * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
 292          * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
 293          * truncated.
 294          *
 295          * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
 296
 297         if (isempty(name))
 298                 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
 299
 300         if (!is_main_thread())
 301                 return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
 302                                 * cache things without locking, and we make assumptions that PR_SET_NAME sets the
 303                                 * process name that isn't correct on any other threads */
 304
 305         l = strlen(name);
 306
 307         /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
 308          * can use PR_SET_NAME, which sets the thread name for the calling thread. */
 309         if (prctl(PR_SET_NAME, name) < 0)
 310                 log_debug_errno(errno, "PR_SET_NAME failed: %m");
 311         if (l > 15) /* Linux process names can be 15 chars at max */
 312                 truncated = true;
 313
 314         /* Second step, change glibc's ID of the process name. */
 315         if (program_invocation_name) {
 316                 size_t k;
 317
 318                 k = strlen(program_invocation_name);
 319                 strncpy(program_invocation_name, name, k);
 320                 if (l > k)
 321                         truncated = true;
 322         }
 323
 324         /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
 325          * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
 326          * the end. This is the best option for changing /proc/self/cmdline. */
 327
 328         /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
 329          * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
 330          * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
 331          * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
 332          * mmap() is not. */
 333         if (geteuid() != 0)
 334                 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
 335         else if (mm_size < l+1) {
 336                 size_t nn_size;
 337                 char *nn;
 338
 339                 nn_size = PAGE_ALIGN(l+1);
 340                 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 341                 if (nn == MAP_FAILED) {
 342                         log_debug_errno(errno, "mmap() failed: %m");
 343                         goto use_saved_argv;
 344                 }
 345
 346                 strncpy(nn, name, nn_size);
 347
 348                 /* Now, let's tell the kernel about this new memory */
 349                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
 350                         log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
 351                         (void) munmap(nn, nn_size);
 352                         goto use_saved_argv;
 353                 }
 354
 355                 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
 356                  * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
 357                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
 358                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 359
 360                 if (mm)
 361                         (void) munmap(mm, mm_size);
 362
 363                 mm = nn;
 364                 mm_size = nn_size;
 365         } else {
 366                 strncpy(mm, name, mm_size);
 367
 368                 /* Update the end pointer, continuing regardless of any failure. */
 369                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
 370                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 371         }
 372
 373 use_saved_argv:
 374         /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
 375          * it still looks here */
 376
 377         if (saved_argc > 0) {
 378                 int i;
 379
 380                 if (saved_argv[0]) {
 381                         size_t k;
 382
 383                         k = strlen(saved_argv[0]);
 384                         strncpy(saved_argv[0], name, k);
 385                         if (l > k)
 386                                 truncated = true;
 387                 }
 388
 389                 for (i = 1; i < saved_argc; i++) {
 390                         if (!saved_argv[i])
 391                                 break;
 392
 393                         memzero(saved_argv[i], strlen(saved_argv[i]));
 394                 }
 395         }
 396
 397         return !truncated;
 398 }
 399
 400 int is_kernel_thread(pid_t pid) {
 401         const char *p;
 402         size_t count;
 403         char c;
 404         bool eof;
 405         FILE *f;
 406
 407         if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
 408                 return 0;
 409
 410         assert(pid > 1);
 411
 412         p = procfs_file_alloca(pid, "cmdline");
 413         f = fopen(p, "re");
 414         if (!f) {
 415                 if (errno == ENOENT)
 416                         return -ESRCH;
 417                 return -errno;
 418         }
 419
 420         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 421
 422         count = fread(&c, 1, 1, f);
 423         eof = feof(f);
 424         fclose(f);
 425
 426         /* Kernel threads have an empty cmdline */
 427
 428         if (count <= 0)
 429                 return eof ? 1 : -errno;
 430
 431         return 0;
 432 }
 433
 434 int get_process_capeff(pid_t pid, char **capeff) {
 435         const char *p;
 436         int r;
 437
 438         assert(capeff);
 439         assert(pid >= 0);
 440
 441         p = procfs_file_alloca(pid, "status");
 442
 443         r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
 444         if (r == -ENOENT)
 445                 return -ESRCH;
 446
 447         return r;
 448 }
 449
 450 static int get_process_link_contents(const char *proc_file, char **name) {
 451         int r;
 452
 453         assert(proc_file);
 454         assert(name);
 455
 456         r = readlink_malloc(proc_file, name);
 457         if (r == -ENOENT)
 458                 return -ESRCH;
 459         if (r < 0)
 460                 return r;
 461
 462         return 0;
 463 }
 464
 465 int get_process_exe(pid_t pid, char **name) {
 466         const char *p;
 467         char *d;
 468         int r;
 469
 470         assert(pid >= 0);
 471
 472         p = procfs_file_alloca(pid, "exe");
 473         r = get_process_link_contents(p, name);
 474         if (r < 0)
 475                 return r;
 476
 477         d = endswith(*name, " (deleted)");
 478         if (d)
 479                 *d = '\0';
 480
 481         return 0;
 482 }
 483
 484 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
 485         _cleanup_fclose_ FILE *f = NULL;
 486         char line[LINE_MAX];
 487         const char *p;
 488
 489         assert(field);
 490         assert(uid);
 491
 492         if (pid < 0)
 493                 return -EINVAL;
 494
 495         p = procfs_file_alloca(pid, "status");
 496         f = fopen(p, "re");
 497         if (!f) {
 498                 if (errno == ENOENT)
 499                         return -ESRCH;
 500                 return -errno;
 501         }
 502
 503         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 504
 505         FOREACH_LINE(line, f, return -errno) {
 506                 char *l;
 507
 508                 l = strstrip(line);
 509
 510                 if (startswith(l, field)) {
 511                         l += strlen(field);
 512                         l += strspn(l, WHITESPACE);
 513
 514                         l[strcspn(l, WHITESPACE)] = 0;
 515
 516                         return parse_uid(l, uid);
 517                 }
 518         }
 519
 520         return -EIO;
 521 }
 522
 523 int get_process_uid(pid_t pid, uid_t *uid) {
 524
 525         if (pid == 0 || pid == getpid_cached()) {
 526                 *uid = getuid();
 527                 return 0;
 528         }
 529
 530         return get_process_id(pid, "Uid:", uid);
 531 }
 532
 533 int get_process_gid(pid_t pid, gid_t *gid) {
 534
 535         if (pid == 0 || pid == getpid_cached()) {
 536                 *gid = getgid();
 537                 return 0;
 538         }
 539
 540         assert_cc(sizeof(uid_t) == sizeof(gid_t));
 541         return get_process_id(pid, "Gid:", gid);
 542 }
 543
 544 int get_process_cwd(pid_t pid, char **cwd) {
 545         const char *p;
 546
 547         assert(pid >= 0);
 548
 549         p = procfs_file_alloca(pid, "cwd");
 550
 551         return get_process_link_contents(p, cwd);
 552 }
 553
 554 int get_process_root(pid_t pid, char **root) {
 555         const char *p;
 556
 557         assert(pid >= 0);
 558
 559         p = procfs_file_alloca(pid, "root");
 560
 561         return get_process_link_contents(p, root);
 562 }
 563
 564 int get_process_environ(pid_t pid, char **env) {
 565         _cleanup_fclose_ FILE *f = NULL;
 566         _cleanup_free_ char *outcome = NULL;
 567         int c;
 568         const char *p;
 569         size_t allocated = 0, sz = 0;
 570
 571         assert(pid >= 0);
 572         assert(env);
 573
 574         p = procfs_file_alloca(pid, "environ");
 575
 576         f = fopen(p, "re");
 577         if (!f) {
 578                 if (errno == ENOENT)
 579                         return -ESRCH;
 580                 return -errno;
 581         }
 582
 583         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 584
 585         while ((c = fgetc(f)) != EOF) {
 586                 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
 587                         return -ENOMEM;
 588
 589                 if (c == '\0')
 590                         outcome[sz++] = '\n';
 591                 else
 592                         sz += cescape_char(c, outcome + sz);
 593         }
 594
 595         if (!outcome) {
 596                 outcome = strdup("");
 597                 if (!outcome)
 598                         return -ENOMEM;
 599         } else
 600                 outcome[sz] = '\0';
 601
 602         *env = outcome;
 603         outcome = NULL;
 604
 605         return 0;
 606 }
 607
 608 int get_process_ppid(pid_t pid, pid_t *_ppid) {
 609         int r;
 610         _cleanup_free_ char *line = NULL;
 611         long unsigned ppid;
 612         const char *p;
 613
 614         assert(pid >= 0);
 615         assert(_ppid);
 616
 617         if (pid == 0 || pid == getpid_cached()) {
 618                 *_ppid = getppid();
 619                 return 0;
 620         }
 621
 622         p = procfs_file_alloca(pid, "stat");
 623         r = read_one_line_file(p, &line);
 624         if (r == -ENOENT)
 625                 return -ESRCH;
 626         if (r < 0)
 627                 return r;
 628
 629         /* Let's skip the pid and comm fields. The latter is enclosed
 630          * in () but does not escape any () in its value, so let's
 631          * skip over it manually */
 632
 633         p = strrchr(line, ')');
 634         if (!p)
 635                 return -EIO;
 636
 637         p++;
 638
 639         if (sscanf(p, " "
 640                    "%*c "  /* state */
 641                    "%lu ", /* ppid */
 642                    &ppid) != 1)
 643                 return -EIO;
 644
 645         if ((long unsigned) (pid_t) ppid != ppid)
 646                 return -ERANGE;
 647
 648         *_ppid = (pid_t) ppid;
 649
 650         return 0;
 651 }
 652
 653 int wait_for_terminate(pid_t pid, siginfo_t *status) {
 654         siginfo_t dummy;
 655
 656         assert(pid >= 1);
 657
 658         if (!status)
 659                 status = &dummy;
 660
 661         for (;;) {
 662                 zero(*status);
 663
 664                 if (waitid(P_PID, pid, status, WEXITED) < 0) {
 665
 666                         if (errno == EINTR)
 667                                 continue;
 668
 669                         return negative_errno();
 670                 }
 671
 672                 return 0;
 673         }
 674 }
 675
 676 /*
 677  * Return values:
 678  * < 0 : wait_for_terminate() failed to get the state of the
 679  *       process, the process was terminated by a signal, or
 680  *       failed for an unknown reason.
 681  * >=0 : The process terminated normally, and its exit code is
 682  *       returned.
 683  *
 684  * That is, success is indicated by a return value of zero, and an
 685  * error is indicated by a non-zero value.
 686  *
 687  * A warning is emitted if the process terminates abnormally,
 688  * and also if it returns non-zero unless check_exit_code is true.
 689  */
 690 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
 691         int r;
 692         siginfo_t status;
 693
 694         assert(name);
 695         assert(pid > 1);
 696
 697         r = wait_for_terminate(pid, &status);
 698         if (r < 0)
 699                 return log_warning_errno(r, "Failed to wait for %s: %m", name);
 700
 701         if (status.si_code == CLD_EXITED) {
 702                 if (status.si_status != 0)
 703                         log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
 704                                  "%s failed with error code %i.", name, status.si_status);
 705                 else
 706                         log_debug("%s succeeded.", name);
 707
 708                 return status.si_status;
 709         } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
 710
 711                 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
 712                 return -EPROTO;
 713         }
 714
 715         log_warning("%s failed due to unknown reason.", name);
 716         return -EPROTO;
 717 }
 718
 719 /*
 720  * Return values:
 721  * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
 722  *       process, the process timed out, the process was terminated by a
 723  *       signal, or failed for an unknown reason.
 724  * >=0 : The process terminated normally with no failures.
 725  *
 726  * Success is indicated by a return value of zero, a timeout is indicated
 727  * by ETIMEDOUT, and all other child failure states are indicated by error
 728  * is indicated by a non-zero value.
 729  */
 730 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
 731         sigset_t mask;
 732         int r;
 733         usec_t until;
 734
 735         assert_se(sigemptyset(&mask) == 0);
 736         assert_se(sigaddset(&mask, SIGCHLD) == 0);
 737
 738         /* Drop into a sigtimewait-based timeout. Waiting for the
 739          * pid to exit. */
 740         until = now(CLOCK_MONOTONIC) + timeout;
 741         for (;;) {
 742                 usec_t n;
 743                 siginfo_t status = {};
 744                 struct timespec ts;
 745
 746                 n = now(CLOCK_MONOTONIC);
 747                 if (n >= until)
 748                         break;
 749
 750                 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
 751                 /* Assuming we woke due to the child exiting. */
 752                 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
 753                         if (status.si_pid == pid) {
 754                                 /* This is the correct child.*/
 755                                 if (status.si_code == CLD_EXITED)
 756                                         return (status.si_status == 0) ? 0 : -EPROTO;
 757                                 else
 758                                         return -EPROTO;
 759                         }
 760                 }
 761                 /* Not the child, check for errors and proceed appropriately */
 762                 if (r < 0) {
 763                         switch (r) {
 764                         case -EAGAIN:
 765                                 /* Timed out, child is likely hung. */
 766                                 return -ETIMEDOUT;
 767                         case -EINTR:
 768                                 /* Received a different signal and should retry */
 769                                 continue;
 770                         default:
 771                                 /* Return any unexpected errors */
 772                                 return r;
 773                         }
 774                 }
 775         }
 776
 777         return -EPROTO;
 778 }
 779
 780 void sigkill_wait(pid_t pid) {
 781         assert(pid > 1);
 782
 783         if (kill(pid, SIGKILL) > 0)
 784                 (void) wait_for_terminate(pid, NULL);
 785 }
 786
 787 void sigkill_waitp(pid_t *pid) {
 788         if (!pid)
 789                 return;
 790         if (*pid <= 1)
 791                 return;
 792
 793         sigkill_wait(*pid);
 794 }
 795
 796 int kill_and_sigcont(pid_t pid, int sig) {
 797         int r;
 798
 799         r = kill(pid, sig) < 0 ? -errno : 0;
 800
 801         /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
 802          * affected by a process being suspended anyway. */
 803         if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL))
 804                 (void) kill(pid, SIGCONT);
 805
 806         return r;
 807 }
 808
 809 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
 810         _cleanup_fclose_ FILE *f = NULL;
 811         char *value = NULL;
 812         int r;
 813         bool done = false;
 814         size_t l;
 815         const char *path;
 816
 817         assert(pid >= 0);
 818         assert(field);
 819         assert(_value);
 820
 821         path = procfs_file_alloca(pid, "environ");
 822
 823         f = fopen(path, "re");
 824         if (!f) {
 825                 if (errno == ENOENT)
 826                         return -ESRCH;
 827                 return -errno;
 828         }
 829
 830         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 831
 832         l = strlen(field);
 833         r = 0;
 834
 835         do {
 836                 char line[LINE_MAX];
 837                 unsigned i;
 838
 839                 for (i = 0; i < sizeof(line)-1; i++) {
 840                         int c;
 841
 842                         c = getc(f);
 843                         if (_unlikely_(c == EOF)) {
 844                                 done = true;
 845                                 break;
 846                         } else if (c == 0)
 847                                 break;
 848
 849                         line[i] = c;
 850                 }
 851                 line[i] = 0;
 852
 853                 if (strneq(line, field, l) && line[l] == '=') {
 854                         value = strdup(line + l + 1);
 855                         if (!value)
 856                                 return -ENOMEM;
 857
 858                         r = 1;
 859                         break;
 860                 }
 861
 862         } while (!done);
 863
 864         *_value = value;
 865         return r;
 866 }
 867
 868 bool pid_is_unwaited(pid_t pid) {
 869         /* Checks whether a PID is still valid at all, including a zombie */
 870
 871         if (pid < 0)
 872                 return false;
 873
 874         if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
 875                 return true;
 876
 877         if (pid == getpid_cached())
 878                 return true;
 879
 880         if (kill(pid, 0) >= 0)
 881                 return true;
 882
 883         return errno != ESRCH;
 884 }
 885
 886 bool pid_is_alive(pid_t pid) {
 887         int r;
 888
 889         /* Checks whether a PID is still valid and not a zombie */
 890
 891         if (pid < 0)
 892                 return false;
 893
 894         if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
 895                 return true;
 896
 897         if (pid == getpid_cached())
 898                 return true;
 899
 900         r = get_process_state(pid);
 901         if (IN_SET(r, -ESRCH, 'Z'))
 902                 return false;
 903
 904         return true;
 905 }
 906
 907 int pid_from_same_root_fs(pid_t pid) {
 908         const char *root;
 909
 910         if (pid < 0)
 911                 return false;
 912
 913         if (pid == 0 || pid == getpid_cached())
 914                 return true;
 915
 916         root = procfs_file_alloca(pid, "root");
 917
 918         return files_same(root, "/proc/1/root", 0);
 919 }
 920
 921 bool is_main_thread(void) {
 922         static thread_local int cached = 0;
 923
 924         if (_unlikely_(cached == 0))
 925                 cached = getpid_cached() == gettid() ? 1 : -1;
 926
 927         return cached > 0;
 928 }
 929
 930 noreturn void freeze(void) {
 931
 932         log_close();
 933
 934         /* Make sure nobody waits for us on a socket anymore */
 935         close_all_fds(NULL, 0);
 936
 937         sync();
 938
 939         for (;;)
 940                 pause();
 941 }
 942
 943 bool oom_score_adjust_is_valid(int oa) {
 944         return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
 945 }
 946
 947 unsigned long personality_from_string(const char *p) {
 948         int architecture;
 949
 950         if (!p)
 951                 return PERSONALITY_INVALID;
 952
 953         /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
 954          * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
 955          * the same register size. */
 956
 957         architecture = architecture_from_string(p);
 958         if (architecture < 0)
 959                 return PERSONALITY_INVALID;
 960
 961         if (architecture == native_architecture())
 962                 return PER_LINUX;
 963 #ifdef SECONDARY_ARCHITECTURE
 964         if (architecture == SECONDARY_ARCHITECTURE)
 965                 return PER_LINUX32;
 966 #endif
 967
 968         return PERSONALITY_INVALID;
 969 }
 970
 971 const char* personality_to_string(unsigned long p) {
 972         int architecture = _ARCHITECTURE_INVALID;
 973
 974         if (p == PER_LINUX)
 975                 architecture = native_architecture();
 976 #ifdef SECONDARY_ARCHITECTURE
 977         else if (p == PER_LINUX32)
 978                 architecture = SECONDARY_ARCHITECTURE;
 979 #endif
 980
 981         if (architecture < 0)
 982                 return NULL;
 983
 984         return architecture_to_string(architecture);
 985 }
 986
 987 int safe_personality(unsigned long p) {
 988         int ret;
 989
 990         /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
 991          * and in others as negative return value containing an errno-like value. Let's work around this: this is a
 992          * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
 993          * the return value indicating the same issue, so that we are definitely on the safe side.
 994          *
 995          * See https://github.com/systemd/systemd/issues/6737 */
 996
 997         errno = 0;
 998         ret = personality(p);
 999         if (ret < 0) {
1000                 if (errno != 0)
1001                         return -errno;
1002
1003                 errno = -ret;
1004         }
1005
1006         return ret;
1007 }
1008
1009 int opinionated_personality(unsigned long *ret) {
1010         int current;
1011
1012         /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1013          * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1014          * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1015
1016         current = safe_personality(PERSONALITY_INVALID);
1017         if (current < 0)
1018                 return current;
1019
1020         if (((unsigned long) current & 0xffff) == PER_LINUX32)
1021                 *ret = PER_LINUX32;
1022         else
1023                 *ret = PER_LINUX;
1024
1025         return 0;
1026 }
1027
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only does something when we are PID 1 and run under valgrind: forks off a child that exits right
         * away and waits for it. Compiled to a NOP when valgrind support is not available at build time. */

        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        /* In the child: nothing to do but exit */
        if (helper == 0)
                exit(EXIT_SUCCESS);

        /* In the parent: reap the helper, ignoring how it terminated */
        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1044
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a, rhs = *(const pid_t*) b;

        /* Three-way comparison of two pid_t values, suitable for usage in qsort(): returns -1, 0 or 1 if the
         * first PID is smaller than, equal to, or larger than the second one. */

        return (lhs > rhs) - (lhs < rhs);
}
1056
int ioprio_parse_priority(const char *s, int *ret) {
        int prio, r;

        /* Parses an I/O priority from a string. Returns 0 and stores the value in *ret on success. Returns the
         * error from safe_atoi() if the string cannot be parsed, and -EINVAL if the parsed number is rejected by
         * ioprio_priority_is_valid(). On failure *ret is left untouched. */

        assert(s);
        assert(ret);

        r = safe_atoi(s, &prio);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(prio)) {
                *ret = prio;
                return 0;
        }

        return -EINVAL;
}
1073
/* State of the PID cache, kept in cached_pid:
 *
 *     CACHED_PID_UNSET [0]  → nothing has been cached yet
 *     CACHED_PID_BUSY [-1]  → some thread is in the middle of filling the cache
 *     anything else         → the cached PID itself
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

static void reset_cached_pid(void) {
        /* Runs in the child right after a fork(), i.e. at the first moment our PID differs from what was
         * cached: throw the stale value away, so that it is looked up again on next use. */
        cached_pid = CACHED_PID_UNSET;
}
1090
1091 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1092  * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1093  * libpthread, as it is part of glibc anyway. */
1094 extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
1095 extern void* __dso_handle __attribute__ ((__weak__));
1096
1097 pid_t getpid_cached(void) {
1098         pid_t current_value;
1099
1100         /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1101          * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1102          * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1103          * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1104          *
1105          * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1106          * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1107          */
1108
1109         current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1110
1111         switch (current_value) {
1112
1113         case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1114                 pid_t new_pid;
1115
1116                 new_pid = getpid();
1117
1118                 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1119                         /* OOM? Let's try again later */
1120                         cached_pid = CACHED_PID_UNSET;
1121                         return new_pid;
1122                 }
1123
1124                 cached_pid = new_pid;
1125                 return new_pid;
1126         }
1127
1128         case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1129                 return getpid();
1130
1131         default: /* Properly initialized */
1132                 return current_value;
1133         }
1134 }
1135
int must_be_root(void) {

        /* Returns 0 if the effective UID is 0. Otherwise logs an error and returns -EPERM. */

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1144
1145 int safe_fork_full(
1146                 const char *name,
1147                 const int except_fds[],
1148                 size_t n_except_fds,
1149                 ForkFlags flags,
1150                 pid_t *ret_pid) {
1151
1152         pid_t original_pid, pid;
1153         sigset_t saved_ss;
1154         bool block_signals;
1155         int r;
1156
1157         /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1158          * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1159
1160         original_pid = getpid_cached();
1161
1162         block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1163
1164         if (block_signals) {
1165                 sigset_t ss;
1166
1167                 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1168                  * that SIGTERMs are not lost we might send to the child. */
1169                 if (sigfillset(&ss) < 0)
1170                         return log_debug_errno(errno, "Failed to reset signal set: %m");
1171
1172                 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1173                         return log_debug_errno(errno, "Failed to reset signal mask: %m");
1174         }
1175
1176         pid = fork();
1177         if (pid < 0) {
1178                 r = -errno;
1179
1180                 if (block_signals) /* undo what we did above */
1181                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1182
1183                 return log_debug_errno(r, "Failed to fork: %m");
1184         }
1185         if (pid > 0) {
1186                 /* We are in the parent process */
1187
1188                 if (block_signals) /* undo what we did above */
1189                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1190
1191                 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1192
1193                 if (ret_pid)
1194                         *ret_pid = pid;
1195
1196                 return 1;
1197         }
1198
1199         /* We are in the child process */
1200
1201         if (flags & FORK_REOPEN_LOG) {
1202                 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1203                 log_close();
1204                 log_set_open_when_needed(true);
1205         }
1206
1207         if (name) {
1208                 r = rename_process(name);
1209                 if (r < 0)
1210                         log_debug_errno(r, "Failed to rename process, ignoring: %m");
1211         }
1212
1213         if (flags & FORK_DEATHSIG)
1214                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1215                         log_debug_errno(errno, "Failed to set death signal: %m");
1216                         _exit(EXIT_FAILURE);
1217                 }
1218
1219         if (flags & FORK_RESET_SIGNALS) {
1220                 r = reset_all_signal_handlers();
1221                 if (r < 0) {
1222                         log_debug_errno(r, "Failed to reset signal handlers: %m");
1223                         _exit(EXIT_FAILURE);
1224                 }
1225
1226                 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1227                 r = reset_signal_mask();
1228                 if (r < 0) {
1229                         log_debug_errno(r, "Failed to reset signal mask: %m");
1230                         _exit(EXIT_FAILURE);
1231                 }
1232         } else if (block_signals) { /* undo what we did above */
1233                 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1234                         log_debug_errno(errno, "Failed to restore signal mask: %m");
1235                         _exit(EXIT_FAILURE);
1236                 }
1237         }
1238
1239         if (flags & FORK_DEATHSIG) {
1240                 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1241                  * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1242
1243                 if (getppid() != original_pid) {
1244                         log_debug("Parent died early, raising SIGTERM.");
1245                         (void) raise(SIGTERM);
1246                         _exit(EXIT_FAILURE);
1247                 }
1248         }
1249
1250         if (flags & FORK_CLOSE_ALL_FDS) {
1251                 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1252                 log_close();
1253
1254                 r = close_all_fds(except_fds, n_except_fds);
1255                 if (r < 0) {
1256                         log_debug_errno(r, "Failed to close all file descriptors: %m");
1257                         _exit(EXIT_FAILURE);
1258                 }
1259         }
1260
1261         /* When we were asked to reopen the logs, do so again now */
1262         if (flags & FORK_REOPEN_LOG) {
1263                 log_open();
1264                 log_set_open_when_needed(false);
1265         }
1266
1267         if (flags & FORK_NULL_STDIO) {
1268                 r = make_null_stdio();
1269                 if (r < 0) {
1270                         log_debug_errno(r, "Failed to connect stdin/stdout to /dev/null: %m");
1271                         _exit(EXIT_FAILURE);
1272                 }
1273         }
1274
1275         if (ret_pid)
1276                 *ret_pid = getpid_cached();
1277
1278         return 0;
1279 }
1280
1281 int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
1282         bool stdout_is_tty, stderr_is_tty;
1283         unsigned n, i;
1284         va_list ap;
1285         char **l;
1286         int r;
1287
1288         assert(path);
1289
1290         /* Spawns a temporary TTY agent, making sure it goes away when we go away */
1291
1292         r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
1293         if (r < 0)
1294                 return r;
1295         if (r > 0)
1296                 return 0;
1297
1298         /* In the child: */
1299
1300         stdout_is_tty = isatty(STDOUT_FILENO);
1301         stderr_is_tty = isatty(STDERR_FILENO);
1302
1303         if (!stdout_is_tty || !stderr_is_tty) {
1304                 int fd;
1305
1306                 /* Detach from stdout/stderr. and reopen
1307                  * /dev/tty for them. This is important to
1308                  * ensure that when systemctl is started via
1309                  * popen() or a similar call that expects to
1310                  * read EOF we actually do generate EOF and
1311                  * not delay this indefinitely by because we
1312                  * keep an unused copy of stdin around. */
1313                 fd = open("/dev/tty", O_WRONLY);
1314                 if (fd < 0) {
1315                         log_error_errno(errno, "Failed to open /dev/tty: %m");
1316                         _exit(EXIT_FAILURE);
1317                 }
1318
1319                 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
1320                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1321                         _exit(EXIT_FAILURE);
1322                 }
1323
1324                 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
1325                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
1326                         _exit(EXIT_FAILURE);
1327                 }
1328
1329                 if (fd > STDERR_FILENO)
1330                         close(fd);
1331         }
1332
1333         /* Count arguments */
1334         va_start(ap, path);
1335         for (n = 0; va_arg(ap, char*); n++)
1336                 ;
1337         va_end(ap);
1338
1339         /* Allocate strv */
1340         l = alloca(sizeof(char *) * (n + 1));
1341
1342         /* Fill in arguments */
1343         va_start(ap, path);
1344         for (i = 0; i <= n; i++)
1345                 l[i] = va_arg(ap, char*);
1346         va_end(ap);
1347
1348         execv(path, l);
1349         _exit(EXIT_FAILURE);
1350 }
1351
1352 static const char *const ioprio_class_table[] = {
1353         [IOPRIO_CLASS_NONE] = "none",
1354         [IOPRIO_CLASS_RT] = "realtime",
1355         [IOPRIO_CLASS_BE] = "best-effort",
1356         [IOPRIO_CLASS_IDLE] = "idle"
1357 };
1358
1359 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1360
1361 static const char *const sigchld_code_table[] = {
1362         [CLD_EXITED] = "exited",
1363         [CLD_KILLED] = "killed",
1364         [CLD_DUMPED] = "dumped",
1365         [CLD_TRAPPED] = "trapped",
1366         [CLD_STOPPED] = "stopped",
1367         [CLD_CONTINUED] = "continued",
1368 };
1369
1370 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1371
1372 static const char* const sched_policy_table[] = {
1373         [SCHED_OTHER] = "other",
1374         [SCHED_BATCH] = "batch",
1375         [SCHED_IDLE] = "idle",
1376         [SCHED_FIFO] = "fifo",
1377         [SCHED_RR] = "rr"
1378 };
1379
1380 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);