src/basic/process-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <linux/oom.h>
  25 #include <sched.h>
  26 #include <signal.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdio_ext.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <sys/mman.h>
  33 #include <sys/personality.h>
  34 #include <sys/prctl.h>
  35 #include <sys/types.h>
  36 #include <sys/wait.h>
  37 #include <syslog.h>
  38 #include <unistd.h>
  39 #if HAVE_VALGRIND_VALGRIND_H
  40 #include <valgrind/valgrind.h>
  41 #endif
  42
  43 #include "alloc-util.h"
  44 #include "architecture.h"
  45 #include "escape.h"
  46 #include "fd-util.h"
  47 #include "fileio.h"
  48 #include "fs-util.h"
  49 #include "ioprio.h"
  50 #include "log.h"
  51 #include "macro.h"
  52 #include "missing.h"
  53 #include "process-util.h"
  54 #include "raw-clone.h"
  55 #include "signal-util.h"
  56 #include "stat-util.h"
  57 #include "string-table.h"
  58 #include "string-util.h"
  59 #include "terminal-util.h"
  60 #include "user-util.h"
  61 #include "util.h"
  62
  63 int get_process_state(pid_t pid) {
  64         const char *p;
  65         char state;
  66         int r;
  67         _cleanup_free_ char *line = NULL;
  68
  69         assert(pid >= 0);
  70
  71         p = procfs_file_alloca(pid, "stat");
  72
  73         r = read_one_line_file(p, &line);
  74         if (r == -ENOENT)
  75                 return -ESRCH;
  76         if (r < 0)
  77                 return r;
  78
  79         p = strrchr(line, ')');
  80         if (!p)
  81                 return -EIO;
  82
  83         p++;
  84
  85         if (sscanf(p, " %c", &state) != 1)
  86                 return -EIO;
  87
  88         return (unsigned char) state;
  89 }
  90
  91 int get_process_comm(pid_t pid, char **name) {
  92         const char *p;
  93         int r;
  94
  95         assert(name);
  96         assert(pid >= 0);
  97
  98         p = procfs_file_alloca(pid, "comm");
  99
 100         r = read_one_line_file(p, name);
 101         if (r == -ENOENT)
 102                 return -ESRCH;
 103
 104         return r;
 105 }
 106
 107 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
 108         _cleanup_fclose_ FILE *f = NULL;
 109         bool space = false;
 110         char *k, *ans = NULL;
 111         const char *p;
 112         int c;
 113
 114         assert(line);
 115         assert(pid >= 0);
 116
 117         /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
 118          * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
 119          * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
 120          * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
 121          * command line that resolves to the empty string will return the "comm" name of the process instead.
 122          *
 123          * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
 124          * comm_fallback is false). Returns 0 and sets *line otherwise. */
 125
 126         p = procfs_file_alloca(pid, "cmdline");
 127
 128         f = fopen(p, "re");
 129         if (!f) {
 130                 if (errno == ENOENT)
 131                         return -ESRCH;
 132                 return -errno;
 133         }
 134
 135         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 136
 137         if (max_length == 1) {
 138
 139                 /* If there's only room for one byte, return the empty string */
 140                 ans = new0(char, 1);
 141                 if (!ans)
 142                         return -ENOMEM;
 143
 144                 *line = ans;
 145                 return 0;
 146
 147         } else if (max_length == 0) {
 148                 size_t len = 0, allocated = 0;
 149
 150                 while ((c = getc(f)) != EOF) {
 151
 152                         if (!GREEDY_REALLOC(ans, allocated, len+3)) {
 153                                 free(ans);
 154                                 return -ENOMEM;
 155                         }
 156
 157                         if (isprint(c)) {
 158                                 if (space) {
 159                                         ans[len++] = ' ';
 160                                         space = false;
 161                                 }
 162
 163                                 ans[len++] = c;
 164                         } else if (len > 0)
 165                                 space = true;
 166                }
 167
 168                 if (len > 0)
 169                         ans[len] = '\0';
 170                 else
 171                         ans = mfree(ans);
 172
 173         } else {
 174                 bool dotdotdot = false;
 175                 size_t left;
 176
 177                 ans = new(char, max_length);
 178                 if (!ans)
 179                         return -ENOMEM;
 180
 181                 k = ans;
 182                 left = max_length;
 183                 while ((c = getc(f)) != EOF) {
 184
 185                         if (isprint(c)) {
 186
 187                                 if (space) {
 188                                         if (left <= 2) {
 189                                                 dotdotdot = true;
 190                                                 break;
 191                                         }
 192
 193                                         *(k++) = ' ';
 194                                         left--;
 195                                         space = false;
 196                                 }
 197
 198                                 if (left <= 1) {
 199                                         dotdotdot = true;
 200                                         break;
 201                                 }
 202
 203                                 *(k++) = (char) c;
 204                                 left--;
 205                         } else if (k > ans)
 206                                 space = true;
 207                 }
 208
 209                 if (dotdotdot) {
 210                         if (max_length <= 4) {
 211                                 k = ans;
 212                                 left = max_length;
 213                         } else {
 214                                 k = ans + max_length - 4;
 215                                 left = 4;
 216
 217                                 /* Eat up final spaces */
 218                                 while (k > ans && isspace(k[-1])) {
 219                                         k--;
 220                                         left++;
 221                                 }
 222                         }
 223
 224                         strncpy(k, "...", left-1);
 225                         k[left-1] = 0;
 226                 } else
 227                         *k = 0;
 228         }
 229
 230         /* Kernel threads have no argv[] */
 231         if (isempty(ans)) {
 232                 _cleanup_free_ char *t = NULL;
 233                 int h;
 234
 235                 free(ans);
 236
 237                 if (!comm_fallback)
 238                         return -ENOENT;
 239
 240                 h = get_process_comm(pid, &t);
 241                 if (h < 0)
 242                         return h;
 243
 244                 if (max_length == 0)
 245                         ans = strjoin("[", t, "]");
 246                 else {
 247                         size_t l;
 248
 249                         l = strlen(t);
 250
 251                         if (l + 3 <= max_length)
 252                                 ans = strjoin("[", t, "]");
 253                         else if (max_length <= 6) {
 254
 255                                 ans = new(char, max_length);
 256                                 if (!ans)
 257                                         return -ENOMEM;
 258
 259                                 memcpy(ans, "[...]", max_length-1);
 260                                 ans[max_length-1] = 0;
 261                         } else {
 262                                 char *e;
 263
 264                                 t[max_length - 6] = 0;
 265
 266                                 /* Chop off final spaces */
 267                                 e = strchr(t, 0);
 268                                 while (e > t && isspace(e[-1]))
 269                                         e--;
 270                                 *e = 0;
 271
 272                                 ans = strjoin("[", t, "...]");
 273                         }
 274                 }
 275                 if (!ans)
 276                         return -ENOMEM;
 277         }
 278
 279         *line = ans;
 280         return 0;
 281 }
 282
 283 int rename_process(const char name[]) {
 284         static size_t mm_size = 0;
 285         static char *mm = NULL;
 286         bool truncated = false;
 287         size_t l;
 288
 289         /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
 290          * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
 291          * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
 292          * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
 293          * truncated.
 294          *
 295          * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
 296
 297         if (isempty(name))
 298                 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
 299
 300         l = strlen(name);
 301
 302         /* First step, change the comm field. */
 303         (void) prctl(PR_SET_NAME, name);
 304         if (l > 15) /* Linux process names can be 15 chars at max */
 305                 truncated = true;
 306
 307         /* Second step, change glibc's ID of the process name. */
 308         if (program_invocation_name) {
 309                 size_t k;
 310
 311                 k = strlen(program_invocation_name);
 312                 strncpy(program_invocation_name, name, k);
 313                 if (l > k)
 314                         truncated = true;
 315         }
 316
 317         /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
 318          * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
 319          * the end. This is the best option for changing /proc/self/cmdline. */
 320
 321         /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
 322          * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
 323          * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
 324          * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
 325          * mmap() is not. */
 326         if (geteuid() != 0)
 327                 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
 328         else if (mm_size < l+1) {
 329                 size_t nn_size;
 330                 char *nn;
 331
 332                 nn_size = PAGE_ALIGN(l+1);
 333                 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 334                 if (nn == MAP_FAILED) {
 335                         log_debug_errno(errno, "mmap() failed: %m");
 336                         goto use_saved_argv;
 337                 }
 338
 339                 strncpy(nn, name, nn_size);
 340
 341                 /* Now, let's tell the kernel about this new memory */
 342                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
 343                         log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
 344                         (void) munmap(nn, nn_size);
 345                         goto use_saved_argv;
 346                 }
 347
 348                 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
 349                  * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
 350                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
 351                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 352
 353                 if (mm)
 354                         (void) munmap(mm, mm_size);
 355
 356                 mm = nn;
 357                 mm_size = nn_size;
 358         } else {
 359                 strncpy(mm, name, mm_size);
 360
 361                 /* Update the end pointer, continuing regardless of any failure. */
 362                 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
 363                         log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 364         }
 365
 366 use_saved_argv:
 367         /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
 368          * it still looks here */
 369
 370         if (saved_argc > 0) {
 371                 int i;
 372
 373                 if (saved_argv[0]) {
 374                         size_t k;
 375
 376                         k = strlen(saved_argv[0]);
 377                         strncpy(saved_argv[0], name, k);
 378                         if (l > k)
 379                                 truncated = true;
 380                 }
 381
 382                 for (i = 1; i < saved_argc; i++) {
 383                         if (!saved_argv[i])
 384                                 break;
 385
 386                         memzero(saved_argv[i], strlen(saved_argv[i]));
 387                 }
 388         }
 389
 390         return !truncated;
 391 }
 392
 393 int is_kernel_thread(pid_t pid) {
 394         const char *p;
 395         size_t count;
 396         char c;
 397         bool eof;
 398         FILE *f;
 399
 400         if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
 401                 return 0;
 402
 403         assert(pid > 1);
 404
 405         p = procfs_file_alloca(pid, "cmdline");
 406         f = fopen(p, "re");
 407         if (!f) {
 408                 if (errno == ENOENT)
 409                         return -ESRCH;
 410                 return -errno;
 411         }
 412
 413         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 414
 415         count = fread(&c, 1, 1, f);
 416         eof = feof(f);
 417         fclose(f);
 418
 419         /* Kernel threads have an empty cmdline */
 420
 421         if (count <= 0)
 422                 return eof ? 1 : -errno;
 423
 424         return 0;
 425 }
 426
 427 int get_process_capeff(pid_t pid, char **capeff) {
 428         const char *p;
 429         int r;
 430
 431         assert(capeff);
 432         assert(pid >= 0);
 433
 434         p = procfs_file_alloca(pid, "status");
 435
 436         r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
 437         if (r == -ENOENT)
 438                 return -ESRCH;
 439
 440         return r;
 441 }
 442
 443 static int get_process_link_contents(const char *proc_file, char **name) {
 444         int r;
 445
 446         assert(proc_file);
 447         assert(name);
 448
 449         r = readlink_malloc(proc_file, name);
 450         if (r == -ENOENT)
 451                 return -ESRCH;
 452         if (r < 0)
 453                 return r;
 454
 455         return 0;
 456 }
 457
 458 int get_process_exe(pid_t pid, char **name) {
 459         const char *p;
 460         char *d;
 461         int r;
 462
 463         assert(pid >= 0);
 464
 465         p = procfs_file_alloca(pid, "exe");
 466         r = get_process_link_contents(p, name);
 467         if (r < 0)
 468                 return r;
 469
 470         d = endswith(*name, " (deleted)");
 471         if (d)
 472                 *d = '\0';
 473
 474         return 0;
 475 }
 476
 477 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
 478         _cleanup_fclose_ FILE *f = NULL;
 479         char line[LINE_MAX];
 480         const char *p;
 481
 482         assert(field);
 483         assert(uid);
 484
 485         if (pid < 0)
 486                 return -EINVAL;
 487
 488         p = procfs_file_alloca(pid, "status");
 489         f = fopen(p, "re");
 490         if (!f) {
 491                 if (errno == ENOENT)
 492                         return -ESRCH;
 493                 return -errno;
 494         }
 495
 496         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 497
 498         FOREACH_LINE(line, f, return -errno) {
 499                 char *l;
 500
 501                 l = strstrip(line);
 502
 503                 if (startswith(l, field)) {
 504                         l += strlen(field);
 505                         l += strspn(l, WHITESPACE);
 506
 507                         l[strcspn(l, WHITESPACE)] = 0;
 508
 509                         return parse_uid(l, uid);
 510                 }
 511         }
 512
 513         return -EIO;
 514 }
 515
 516 int get_process_uid(pid_t pid, uid_t *uid) {
 517
 518         if (pid == 0 || pid == getpid_cached()) {
 519                 *uid = getuid();
 520                 return 0;
 521         }
 522
 523         return get_process_id(pid, "Uid:", uid);
 524 }
 525
 526 int get_process_gid(pid_t pid, gid_t *gid) {
 527
 528         if (pid == 0 || pid == getpid_cached()) {
 529                 *gid = getgid();
 530                 return 0;
 531         }
 532
 533         assert_cc(sizeof(uid_t) == sizeof(gid_t));
 534         return get_process_id(pid, "Gid:", gid);
 535 }
 536
 537 int get_process_cwd(pid_t pid, char **cwd) {
 538         const char *p;
 539
 540         assert(pid >= 0);
 541
 542         p = procfs_file_alloca(pid, "cwd");
 543
 544         return get_process_link_contents(p, cwd);
 545 }
 546
 547 int get_process_root(pid_t pid, char **root) {
 548         const char *p;
 549
 550         assert(pid >= 0);
 551
 552         p = procfs_file_alloca(pid, "root");
 553
 554         return get_process_link_contents(p, root);
 555 }
 556
 557 int get_process_environ(pid_t pid, char **env) {
 558         _cleanup_fclose_ FILE *f = NULL;
 559         _cleanup_free_ char *outcome = NULL;
 560         int c;
 561         const char *p;
 562         size_t allocated = 0, sz = 0;
 563
 564         assert(pid >= 0);
 565         assert(env);
 566
 567         p = procfs_file_alloca(pid, "environ");
 568
 569         f = fopen(p, "re");
 570         if (!f) {
 571                 if (errno == ENOENT)
 572                         return -ESRCH;
 573                 return -errno;
 574         }
 575
 576         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 577
 578         while ((c = fgetc(f)) != EOF) {
 579                 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
 580                         return -ENOMEM;
 581
 582                 if (c == '\0')
 583                         outcome[sz++] = '\n';
 584                 else
 585                         sz += cescape_char(c, outcome + sz);
 586         }
 587
 588         if (!outcome) {
 589                 outcome = strdup("");
 590                 if (!outcome)
 591                         return -ENOMEM;
 592         } else
 593                 outcome[sz] = '\0';
 594
 595         *env = outcome;
 596         outcome = NULL;
 597
 598         return 0;
 599 }
 600
 601 int get_process_ppid(pid_t pid, pid_t *_ppid) {
 602         int r;
 603         _cleanup_free_ char *line = NULL;
 604         long unsigned ppid;
 605         const char *p;
 606
 607         assert(pid >= 0);
 608         assert(_ppid);
 609
 610         if (pid == 0 || pid == getpid_cached()) {
 611                 *_ppid = getppid();
 612                 return 0;
 613         }
 614
 615         p = procfs_file_alloca(pid, "stat");
 616         r = read_one_line_file(p, &line);
 617         if (r == -ENOENT)
 618                 return -ESRCH;
 619         if (r < 0)
 620                 return r;
 621
 622         /* Let's skip the pid and comm fields. The latter is enclosed
 623          * in () but does not escape any () in its value, so let's
 624          * skip over it manually */
 625
 626         p = strrchr(line, ')');
 627         if (!p)
 628                 return -EIO;
 629
 630         p++;
 631
 632         if (sscanf(p, " "
 633                    "%*c "  /* state */
 634                    "%lu ", /* ppid */
 635                    &ppid) != 1)
 636                 return -EIO;
 637
 638         if ((long unsigned) (pid_t) ppid != ppid)
 639                 return -ERANGE;
 640
 641         *_ppid = (pid_t) ppid;
 642
 643         return 0;
 644 }
 645
 646 int wait_for_terminate(pid_t pid, siginfo_t *status) {
 647         siginfo_t dummy;
 648
 649         assert(pid >= 1);
 650
 651         if (!status)
 652                 status = &dummy;
 653
 654         for (;;) {
 655                 zero(*status);
 656
 657                 if (waitid(P_PID, pid, status, WEXITED) < 0) {
 658
 659                         if (errno == EINTR)
 660                                 continue;
 661
 662                         return negative_errno();
 663                 }
 664
 665                 return 0;
 666         }
 667 }
 668
 669 /*
 670  * Return values:
 671  * < 0 : wait_for_terminate() failed to get the state of the
 672  *       process, the process was terminated by a signal, or
 673  *       failed for an unknown reason.
 674  * >=0 : The process terminated normally, and its exit code is
 675  *       returned.
 676  *
 677  * That is, success is indicated by a return value of zero, and an
 678  * error is indicated by a non-zero value.
 679  *
 680  * A warning is emitted if the process terminates abnormally,
 681  * and also if it returns non-zero unless check_exit_code is true.
 682  */
 683 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
 684         int r;
 685         siginfo_t status;
 686
 687         assert(name);
 688         assert(pid > 1);
 689
 690         r = wait_for_terminate(pid, &status);
 691         if (r < 0)
 692                 return log_warning_errno(r, "Failed to wait for %s: %m", name);
 693
 694         if (status.si_code == CLD_EXITED) {
 695                 if (status.si_status != 0)
 696                         log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
 697                                  "%s failed with error code %i.", name, status.si_status);
 698                 else
 699                         log_debug("%s succeeded.", name);
 700
 701                 return status.si_status;
 702         } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
 703
 704                 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
 705                 return -EPROTO;
 706         }
 707
 708         log_warning("%s failed due to unknown reason.", name);
 709         return -EPROTO;
 710 }
 711
 712 /*
 713  * Return values:
 714  * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
 715  *       process, the process timed out, the process was terminated by a
 716  *       signal, or failed for an unknown reason.
 717  * >=0 : The process terminated normally with no failures.
 718  *
 719  * Success is indicated by a return value of zero, a timeout is indicated
 720  * by ETIMEDOUT, and all other child failure states are indicated by error
 721  * is indicated by a non-zero value.
 722  */
 723 int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
 724         sigset_t mask;
 725         int r;
 726         usec_t until;
 727
 728         assert_se(sigemptyset(&mask) == 0);
 729         assert_se(sigaddset(&mask, SIGCHLD) == 0);
 730
 731         /* Drop into a sigtimewait-based timeout. Waiting for the
 732          * pid to exit. */
 733         until = now(CLOCK_MONOTONIC) + timeout;
 734         for (;;) {
 735                 usec_t n;
 736                 siginfo_t status = {};
 737                 struct timespec ts;
 738
 739                 n = now(CLOCK_MONOTONIC);
 740                 if (n >= until)
 741                         break;
 742
 743                 r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
 744                 /* Assuming we woke due to the child exiting. */
 745                 if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
 746                         if (status.si_pid == pid) {
 747                                 /* This is the correct child.*/
 748                                 if (status.si_code == CLD_EXITED)
 749                                         return (status.si_status == 0) ? 0 : -EPROTO;
 750                                 else
 751                                         return -EPROTO;
 752                         }
 753                 }
 754                 /* Not the child, check for errors and proceed appropriately */
 755                 if (r < 0) {
 756                         switch (r) {
 757                         case -EAGAIN:
 758                                 /* Timed out, child is likely hung. */
 759                                 return -ETIMEDOUT;
 760                         case -EINTR:
 761                                 /* Received a different signal and should retry */
 762                                 continue;
 763                         default:
 764                                 /* Return any unexpected errors */
 765                                 return r;
 766                         }
 767                 }
 768         }
 769
 770         return -EPROTO;
 771 }
 772
 773 void sigkill_wait(pid_t pid) {
 774         assert(pid > 1);
 775
 776         if (kill(pid, SIGKILL) > 0)
 777                 (void) wait_for_terminate(pid, NULL);
 778 }
 779
 780 void sigkill_waitp(pid_t *pid) {
 781         if (!pid)
 782                 return;
 783         if (*pid <= 1)
 784                 return;
 785
 786         sigkill_wait(*pid);
 787 }
 788
 789 int kill_and_sigcont(pid_t pid, int sig) {
 790         int r;
 791
 792         r = kill(pid, sig) < 0 ? -errno : 0;
 793
 794         /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
 795          * affected by a process being suspended anyway. */
 796         if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL))
 797                 (void) kill(pid, SIGCONT);
 798
 799         return r;
 800 }
 801
 802 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
 803         _cleanup_fclose_ FILE *f = NULL;
 804         char *value = NULL;
 805         int r;
 806         bool done = false;
 807         size_t l;
 808         const char *path;
 809
 810         assert(pid >= 0);
 811         assert(field);
 812         assert(_value);
 813
 814         path = procfs_file_alloca(pid, "environ");
 815
 816         f = fopen(path, "re");
 817         if (!f) {
 818                 if (errno == ENOENT)
 819                         return -ESRCH;
 820                 return -errno;
 821         }
 822
 823         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
 824
 825         l = strlen(field);
 826         r = 0;
 827
 828         do {
 829                 char line[LINE_MAX];
 830                 unsigned i;
 831
 832                 for (i = 0; i < sizeof(line)-1; i++) {
 833                         int c;
 834
 835                         c = getc(f);
 836                         if (_unlikely_(c == EOF)) {
 837                                 done = true;
 838                                 break;
 839                         } else if (c == 0)
 840                                 break;
 841
 842                         line[i] = c;
 843                 }
 844                 line[i] = 0;
 845
 846                 if (strneq(line, field, l) && line[l] == '=') {
 847                         value = strdup(line + l + 1);
 848                         if (!value)
 849                                 return -ENOMEM;
 850
 851                         r = 1;
 852                         break;
 853                 }
 854
 855         } while (!done);
 856
 857         *_value = value;
 858         return r;
 859 }
 860
 861 bool pid_is_unwaited(pid_t pid) {
 862         /* Checks whether a PID is still valid at all, including a zombie */
 863
 864         if (pid < 0)
 865                 return false;
 866
 867         if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
 868                 return true;
 869
 870         if (pid == getpid_cached())
 871                 return true;
 872
 873         if (kill(pid, 0) >= 0)
 874                 return true;
 875
 876         return errno != ESRCH;
 877 }
 878
 879 bool pid_is_alive(pid_t pid) {
 880         int r;
 881
 882         /* Checks whether a PID is still valid and not a zombie */
 883
 884         if (pid < 0)
 885                 return false;
 886
 887         if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
 888                 return true;
 889
 890         if (pid == getpid_cached())
 891                 return true;
 892
 893         r = get_process_state(pid);
 894         if (IN_SET(r, -ESRCH, 'Z'))
 895                 return false;
 896
 897         return true;
 898 }
 899
 900 int pid_from_same_root_fs(pid_t pid) {
 901         const char *root;
 902
 903         if (pid < 0)
 904                 return false;
 905
 906         if (pid == 0 || pid == getpid_cached())
 907                 return true;
 908
 909         root = procfs_file_alloca(pid, "root");
 910
 911         return files_same(root, "/proc/1/root", 0);
 912 }
 913
 914 bool is_main_thread(void) {
 915         static thread_local int cached = 0;
 916
 917         if (_unlikely_(cached == 0))
 918                 cached = getpid_cached() == gettid() ? 1 : -1;
 919
 920         return cached > 0;
 921 }
 922
 923 noreturn void freeze(void) {
 924
 925         log_close();
 926
 927         /* Make sure nobody waits for us on a socket anymore */
 928         close_all_fds(NULL, 0);
 929
 930         sync();
 931
 932         for (;;)
 933                 pause();
 934 }
 935
 936 bool oom_score_adjust_is_valid(int oa) {
 937         return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
 938 }
 939
 940 unsigned long personality_from_string(const char *p) {
 941         int architecture;
 942
 943         if (!p)
 944                 return PERSONALITY_INVALID;
 945
 946         /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
 947          * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
 948          * the same register size. */
 949
 950         architecture = architecture_from_string(p);
 951         if (architecture < 0)
 952                 return PERSONALITY_INVALID;
 953
 954         if (architecture == native_architecture())
 955                 return PER_LINUX;
 956 #ifdef SECONDARY_ARCHITECTURE
 957         if (architecture == SECONDARY_ARCHITECTURE)
 958                 return PER_LINUX32;
 959 #endif
 960
 961         return PERSONALITY_INVALID;
 962 }
 963
 964 const char* personality_to_string(unsigned long p) {
 965         int architecture = _ARCHITECTURE_INVALID;
 966
 967         if (p == PER_LINUX)
 968                 architecture = native_architecture();
 969 #ifdef SECONDARY_ARCHITECTURE
 970         else if (p == PER_LINUX32)
 971                 architecture = SECONDARY_ARCHITECTURE;
 972 #endif
 973
 974         if (architecture < 0)
 975                 return NULL;
 976
 977         return architecture_to_string(architecture);
 978 }
 979
 980 int safe_personality(unsigned long p) {
 981         int ret;
 982
 983         /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
 984          * and in others as negative return value containing an errno-like value. Let's work around this: this is a
 985          * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
 986          * the return value indicating the same issue, so that we are definitely on the safe side.
 987          *
 988          * See https://github.com/systemd/systemd/issues/6737 */
 989
 990         errno = 0;
 991         ret = personality(p);
 992         if (ret < 0) {
 993                 if (errno != 0)
 994                         return -errno;
 995
 996                 errno = -ret;
 997         }
 998
 999         return ret;
1000 }
1001
1002 int opinionated_personality(unsigned long *ret) {
1003         int current;
1004
1005         /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1006          * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1007          * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1008
1009         current = safe_personality(PERSONALITY_INVALID);
1010         if (current < 0)
1011                 return current;
1012
1013         if (((unsigned long) current & 0xffff) == PER_LINUX32)
1014                 *ret = PER_LINUX32;
1015         else
1016                 *ret = PER_LINUX;
1017
1018         return 0;
1019 }
1020
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only does something when we are PID 1 and run under valgrind: a helper child is cloned off that
         * exits right away, and we wait for it to terminate.
         * NOTE(review): presumably the exiting child is what makes valgrind emit its summary — confirm. */

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        if (helper == 0) /* In the child: nothing to do but exit */
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
1037
int pid_compare_func(const void *a, const void *b) {
        const pid_t lhs = *(const pid_t*) a, rhs = *(const pid_t*) b;

        /* Three-way comparison of two pid_t values, suitable for usage in qsort(). Yields -1, 0 or 1. Built
         * from comparisons rather than a subtraction, so that the result cannot overflow. */

        return (lhs > rhs) - (lhs < rhs);
}
1049
int ioprio_parse_priority(const char *s, int *ret) {
        int parsed, r;

        /* Parses an I/O priority value from a string. On success stores the value in *ret and returns 0. If
         * the string cannot be parsed the error of safe_atoi() is returned, and if the number is not a valid
         * priority -EINVAL is returned. *ret is left untouched on failure. */

        assert(s);
        assert(ret);

        r = safe_atoi(s, &parsed);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(parsed)) {
                *ret = parsed;
                return 0;
        }

        return -EINVAL;
}
1066
/* The cached PID, possible values:
 *
 *     == UNSET [0]  → cache not initialized yet
 *     == BUSY [-1]  → some thread is initializing it at the moment
 *     any other     → the cached PID
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

static void reset_cached_pid(void) {
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
        cached_pid = CACHED_PID_UNSET;
}

/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
 * libpthread, as it is part of glibc anyway. */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
extern void* __dso_handle __attribute__ ((__weak__));

pid_t getpid_cached(void) {
        /* Whether reset_cached_pid() has been registered as atfork handler already. Registered handlers are
         * inherited by forked-off children, and reset_cached_pid() resets only cached_pid but not this flag.
         * Hence the handler is registered a single time only, instead of piling up one more copy each time the
         * cache is re-initialized in a child. Only ever accessed by the caller that switched the cache from
         * UNSET to BUSY below. */
        static bool installed = false;
        pid_t current_value;

        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
         *
         * Always returns the PID of the calling process, regardless of whether it came from the cache or not.
         *
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
         */

        /* Atomically claim the right to initialize the cache, if nobody did so yet */
        current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);

        switch (current_value) {

        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
                pid_t new_pid;

                new_pid = getpid();

                if (!installed) {
                        if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
                                /* OOM? Let's try again later */
                                cached_pid = CACHED_PID_UNSET;
                                return new_pid;
                        }

                        installed = true;
                }

                cached_pid = new_pid;
                return new_pid;
        }

        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
                return getpid();

        default: /* Properly initialized */
                return current_value;
        }
}
1128
int must_be_root(void) {

        /* Returns 0 if our effective UID is 0. Otherwise logs an error message and returns -EPERM. */

        if (geteuid() != 0) {
                log_error("Need to be root.");
                return -EPERM;
        }

        return 0;
}
1137
1138 int safe_fork_full(
1139                 const char *name,
1140                 const int except_fds[],
1141                 size_t n_except_fds,
1142                 ForkFlags flags,
1143                 pid_t *ret_pid) {
1144
1145         pid_t original_pid, pid;
1146         sigset_t saved_ss;
1147         bool block_signals;
1148         int r;
1149
1150         /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
1151          * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
1152
1153         original_pid = getpid_cached();
1154
1155         block_signals = flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG);
1156
1157         if (block_signals) {
1158                 sigset_t ss;
1159
1160                 /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can be sure
1161                  * that SIGTERMs are not lost we might send to the child. */
1162                 if (sigfillset(&ss) < 0)
1163                         return log_debug_errno(errno, "Failed to reset signal set: %m");
1164
1165                 if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
1166                         return log_debug_errno(errno, "Failed to reset signal mask: %m");
1167         }
1168
1169         pid = fork();
1170         if (pid < 0) {
1171                 r = -errno;
1172
1173                 if (block_signals) /* undo what we did above */
1174                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1175
1176                 return log_debug_errno(r, "Failed to fork: %m");
1177         }
1178         if (pid > 0) {
1179                 /* We are in the parent process */
1180
1181                 if (block_signals) /* undo what we did above */
1182                         (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
1183
1184                 log_debug("Sucessfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
1185
1186                 if (ret_pid)
1187                         *ret_pid = pid;
1188
1189                 return 1;
1190         }
1191
1192         /* We are in the child process */
1193
1194         if (flags & FORK_REOPEN_LOG) {
1195                 /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1196                 log_close();
1197                 log_set_open_when_needed(true);
1198         }
1199
1200         if (name) {
1201                 r = rename_process(name);
1202                 if (r < 0)
1203                         log_debug_errno(r, "Failed to rename process, ignoring: %m");
1204         }
1205
1206         if (flags & FORK_DEATHSIG)
1207                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
1208                         log_debug_errno(errno, "Failed to set death signal: %m");
1209                         _exit(EXIT_FAILURE);
1210                 }
1211
1212         if (flags & FORK_RESET_SIGNALS) {
1213                 r = reset_all_signal_handlers();
1214                 if (r < 0) {
1215                         log_debug_errno(r, "Failed to reset signal handlers: %m");
1216                         _exit(EXIT_FAILURE);
1217                 }
1218
1219                 /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1220                 r = reset_signal_mask();
1221                 if (r < 0) {
1222                         log_debug_errno(r, "Failed to reset signal mask: %m");
1223                         _exit(EXIT_FAILURE);
1224                 }
1225         } else if (block_signals) { /* undo what we did above */
1226                 if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
1227                         log_debug_errno(errno, "Failed to restore signal mask: %m");
1228                         _exit(EXIT_FAILURE);
1229                 }
1230         }
1231
1232         if (flags & FORK_DEATHSIG) {
1233                 /* Let's see if the parent PID is still the one we started from? If not, then the parent
1234                  * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1235
1236                 if (getppid() != original_pid) {
1237                         log_debug("Parent died early, raising SIGTERM.");
1238                         (void) raise(SIGTERM);
1239                         _exit(EXIT_FAILURE);
1240                 }
1241         }
1242
1243         if (flags & FORK_CLOSE_ALL_FDS) {
1244                 /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1245                 log_close();
1246
1247                 r = close_all_fds(except_fds, n_except_fds);
1248                 if (r < 0) {
1249                         log_debug_errno(r, "Failed to close all file descriptors: %m");
1250                         _exit(EXIT_FAILURE);
1251                 }
1252         }
1253
1254         /* When we were asked to reopen the logs, do so again now */
1255         if (flags & FORK_REOPEN_LOG) {
1256                 log_open();
1257                 log_set_open_when_needed(false);
1258         }
1259
1260         if (flags & FORK_NULL_STDIO) {
1261                 r = make_null_stdio();
1262                 if (r < 0) {
1263                         log_debug_errno(r, "Failed to connect stdin/stdout to /dev/null: %m");
1264                         _exit(EXIT_FAILURE);
1265                 }
1266         }
1267
1268         if (ret_pid)
1269                 *ret_pid = getpid_cached();
1270
1271         return 0;
1272 }
1273
/* Names of the I/O scheduling classes, indexed by the respective IOPRIO_CLASS_* constant */
static const char *const ioprio_class_table[] = {
        [IOPRIO_CLASS_NONE] = "none",
        [IOPRIO_CLASS_RT] = "realtime",
        [IOPRIO_CLASS_BE] = "best-effort",
        [IOPRIO_CLASS_IDLE] = "idle"
};

/* NOTE(review): presumably expands to the string conversion helpers for "ioprio_class" backed by the table above,
 * with INT_MAX as the upper bound for a numeric fallback — confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1282
/* Names of the CLD_* codes, indexed by the respective constant */
static const char *const sigchld_code_table[] = {
        [CLD_EXITED] = "exited",
        [CLD_KILLED] = "killed",
        [CLD_DUMPED] = "dumped",
        [CLD_TRAPPED] = "trapped",
        [CLD_STOPPED] = "stopped",
        [CLD_CONTINUED] = "continued",
};

/* NOTE(review): presumably expands to the string conversion helpers for "sigchld_code" backed by the table
 * above — confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1293
/* Names of the CPU scheduling policies, indexed by the respective SCHED_* constant */
static const char* const sched_policy_table[] = {
        [SCHED_OTHER] = "other",
        [SCHED_BATCH] = "batch",
        [SCHED_IDLE] = "idle",
        [SCHED_FIFO] = "fifo",
        [SCHED_RR] = "rr"
};

/* NOTE(review): presumably expands to the string conversion helpers for "sched_policy" backed by the table above,
 * with INT_MAX as the upper bound for a numeric fallback — confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);