src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <alloca.h>
   4 #include <errno.h>
   5 #include <fcntl.h>
   6 #include <sched.h>
   7 #include <signal.h>
   8 #include <stdarg.h>
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <sys/mman.h>
  13 #include <sys/prctl.h>
  14 #include <sys/statfs.h>
  15 #include <sys/sysmacros.h>
  16 #include <sys/types.h>
  17 #include <unistd.h>
  18
  19 #include "alloc-util.h"
  20 #include "btrfs-util.h"
  21 #include "build.h"
  22 #include "cgroup-util.h"
  23 #include "def.h"
  24 #include "device-nodes.h"
  25 #include "dirent-util.h"
  26 #include "env-file.h"
  27 #include "env-util.h"
  28 #include "fd-util.h"
  29 #include "fileio.h"
  30 #include "format-util.h"
  31 #include "hashmap.h"
  32 #include "hostname-util.h"
  33 #include "log.h"
  34 #include "macro.h"
  35 #include "missing.h"
  36 #include "parse-util.h"
  37 #include "path-util.h"
  38 #include "process-util.h"
  39 #include "procfs-util.h"
  40 #include "set.h"
  41 #include "signal-util.h"
  42 #include "stat-util.h"
  43 #include "string-util.h"
  44 #include "strv.h"
  45 #include "time-util.h"
  46 #include "umask-util.h"
  47 #include "user-util.h"
  48 #include "util.h"
  49 #include "virt.h"
  50
/* Process-wide copies of argc/argv. Nothing in this part of the file assigns them; presumably they are
 * filled in by the program's startup code — TODO confirm against callers. */
int saved_argc = 0;
char **saved_argv = NULL;
/* Cached answer of in_initrd(): negative means "not determined yet", otherwise 0 or 1. Written by
 * in_initrd() and in_initrd_force(). */
static int saved_in_initrd = -1;
  54
  55 size_t page_size(void) {
  56         static thread_local size_t pgsz = 0;
  57         long r;
  58
  59         if (_likely_(pgsz > 0))
  60                 return pgsz;
  61
  62         r = sysconf(_SC_PAGESIZE);
  63         assert(r > 0);
  64
  65         pgsz = (size_t) r;
  66         return pgsz;
  67 }
  68
  69 bool plymouth_running(void) {
  70         return access("/run/plymouth/pid", F_OK) >= 0;
  71 }
  72
  73 bool display_is_local(const char *display) {
  74         assert(display);
  75
  76         return
  77                 display[0] == ':' &&
  78                 display[1] >= '0' &&
  79                 display[1] <= '9';
  80 }
  81
  82 bool kexec_loaded(void) {
  83        _cleanup_free_ char *s = NULL;
  84
  85        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
  86                return false;
  87
  88        return s[0] == '1';
  89 }
  90
  91 int prot_from_flags(int flags) {
  92
  93         switch (flags & O_ACCMODE) {
  94
  95         case O_RDONLY:
  96                 return PROT_READ;
  97
  98         case O_WRONLY:
  99                 return PROT_WRITE;
 100
 101         case O_RDWR:
 102                 return PROT_READ|PROT_WRITE;
 103
 104         default:
 105                 return -EINVAL;
 106         }
 107 }
 108
 109 bool in_initrd(void) {
 110         struct statfs s;
 111         int r;
 112
 113         if (saved_in_initrd >= 0)
 114                 return saved_in_initrd;
 115
 116         /* We make two checks here:
 117          *
 118          * 1. the flag file /etc/initrd-release must exist
 119          * 2. the root file system must be a memory file system
 120          *
 121          * The second check is extra paranoia, since misdetecting an
 122          * initrd can have bad consequences due the initrd
 123          * emptying when transititioning to the main systemd.
 124          */
 125
 126         r = getenv_bool_secure("SYSTEMD_IN_INITRD");
 127         if (r < 0 && r != -ENXIO)
 128                 log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
 129
 130         if (r >= 0)
 131                 saved_in_initrd = r > 0;
 132         else
 133                 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 134                                   statfs("/", &s) >= 0 &&
 135                                   is_temporary_fs(&s);
 136
 137         return saved_in_initrd;
 138 }
 139
 140 void in_initrd_force(bool value) {
 141         saved_in_initrd = value;
 142 }
 143
 144 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 145 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 146                  __compar_d_fn_t compar, void *arg) {
 147         size_t l, u, idx;
 148         const void *p;
 149         int comparison;
 150
 151         assert(!size_multiply_overflow(nmemb, size));
 152
 153         l = 0;
 154         u = nmemb;
 155         while (l < u) {
 156                 idx = (l + u) / 2;
 157                 p = (const uint8_t*) base + idx * size;
 158                 comparison = compar(key, p, arg);
 159                 if (comparison < 0)
 160                         u = idx;
 161                 else if (comparison > 0)
 162                         l = idx + 1;
 163                 else
 164                         return (void *)p;
 165         }
 166         return NULL;
 167 }
 168
 169 bool memeqzero(const void *data, size_t length) {
 170         /* Does the buffer consist entirely of NULs?
 171          * Copied from https://github.com/systemd/casync/, copied in turn from
 172          * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
 173          * which is licensed CC-0.
 174          */
 175
 176         const uint8_t *p = data;
 177         size_t i;
 178
 179         /* Check first 16 bytes manually */
 180         for (i = 0; i < 16; i++, length--) {
 181                 if (length == 0)
 182                         return true;
 183                 if (p[i])
 184                         return false;
 185         }
 186
 187         /* Now we know first 16 bytes are NUL, memcmp with self.  */
 188         return memcmp(data, p + i, length) == 0;
 189 }
 190
 191 int on_ac_power(void) {
 192         bool found_offline = false, found_online = false;
 193         _cleanup_closedir_ DIR *d = NULL;
 194         struct dirent *de;
 195
 196         d = opendir("/sys/class/power_supply");
 197         if (!d)
 198                 return errno == ENOENT ? true : -errno;
 199
 200         FOREACH_DIRENT(de, d, return -errno) {
 201                 _cleanup_close_ int fd = -1, device = -1;
 202                 char contents[6];
 203                 ssize_t n;
 204
 205                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 206                 if (device < 0) {
 207                         if (IN_SET(errno, ENOENT, ENOTDIR))
 208                                 continue;
 209
 210                         return -errno;
 211                 }
 212
 213                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 214                 if (fd < 0) {
 215                         if (errno == ENOENT)
 216                                 continue;
 217
 218                         return -errno;
 219                 }
 220
 221                 n = read(fd, contents, sizeof(contents));
 222                 if (n < 0)
 223                         return -errno;
 224
 225                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 226                         continue;
 227
 228                 safe_close(fd);
 229                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 230                 if (fd < 0) {
 231                         if (errno == ENOENT)
 232                                 continue;
 233
 234                         return -errno;
 235                 }
 236
 237                 n = read(fd, contents, sizeof(contents));
 238                 if (n < 0)
 239                         return -errno;
 240
 241                 if (n != 2 || contents[1] != '\n')
 242                         return -EIO;
 243
 244                 if (contents[0] == '1') {
 245                         found_online = true;
 246                         break;
 247                 } else if (contents[0] == '0')
 248                         found_offline = true;
 249                 else
 250                         return -EIO;
 251         }
 252
 253         return found_online || !found_offline;
 254 }
 255
 256 int container_get_leader(const char *machine, pid_t *pid) {
 257         _cleanup_free_ char *s = NULL, *class = NULL;
 258         const char *p;
 259         pid_t leader;
 260         int r;
 261
 262         assert(machine);
 263         assert(pid);
 264
 265         if (streq(machine, ".host")) {
 266                 *pid = 1;
 267                 return 0;
 268         }
 269
 270         if (!machine_name_is_valid(machine))
 271                 return -EINVAL;
 272
 273         p = strjoina("/run/systemd/machines/", machine);
 274         r = parse_env_file(NULL, p,
 275                            "LEADER", &s,
 276                            "CLASS", &class);
 277         if (r == -ENOENT)
 278                 return -EHOSTDOWN;
 279         if (r < 0)
 280                 return r;
 281         if (!s)
 282                 return -EIO;
 283
 284         if (!streq_ptr(class, "container"))
 285                 return -EIO;
 286
 287         r = parse_pid(s, &leader);
 288         if (r < 0)
 289                 return r;
 290         if (leader <= 1)
 291                 return -EIO;
 292
 293         *pid = leader;
 294         return 0;
 295 }
 296
 297 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 298         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 299         int rfd = -1;
 300
 301         assert(pid >= 0);
 302
 303         if (mntns_fd) {
 304                 const char *mntns;
 305
 306                 mntns = procfs_file_alloca(pid, "ns/mnt");
 307                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 308                 if (mntnsfd < 0)
 309                         return -errno;
 310         }
 311
 312         if (pidns_fd) {
 313                 const char *pidns;
 314
 315                 pidns = procfs_file_alloca(pid, "ns/pid");
 316                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 317                 if (pidnsfd < 0)
 318                         return -errno;
 319         }
 320
 321         if (netns_fd) {
 322                 const char *netns;
 323
 324                 netns = procfs_file_alloca(pid, "ns/net");
 325                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 326                 if (netnsfd < 0)
 327                         return -errno;
 328         }
 329
 330         if (userns_fd) {
 331                 const char *userns;
 332
 333                 userns = procfs_file_alloca(pid, "ns/user");
 334                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 335                 if (usernsfd < 0 && errno != ENOENT)
 336                         return -errno;
 337         }
 338
 339         if (root_fd) {
 340                 const char *root;
 341
 342                 root = procfs_file_alloca(pid, "root");
 343                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 344                 if (rfd < 0)
 345                         return -errno;
 346         }
 347
 348         if (pidns_fd)
 349                 *pidns_fd = pidnsfd;
 350
 351         if (mntns_fd)
 352                 *mntns_fd = mntnsfd;
 353
 354         if (netns_fd)
 355                 *netns_fd = netnsfd;
 356
 357         if (userns_fd)
 358                 *userns_fd = usernsfd;
 359
 360         if (root_fd)
 361                 *root_fd = rfd;
 362
 363         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 364
 365         return 0;
 366 }
 367
 368 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 369         if (userns_fd >= 0) {
 370                 /* Can't setns to your own userns, since then you could
 371                  * escalate from non-root to root in your own namespace, so
 372                  * check if namespaces equal before attempting to enter. */
 373                 _cleanup_free_ char *userns_fd_path = NULL;
 374                 int r;
 375                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 376                         return -ENOMEM;
 377
 378                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 379                 if (r < 0)
 380                         return r;
 381                 if (r)
 382                         userns_fd = -1;
 383         }
 384
 385         if (pidns_fd >= 0)
 386                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 387                         return -errno;
 388
 389         if (mntns_fd >= 0)
 390                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 391                         return -errno;
 392
 393         if (netns_fd >= 0)
 394                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 395                         return -errno;
 396
 397         if (userns_fd >= 0)
 398                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 399                         return -errno;
 400
 401         if (root_fd >= 0) {
 402                 if (fchdir(root_fd) < 0)
 403                         return -errno;
 404
 405                 if (chroot(".") < 0)
 406                         return -errno;
 407         }
 408
 409         return reset_uid_gid();
 410 }
 411
 412 uint64_t physical_memory(void) {
 413         _cleanup_free_ char *root = NULL, *value = NULL;
 414         uint64_t mem, lim;
 415         size_t ps;
 416         long sc;
 417         int r;
 418
 419         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 420          * memory.
 421          *
 422          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 423          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 424
 425         sc = sysconf(_SC_PHYS_PAGES);
 426         assert(sc > 0);
 427
 428         ps = page_size();
 429         mem = (uint64_t) sc * (uint64_t) ps;
 430
 431         r = cg_get_root_path(&root);
 432         if (r < 0) {
 433                 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
 434                 return mem;
 435         }
 436
 437         r = cg_all_unified();
 438         if (r < 0) {
 439                 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
 440                 return mem;
 441         }
 442         if (r > 0) {
 443                 r = cg_get_attribute("memory", root, "memory.max", &value);
 444                 if (r < 0) {
 445                         log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
 446                         return mem;
 447                 }
 448
 449                 if (streq(value, "max"))
 450                         return mem;
 451         } else {
 452                 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
 453                 if (r < 0) {
 454                         log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
 455                         return mem;
 456                 }
 457         }
 458
 459         r = safe_atou64(value, &lim);
 460         if (r < 0) {
 461                 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
 462                 return mem;
 463         }
 464         if (lim == UINT64_MAX)
 465                 return mem;
 466
 467         /* Make sure the limit is a multiple of our own page size */
 468         lim /= ps;
 469         lim *= ps;
 470
 471         return MIN(mem, lim);
 472 }
 473
 474 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 475         uint64_t p, m, ps, r;
 476
 477         assert(max > 0);
 478
 479         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 480          * the result is a multiple of the page size (rounds down). */
 481
 482         ps = page_size();
 483         assert(ps > 0);
 484
 485         p = physical_memory() / ps;
 486         assert(p > 0);
 487
 488         m = p * v;
 489         if (m / p != v)
 490                 return UINT64_MAX;
 491
 492         m /= max;
 493
 494         r = m * ps;
 495         if (r / ps != m)
 496                 return UINT64_MAX;
 497
 498         return r;
 499 }
 500
 501 uint64_t system_tasks_max(void) {
 502
 503         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 504         _cleanup_free_ char *root = NULL;
 505         int r;
 506
 507         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 508          * limit:
 509          *
 510          * a) the maximum tasks value the kernel allows on this architecture
 511          * b) the cgroups pids_max attribute for the system
 512          * c) the kernel's configured maximum PID value
 513          *
 514          * And then pick the smallest of the three */
 515
 516         r = procfs_tasks_get_limit(&a);
 517         if (r < 0)
 518                 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
 519
 520         r = cg_get_root_path(&root);
 521         if (r < 0)
 522                 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
 523         else {
 524                 _cleanup_free_ char *value = NULL;
 525
 526                 r = cg_get_attribute("pids", root, "pids.max", &value);
 527                 if (r < 0)
 528                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
 529                 else if (!streq(value, "max")) {
 530                         r = safe_atou64(value, &b);
 531                         if (r < 0)
 532                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
 533                 }
 534         }
 535
 536         return MIN3(TASKS_MAX,
 537                     a <= 0 ? TASKS_MAX : a,
 538                     b <= 0 ? TASKS_MAX : b);
 539 }
 540
 541 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 542         uint64_t t, m;
 543
 544         assert(max > 0);
 545
 546         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 547          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 548
 549         t = system_tasks_max();
 550         assert(t > 0);
 551
 552         m = t * v;
 553         if (m / t != v) /* overflow? */
 554                 return UINT64_MAX;
 555
 556         return m / max;
 557 }
 558
 559 int version(void) {
 560         puts(PACKAGE_STRING "\n"
 561              SYSTEMD_FEATURES);
 562         return 0;
 563 }
 564
 565 /* This is a direct translation of str_verscmp from boot.c */
 566 static bool is_digit(int c) {
 567         return c >= '0' && c <= '9';
 568 }
 569
 570 static int c_order(int c) {
 571         if (c == 0 || is_digit(c))
 572                 return 0;
 573
 574         if ((c >= 'a') && (c <= 'z'))
 575                 return c;
 576
 577         return c + 0x10000;
 578 }
 579
 580 int str_verscmp(const char *s1, const char *s2) {
 581         const char *os1, *os2;
 582
 583         assert(s1);
 584         assert(s2);
 585
 586         os1 = s1;
 587         os2 = s2;
 588
 589         while (*s1 || *s2) {
 590                 int first;
 591
 592                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 593                         int order;
 594
 595                         order = c_order(*s1) - c_order(*s2);
 596                         if (order != 0)
 597                                 return order;
 598                         s1++;
 599                         s2++;
 600                 }
 601
 602                 while (*s1 == '0')
 603                         s1++;
 604                 while (*s2 == '0')
 605                         s2++;
 606
 607                 first = 0;
 608                 while (is_digit(*s1) && is_digit(*s2)) {
 609                         if (first == 0)
 610                                 first = *s1 - *s2;
 611                         s1++;
 612                         s2++;
 613                 }
 614
 615                 if (is_digit(*s1))
 616                         return 1;
 617                 if (is_digit(*s2))
 618                         return -1;
 619
 620                 if (first != 0)
 621                         return first;
 622         }
 623
 624         return strcmp(os1, os2);
 625 }
 626
 627 /* Turn off core dumps but only if we're running outside of a container. */
 628 void disable_coredumps(void) {
 629         int r;
 630
 631         if (detect_container() > 0)
 632                 return;
 633
 634         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
 635         if (r < 0)
 636                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 637 }