src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <alloca.h>
   4 #include <errno.h>
   5 #include <fcntl.h>
   6 #include <sched.h>
   7 #include <signal.h>
   8 #include <stdarg.h>
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <sys/mman.h>
  13 #include <sys/prctl.h>
  14 #include <sys/statfs.h>
  15 #include <sys/sysmacros.h>
  16 #include <sys/types.h>
  17 #include <unistd.h>
  18
  19 #include "alloc-util.h"
  20 #include "btrfs-util.h"
  21 #include "build.h"
  22 #include "cgroup-util.h"
  23 #include "def.h"
  24 #include "device-nodes.h"
  25 #include "dirent-util.h"
  26 #include "fd-util.h"
  27 #include "fileio.h"
  28 #include "format-util.h"
  29 #include "hashmap.h"
  30 #include "hostname-util.h"
  31 #include "log.h"
  32 #include "macro.h"
  33 #include "missing.h"
  34 #include "parse-util.h"
  35 #include "path-util.h"
  36 #include "process-util.h"
  37 #include "procfs-util.h"
  38 #include "set.h"
  39 #include "signal-util.h"
  40 #include "stat-util.h"
  41 #include "string-util.h"
  42 #include "strv.h"
  43 #include "time-util.h"
  44 #include "umask-util.h"
  45 #include "user-util.h"
  46 #include "util.h"
  47 #include "virt.h"
  48
/* Saved command line of the process. Only the zero/NULL defaults are established here;
 * presumably they are filled in by the program's main() — TODO confirm against callers. */
int saved_argc = 0;
char **saved_argv = NULL;
/* Cached answer of in_initrd(): negative means "not determined yet", otherwise the boolean
 * result. in_initrd_force() overwrites it. */
static int saved_in_initrd = -1;
  52
  53 size_t page_size(void) {
  54         static thread_local size_t pgsz = 0;
  55         long r;
  56
  57         if (_likely_(pgsz > 0))
  58                 return pgsz;
  59
  60         r = sysconf(_SC_PAGESIZE);
  61         assert(r > 0);
  62
  63         pgsz = (size_t) r;
  64         return pgsz;
  65 }
  66
  67 bool plymouth_running(void) {
  68         return access("/run/plymouth/pid", F_OK) >= 0;
  69 }
  70
  71 bool display_is_local(const char *display) {
  72         assert(display);
  73
  74         return
  75                 display[0] == ':' &&
  76                 display[1] >= '0' &&
  77                 display[1] <= '9';
  78 }
  79
  80 int socket_from_display(const char *display, char **path) {
  81         size_t k;
  82         char *f, *c;
  83
  84         assert(display);
  85         assert(path);
  86
  87         if (!display_is_local(display))
  88                 return -EINVAL;
  89
  90         k = strspn(display+1, "0123456789");
  91
  92         f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
  93         if (!f)
  94                 return -ENOMEM;
  95
  96         c = stpcpy(f, "/tmp/.X11-unix/X");
  97         memcpy(c, display+1, k);
  98         c[k] = 0;
  99
 100         *path = f;
 101
 102         return 0;
 103 }
 104
 105 bool kexec_loaded(void) {
 106        _cleanup_free_ char *s = NULL;
 107
 108        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
 109                return false;
 110
 111        return s[0] == '1';
 112 }
 113
 114 int prot_from_flags(int flags) {
 115
 116         switch (flags & O_ACCMODE) {
 117
 118         case O_RDONLY:
 119                 return PROT_READ;
 120
 121         case O_WRONLY:
 122                 return PROT_WRITE;
 123
 124         case O_RDWR:
 125                 return PROT_READ|PROT_WRITE;
 126
 127         default:
 128                 return -EINVAL;
 129         }
 130 }
 131
 132 bool in_initrd(void) {
 133         struct statfs s;
 134
 135         if (saved_in_initrd >= 0)
 136                 return saved_in_initrd;
 137
 138         /* We make two checks here:
 139          *
 140          * 1. the flag file /etc/initrd-release must exist
 141          * 2. the root file system must be a memory file system
 142          *
 143          * The second check is extra paranoia, since misdetecting an
 144          * initrd can have bad consequences due the initrd
 145          * emptying when transititioning to the main systemd.
 146          */
 147
 148         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 149                           statfs("/", &s) >= 0 &&
 150                           is_temporary_fs(&s);
 151
 152         return saved_in_initrd;
 153 }
 154
 155 void in_initrd_force(bool value) {
 156         saved_in_initrd = value;
 157 }
 158
 159 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 160 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 161                  int (*compar) (const void *, const void *, void *), void *arg) {
 162         size_t l, u, idx;
 163         const void *p;
 164         int comparison;
 165
 166         assert(!size_multiply_overflow(nmemb, size));
 167
 168         l = 0;
 169         u = nmemb;
 170         while (l < u) {
 171                 idx = (l + u) / 2;
 172                 p = (const uint8_t*) base + idx * size;
 173                 comparison = compar(key, p, arg);
 174                 if (comparison < 0)
 175                         u = idx;
 176                 else if (comparison > 0)
 177                         l = idx + 1;
 178                 else
 179                         return (void *)p;
 180         }
 181         return NULL;
 182 }
 183
/* Determines whether the system runs on AC power by scanning /sys/class/power_supply.
 *
 * Returns > 0 if at least one "Mains" supply reports online, or if no "Mains" supply was seen offline
 * (which includes the case of no "Mains" supply at all, and of the directory not existing). Returns 0
 * if only offline "Mains" supplies were found. Returns a negative errno-style value on I/O errors, and
 * -EIO if an "online" attribute has an unexpected format. */
int on_ac_power(void) {
        bool found_offline = false, found_online = false;
        _cleanup_closedir_ DIR *d = NULL;
        struct dirent *de;

        d = opendir("/sys/class/power_supply");
        if (!d)
                /* No power supply class at all: treat as "on AC power" */
                return errno == ENOENT ? true : -errno;

        FOREACH_DIRENT(de, d, return -errno) {
                _cleanup_close_ int fd = -1, device = -1;
                char contents[6];
                ssize_t n;

                /* Each power supply is a directory; skip entries that vanished or are not directories */
                device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (device < 0) {
                        if (IN_SET(errno, ENOENT, ENOTDIR))
                                continue;

                        return -errno;
                }

                fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (fd < 0) {
                        if (errno == ENOENT)
                                continue;

                        return -errno;
                }

                n = read(fd, contents, sizeof(contents));
                if (n < 0)
                        return -errno;

                /* Only supplies whose type reads exactly "Mains\n" are of interest */
                if (n != 6 || memcmp(contents, "Mains\n", 6))
                        continue;

                /* Done with "type"; reuse fd for the "online" attribute */
                safe_close(fd);
                fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (fd < 0) {
                        if (errno == ENOENT)
                                continue;

                        return -errno;
                }

                n = read(fd, contents, sizeof(contents));
                if (n < 0)
                        return -errno;

                /* Expect exactly one character followed by a newline */
                if (n != 2 || contents[1] != '\n')
                        return -EIO;

                if (contents[0] == '1') {
                        /* One online mains supply is enough, no need to look further */
                        found_online = true;
                        break;
                } else if (contents[0] == '0')
                        found_offline = true;
                else
                        return -EIO;
        }

        return found_online || !found_offline;
}
 248
 249 int container_get_leader(const char *machine, pid_t *pid) {
 250         _cleanup_free_ char *s = NULL, *class = NULL;
 251         const char *p;
 252         pid_t leader;
 253         int r;
 254
 255         assert(machine);
 256         assert(pid);
 257
 258         if (!machine_name_is_valid(machine))
 259                 return -EINVAL;
 260
 261         p = strjoina("/run/systemd/machines/", machine);
 262         r = parse_env_file(NULL, p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 263         if (r == -ENOENT)
 264                 return -EHOSTDOWN;
 265         if (r < 0)
 266                 return r;
 267         if (!s)
 268                 return -EIO;
 269
 270         if (!streq_ptr(class, "container"))
 271                 return -EIO;
 272
 273         r = parse_pid(s, &leader);
 274         if (r < 0)
 275                 return r;
 276         if (leader <= 1)
 277                 return -EIO;
 278
 279         *pid = leader;
 280         return 0;
 281 }
 282
 283 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 284         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 285         int rfd = -1;
 286
 287         assert(pid >= 0);
 288
 289         if (mntns_fd) {
 290                 const char *mntns;
 291
 292                 mntns = procfs_file_alloca(pid, "ns/mnt");
 293                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 294                 if (mntnsfd < 0)
 295                         return -errno;
 296         }
 297
 298         if (pidns_fd) {
 299                 const char *pidns;
 300
 301                 pidns = procfs_file_alloca(pid, "ns/pid");
 302                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 303                 if (pidnsfd < 0)
 304                         return -errno;
 305         }
 306
 307         if (netns_fd) {
 308                 const char *netns;
 309
 310                 netns = procfs_file_alloca(pid, "ns/net");
 311                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 312                 if (netnsfd < 0)
 313                         return -errno;
 314         }
 315
 316         if (userns_fd) {
 317                 const char *userns;
 318
 319                 userns = procfs_file_alloca(pid, "ns/user");
 320                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 321                 if (usernsfd < 0 && errno != ENOENT)
 322                         return -errno;
 323         }
 324
 325         if (root_fd) {
 326                 const char *root;
 327
 328                 root = procfs_file_alloca(pid, "root");
 329                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 330                 if (rfd < 0)
 331                         return -errno;
 332         }
 333
 334         if (pidns_fd)
 335                 *pidns_fd = pidnsfd;
 336
 337         if (mntns_fd)
 338                 *mntns_fd = mntnsfd;
 339
 340         if (netns_fd)
 341                 *netns_fd = netnsfd;
 342
 343         if (userns_fd)
 344                 *userns_fd = usernsfd;
 345
 346         if (root_fd)
 347                 *root_fd = rfd;
 348
 349         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 350
 351         return 0;
 352 }
 353
 354 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 355         if (userns_fd >= 0) {
 356                 /* Can't setns to your own userns, since then you could
 357                  * escalate from non-root to root in your own namespace, so
 358                  * check if namespaces equal before attempting to enter. */
 359                 _cleanup_free_ char *userns_fd_path = NULL;
 360                 int r;
 361                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 362                         return -ENOMEM;
 363
 364                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 365                 if (r < 0)
 366                         return r;
 367                 if (r)
 368                         userns_fd = -1;
 369         }
 370
 371         if (pidns_fd >= 0)
 372                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 373                         return -errno;
 374
 375         if (mntns_fd >= 0)
 376                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 377                         return -errno;
 378
 379         if (netns_fd >= 0)
 380                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 381                         return -errno;
 382
 383         if (userns_fd >= 0)
 384                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 385                         return -errno;
 386
 387         if (root_fd >= 0) {
 388                 if (fchdir(root_fd) < 0)
 389                         return -errno;
 390
 391                 if (chroot(".") < 0)
 392                         return -errno;
 393         }
 394
 395         return reset_uid_gid();
 396 }
 397
 398 uint64_t physical_memory(void) {
 399         _cleanup_free_ char *root = NULL, *value = NULL;
 400         uint64_t mem, lim;
 401         size_t ps;
 402         long sc;
 403         int r;
 404
 405         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 406          * memory.
 407          *
 408          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 409          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 410
 411         sc = sysconf(_SC_PHYS_PAGES);
 412         assert(sc > 0);
 413
 414         ps = page_size();
 415         mem = (uint64_t) sc * (uint64_t) ps;
 416
 417         r = cg_get_root_path(&root);
 418         if (r < 0) {
 419                 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
 420                 return mem;
 421         }
 422
 423         r = cg_all_unified();
 424         if (r < 0) {
 425                 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
 426                 return mem;
 427         }
 428         if (r > 0) {
 429                 r = cg_get_attribute("memory", root, "memory.max", &value);
 430                 if (r < 0) {
 431                         log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
 432                         return mem;
 433                 }
 434
 435                 if (streq(value, "max"))
 436                         return mem;
 437         } else {
 438                 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
 439                 if (r < 0) {
 440                         log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
 441                         return mem;
 442                 }
 443         }
 444
 445         r = safe_atou64(value, &lim);
 446         if (r < 0) {
 447                 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
 448                 return mem;
 449         }
 450         if (lim == UINT64_MAX)
 451                 return mem;
 452
 453         /* Make sure the limit is a multiple of our own page size */
 454         lim /= ps;
 455         lim *= ps;
 456
 457         return MIN(mem, lim);
 458 }
 459
 460 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 461         uint64_t p, m, ps, r;
 462
 463         assert(max > 0);
 464
 465         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 466          * the result is a multiple of the page size (rounds down). */
 467
 468         ps = page_size();
 469         assert(ps > 0);
 470
 471         p = physical_memory() / ps;
 472         assert(p > 0);
 473
 474         m = p * v;
 475         if (m / p != v)
 476                 return UINT64_MAX;
 477
 478         m /= max;
 479
 480         r = m * ps;
 481         if (r / ps != m)
 482                 return UINT64_MAX;
 483
 484         return r;
 485 }
 486
 487 uint64_t system_tasks_max(void) {
 488
 489         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 490         _cleanup_free_ char *root = NULL;
 491         int r;
 492
 493         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 494          * limit:
 495          *
 496          * a) the maximum tasks value the kernel allows on this architecture
 497          * b) the cgroups pids_max attribute for the system
 498          * c) the kernel's configured maximum PID value
 499          *
 500          * And then pick the smallest of the three */
 501
 502         r = procfs_tasks_get_limit(&a);
 503         if (r < 0)
 504                 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
 505
 506         r = cg_get_root_path(&root);
 507         if (r < 0)
 508                 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
 509         else {
 510                 _cleanup_free_ char *value = NULL;
 511
 512                 r = cg_get_attribute("pids", root, "pids.max", &value);
 513                 if (r < 0)
 514                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
 515                 else if (!streq(value, "max")) {
 516                         r = safe_atou64(value, &b);
 517                         if (r < 0)
 518                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
 519                 }
 520         }
 521
 522         return MIN3(TASKS_MAX,
 523                     a <= 0 ? TASKS_MAX : a,
 524                     b <= 0 ? TASKS_MAX : b);
 525 }
 526
 527 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 528         uint64_t t, m;
 529
 530         assert(max > 0);
 531
 532         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 533          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 534
 535         t = system_tasks_max();
 536         assert(t > 0);
 537
 538         m = t * v;
 539         if (m / t != v) /* overflow? */
 540                 return UINT64_MAX;
 541
 542         return m / max;
 543 }
 544
/* Prints the package string, a newline and then the feature string to standard output (puts() appends
 * the final newline). Always returns 0. */
int version(void) {
        puts(PACKAGE_STRING "\n"
             SYSTEMD_FEATURES);
        return 0;
}
 550
 551 /* This is a direct translation of str_verscmp from boot.c */
 552 static bool is_digit(int c) {
 553         return c >= '0' && c <= '9';
 554 }
 555
 556 static int c_order(int c) {
 557         if (c == 0 || is_digit(c))
 558                 return 0;
 559
 560         if ((c >= 'a') && (c <= 'z'))
 561                 return c;
 562
 563         return c + 0x10000;
 564 }
 565
 566 int str_verscmp(const char *s1, const char *s2) {
 567         const char *os1, *os2;
 568
 569         assert(s1);
 570         assert(s2);
 571
 572         os1 = s1;
 573         os2 = s2;
 574
 575         while (*s1 || *s2) {
 576                 int first;
 577
 578                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 579                         int order;
 580
 581                         order = c_order(*s1) - c_order(*s2);
 582                         if (order != 0)
 583                                 return order;
 584                         s1++;
 585                         s2++;
 586                 }
 587
 588                 while (*s1 == '0')
 589                         s1++;
 590                 while (*s2 == '0')
 591                         s2++;
 592
 593                 first = 0;
 594                 while (is_digit(*s1) && is_digit(*s2)) {
 595                         if (first == 0)
 596                                 first = *s1 - *s2;
 597                         s1++;
 598                         s2++;
 599                 }
 600
 601                 if (is_digit(*s1))
 602                         return 1;
 603                 if (is_digit(*s2))
 604                         return -1;
 605
 606                 if (first != 0)
 607                         return first;
 608         }
 609
 610         return strcmp(os1, os2);
 611 }
 612
 613 /* Turn off core dumps but only if we're running outside of a container. */
 614 void disable_coredumps(void) {
 615         int r;
 616
 617         if (detect_container() > 0)
 618                 return;
 619
 620         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
 621         if (r < 0)
 622                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 623 }