src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <alloca.h>
   4 #include <errno.h>
   5 #include <fcntl.h>
   6 #include <sched.h>
   7 #include <signal.h>
   8 #include <stdarg.h>
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <sys/mman.h>
  13 #include <sys/prctl.h>
  14 #include <sys/statfs.h>
  15 #include <sys/sysmacros.h>
  16 #include <sys/types.h>
  17 #include <unistd.h>
  18
  19 #include "alloc-util.h"
  20 #include "btrfs-util.h"
  21 #include "build.h"
  22 #include "cgroup-util.h"
  23 #include "def.h"
  24 #include "device-nodes.h"
  25 #include "dirent-util.h"
  26 #include "env-util.h"
  27 #include "fd-util.h"
  28 #include "fileio.h"
  29 #include "format-util.h"
  30 #include "hashmap.h"
  31 #include "hostname-util.h"
  32 #include "log.h"
  33 #include "macro.h"
  34 #include "missing.h"
  35 #include "parse-util.h"
  36 #include "path-util.h"
  37 #include "process-util.h"
  38 #include "procfs-util.h"
  39 #include "set.h"
  40 #include "signal-util.h"
  41 #include "stat-util.h"
  42 #include "string-util.h"
  43 #include "strv.h"
  44 #include "time-util.h"
  45 #include "umask-util.h"
  46 #include "user-util.h"
  47 #include "util.h"
  48 #include "virt.h"
  49
/* Copy of the process' command line. Nothing in this part of the file reads or writes these two;
 * presumably they are filled in by the program's main() — TODO confirm against callers. */
int saved_argc = 0;
char **saved_argv = NULL;
/* Cached result of in_initrd(): negative while not determined yet, otherwise the boolean answer.
 * in_initrd_force() overwrites it. */
static int saved_in_initrd = -1;
  53
  54 size_t page_size(void) {
  55         static thread_local size_t pgsz = 0;
  56         long r;
  57
  58         if (_likely_(pgsz > 0))
  59                 return pgsz;
  60
  61         r = sysconf(_SC_PAGESIZE);
  62         assert(r > 0);
  63
  64         pgsz = (size_t) r;
  65         return pgsz;
  66 }
  67
  68 bool plymouth_running(void) {
  69         return access("/run/plymouth/pid", F_OK) >= 0;
  70 }
  71
  72 bool display_is_local(const char *display) {
  73         assert(display);
  74
  75         return
  76                 display[0] == ':' &&
  77                 display[1] >= '0' &&
  78                 display[1] <= '9';
  79 }
  80
  81 bool kexec_loaded(void) {
  82        _cleanup_free_ char *s = NULL;
  83
  84        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
  85                return false;
  86
  87        return s[0] == '1';
  88 }
  89
  90 int prot_from_flags(int flags) {
  91
  92         switch (flags & O_ACCMODE) {
  93
  94         case O_RDONLY:
  95                 return PROT_READ;
  96
  97         case O_WRONLY:
  98                 return PROT_WRITE;
  99
 100         case O_RDWR:
 101                 return PROT_READ|PROT_WRITE;
 102
 103         default:
 104                 return -EINVAL;
 105         }
 106 }
 107
 108 bool in_initrd(void) {
 109         struct statfs s;
 110         int r;
 111
 112         if (saved_in_initrd >= 0)
 113                 return saved_in_initrd;
 114
 115         /* We make two checks here:
 116          *
 117          * 1. the flag file /etc/initrd-release must exist
 118          * 2. the root file system must be a memory file system
 119          *
 120          * The second check is extra paranoia, since misdetecting an
 121          * initrd can have bad consequences due the initrd
 122          * emptying when transititioning to the main systemd.
 123          */
 124
 125         r = getenv_bool_secure("SYSTEMD_IN_INITRD");
 126         if (r < 0 && r != -ENXIO)
 127                 log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
 128
 129         if (r >= 0)
 130                 saved_in_initrd = r > 0;
 131         else
 132                 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 133                                   statfs("/", &s) >= 0 &&
 134                                   is_temporary_fs(&s);
 135
 136         return saved_in_initrd;
 137 }
 138
 139 void in_initrd_force(bool value) {
 140         saved_in_initrd = value;
 141 }
 142
 143 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 144 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 145                  __compar_d_fn_t compar, void *arg) {
 146         size_t l, u, idx;
 147         const void *p;
 148         int comparison;
 149
 150         assert(!size_multiply_overflow(nmemb, size));
 151
 152         l = 0;
 153         u = nmemb;
 154         while (l < u) {
 155                 idx = (l + u) / 2;
 156                 p = (const uint8_t*) base + idx * size;
 157                 comparison = compar(key, p, arg);
 158                 if (comparison < 0)
 159                         u = idx;
 160                 else if (comparison > 0)
 161                         l = idx + 1;
 162                 else
 163                         return (void *)p;
 164         }
 165         return NULL;
 166 }
 167
 168 int on_ac_power(void) {
 169         bool found_offline = false, found_online = false;
 170         _cleanup_closedir_ DIR *d = NULL;
 171         struct dirent *de;
 172
 173         d = opendir("/sys/class/power_supply");
 174         if (!d)
 175                 return errno == ENOENT ? true : -errno;
 176
 177         FOREACH_DIRENT(de, d, return -errno) {
 178                 _cleanup_close_ int fd = -1, device = -1;
 179                 char contents[6];
 180                 ssize_t n;
 181
 182                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 183                 if (device < 0) {
 184                         if (IN_SET(errno, ENOENT, ENOTDIR))
 185                                 continue;
 186
 187                         return -errno;
 188                 }
 189
 190                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 191                 if (fd < 0) {
 192                         if (errno == ENOENT)
 193                                 continue;
 194
 195                         return -errno;
 196                 }
 197
 198                 n = read(fd, contents, sizeof(contents));
 199                 if (n < 0)
 200                         return -errno;
 201
 202                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 203                         continue;
 204
 205                 safe_close(fd);
 206                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 207                 if (fd < 0) {
 208                         if (errno == ENOENT)
 209                                 continue;
 210
 211                         return -errno;
 212                 }
 213
 214                 n = read(fd, contents, sizeof(contents));
 215                 if (n < 0)
 216                         return -errno;
 217
 218                 if (n != 2 || contents[1] != '\n')
 219                         return -EIO;
 220
 221                 if (contents[0] == '1') {
 222                         found_online = true;
 223                         break;
 224                 } else if (contents[0] == '0')
 225                         found_offline = true;
 226                 else
 227                         return -EIO;
 228         }
 229
 230         return found_online || !found_offline;
 231 }
 232
 233 int container_get_leader(const char *machine, pid_t *pid) {
 234         _cleanup_free_ char *s = NULL, *class = NULL;
 235         const char *p;
 236         pid_t leader;
 237         int r;
 238
 239         assert(machine);
 240         assert(pid);
 241
 242         if (streq(machine, ".host")) {
 243                 *pid = 1;
 244                 return 0;
 245         }
 246
 247         if (!machine_name_is_valid(machine))
 248                 return -EINVAL;
 249
 250         p = strjoina("/run/systemd/machines/", machine);
 251         r = parse_env_file(NULL, p,
 252                            "LEADER", &s,
 253                            "CLASS", &class);
 254         if (r == -ENOENT)
 255                 return -EHOSTDOWN;
 256         if (r < 0)
 257                 return r;
 258         if (!s)
 259                 return -EIO;
 260
 261         if (!streq_ptr(class, "container"))
 262                 return -EIO;
 263
 264         r = parse_pid(s, &leader);
 265         if (r < 0)
 266                 return r;
 267         if (leader <= 1)
 268                 return -EIO;
 269
 270         *pid = leader;
 271         return 0;
 272 }
 273
 274 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 275         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 276         int rfd = -1;
 277
 278         assert(pid >= 0);
 279
 280         if (mntns_fd) {
 281                 const char *mntns;
 282
 283                 mntns = procfs_file_alloca(pid, "ns/mnt");
 284                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 285                 if (mntnsfd < 0)
 286                         return -errno;
 287         }
 288
 289         if (pidns_fd) {
 290                 const char *pidns;
 291
 292                 pidns = procfs_file_alloca(pid, "ns/pid");
 293                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 294                 if (pidnsfd < 0)
 295                         return -errno;
 296         }
 297
 298         if (netns_fd) {
 299                 const char *netns;
 300
 301                 netns = procfs_file_alloca(pid, "ns/net");
 302                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 303                 if (netnsfd < 0)
 304                         return -errno;
 305         }
 306
 307         if (userns_fd) {
 308                 const char *userns;
 309
 310                 userns = procfs_file_alloca(pid, "ns/user");
 311                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 312                 if (usernsfd < 0 && errno != ENOENT)
 313                         return -errno;
 314         }
 315
 316         if (root_fd) {
 317                 const char *root;
 318
 319                 root = procfs_file_alloca(pid, "root");
 320                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 321                 if (rfd < 0)
 322                         return -errno;
 323         }
 324
 325         if (pidns_fd)
 326                 *pidns_fd = pidnsfd;
 327
 328         if (mntns_fd)
 329                 *mntns_fd = mntnsfd;
 330
 331         if (netns_fd)
 332                 *netns_fd = netnsfd;
 333
 334         if (userns_fd)
 335                 *userns_fd = usernsfd;
 336
 337         if (root_fd)
 338                 *root_fd = rfd;
 339
 340         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 341
 342         return 0;
 343 }
 344
 345 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 346         if (userns_fd >= 0) {
 347                 /* Can't setns to your own userns, since then you could
 348                  * escalate from non-root to root in your own namespace, so
 349                  * check if namespaces equal before attempting to enter. */
 350                 _cleanup_free_ char *userns_fd_path = NULL;
 351                 int r;
 352                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 353                         return -ENOMEM;
 354
 355                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 356                 if (r < 0)
 357                         return r;
 358                 if (r)
 359                         userns_fd = -1;
 360         }
 361
 362         if (pidns_fd >= 0)
 363                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 364                         return -errno;
 365
 366         if (mntns_fd >= 0)
 367                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 368                         return -errno;
 369
 370         if (netns_fd >= 0)
 371                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 372                         return -errno;
 373
 374         if (userns_fd >= 0)
 375                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 376                         return -errno;
 377
 378         if (root_fd >= 0) {
 379                 if (fchdir(root_fd) < 0)
 380                         return -errno;
 381
 382                 if (chroot(".") < 0)
 383                         return -errno;
 384         }
 385
 386         return reset_uid_gid();
 387 }
 388
 389 uint64_t physical_memory(void) {
 390         _cleanup_free_ char *root = NULL, *value = NULL;
 391         uint64_t mem, lim;
 392         size_t ps;
 393         long sc;
 394         int r;
 395
 396         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 397          * memory.
 398          *
 399          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 400          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 401
 402         sc = sysconf(_SC_PHYS_PAGES);
 403         assert(sc > 0);
 404
 405         ps = page_size();
 406         mem = (uint64_t) sc * (uint64_t) ps;
 407
 408         r = cg_get_root_path(&root);
 409         if (r < 0) {
 410                 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
 411                 return mem;
 412         }
 413
 414         r = cg_all_unified();
 415         if (r < 0) {
 416                 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
 417                 return mem;
 418         }
 419         if (r > 0) {
 420                 r = cg_get_attribute("memory", root, "memory.max", &value);
 421                 if (r < 0) {
 422                         log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
 423                         return mem;
 424                 }
 425
 426                 if (streq(value, "max"))
 427                         return mem;
 428         } else {
 429                 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
 430                 if (r < 0) {
 431                         log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
 432                         return mem;
 433                 }
 434         }
 435
 436         r = safe_atou64(value, &lim);
 437         if (r < 0) {
 438                 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
 439                 return mem;
 440         }
 441         if (lim == UINT64_MAX)
 442                 return mem;
 443
 444         /* Make sure the limit is a multiple of our own page size */
 445         lim /= ps;
 446         lim *= ps;
 447
 448         return MIN(mem, lim);
 449 }
 450
 451 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 452         uint64_t p, m, ps, r;
 453
 454         assert(max > 0);
 455
 456         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 457          * the result is a multiple of the page size (rounds down). */
 458
 459         ps = page_size();
 460         assert(ps > 0);
 461
 462         p = physical_memory() / ps;
 463         assert(p > 0);
 464
 465         m = p * v;
 466         if (m / p != v)
 467                 return UINT64_MAX;
 468
 469         m /= max;
 470
 471         r = m * ps;
 472         if (r / ps != m)
 473                 return UINT64_MAX;
 474
 475         return r;
 476 }
 477
 478 uint64_t system_tasks_max(void) {
 479
 480         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 481         _cleanup_free_ char *root = NULL;
 482         int r;
 483
 484         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 485          * limit:
 486          *
 487          * a) the maximum tasks value the kernel allows on this architecture
 488          * b) the cgroups pids_max attribute for the system
 489          * c) the kernel's configured maximum PID value
 490          *
 491          * And then pick the smallest of the three */
 492
 493         r = procfs_tasks_get_limit(&a);
 494         if (r < 0)
 495                 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
 496
 497         r = cg_get_root_path(&root);
 498         if (r < 0)
 499                 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
 500         else {
 501                 _cleanup_free_ char *value = NULL;
 502
 503                 r = cg_get_attribute("pids", root, "pids.max", &value);
 504                 if (r < 0)
 505                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
 506                 else if (!streq(value, "max")) {
 507                         r = safe_atou64(value, &b);
 508                         if (r < 0)
 509                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
 510                 }
 511         }
 512
 513         return MIN3(TASKS_MAX,
 514                     a <= 0 ? TASKS_MAX : a,
 515                     b <= 0 ? TASKS_MAX : b);
 516 }
 517
 518 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 519         uint64_t t, m;
 520
 521         assert(max > 0);
 522
 523         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 524          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 525
 526         t = system_tasks_max();
 527         assert(t > 0);
 528
 529         m = t * v;
 530         if (m / t != v) /* overflow? */
 531                 return UINT64_MAX;
 532
 533         return m / max;
 534 }
 535
 536 int version(void) {
 537         puts(PACKAGE_STRING "\n"
 538              SYSTEMD_FEATURES);
 539         return 0;
 540 }
 541
 542 /* This is a direct translation of str_verscmp from boot.c */
 543 static bool is_digit(int c) {
 544         return c >= '0' && c <= '9';
 545 }
 546
 547 static int c_order(int c) {
 548         if (c == 0 || is_digit(c))
 549                 return 0;
 550
 551         if ((c >= 'a') && (c <= 'z'))
 552                 return c;
 553
 554         return c + 0x10000;
 555 }
 556
 557 int str_verscmp(const char *s1, const char *s2) {
 558         const char *os1, *os2;
 559
 560         assert(s1);
 561         assert(s2);
 562
 563         os1 = s1;
 564         os2 = s2;
 565
 566         while (*s1 || *s2) {
 567                 int first;
 568
 569                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 570                         int order;
 571
 572                         order = c_order(*s1) - c_order(*s2);
 573                         if (order != 0)
 574                                 return order;
 575                         s1++;
 576                         s2++;
 577                 }
 578
 579                 while (*s1 == '0')
 580                         s1++;
 581                 while (*s2 == '0')
 582                         s2++;
 583
 584                 first = 0;
 585                 while (is_digit(*s1) && is_digit(*s2)) {
 586                         if (first == 0)
 587                                 first = *s1 - *s2;
 588                         s1++;
 589                         s2++;
 590                 }
 591
 592                 if (is_digit(*s1))
 593                         return 1;
 594                 if (is_digit(*s2))
 595                         return -1;
 596
 597                 if (first != 0)
 598                         return first;
 599         }
 600
 601         return strcmp(os1, os2);
 602 }
 603
 604 /* Turn off core dumps but only if we're running outside of a container. */
 605 void disable_coredumps(void) {
 606         int r;
 607
 608         if (detect_container() > 0)
 609                 return;
 610
 611         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
 612         if (r < 0)
 613                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 614 }