src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6 ***/
   7
   8 #include <alloca.h>
   9 #include <errno.h>
  10 #include <fcntl.h>
  11 #include <sched.h>
  12 #include <signal.h>
  13 #include <stdarg.h>
  14 #include <stdio.h>
  15 #include <stdlib.h>
  16 #include <string.h>
  17 #include <sys/mman.h>
  18 #include <sys/prctl.h>
  19 #include <sys/statfs.h>
  20 #include <sys/sysmacros.h>
  21 #include <sys/types.h>
  22 #include <unistd.h>
  23
  24 #include "alloc-util.h"
  25 #include "btrfs-util.h"
  26 #include "build.h"
  27 #include "cgroup-util.h"
  28 #include "def.h"
  29 #include "device-nodes.h"
  30 #include "dirent-util.h"
  31 #include "fd-util.h"
  32 #include "fileio.h"
  33 #include "format-util.h"
  34 #include "hashmap.h"
  35 #include "hostname-util.h"
  36 #include "log.h"
  37 #include "macro.h"
  38 #include "missing.h"
  39 #include "parse-util.h"
  40 #include "path-util.h"
  41 #include "process-util.h"
  42 #include "procfs-util.h"
  43 #include "set.h"
  44 #include "signal-util.h"
  45 #include "stat-util.h"
  46 #include "string-util.h"
  47 #include "strv.h"
  48 #include "time-util.h"
  49 #include "umask-util.h"
  50 #include "user-util.h"
  51 #include "util.h"
  52 #include "virt.h"
  53
   54 int saved_argc = 0; /* starts out as 0; nothing in the visible part of this file assigns it (presumably filled in from main() elsewhere, confirm) */
   55 char **saved_argv = NULL; /* starts out as NULL; nothing in the visible part of this file assigns it (presumably filled in from main() elsewhere, confirm) */
   56 static int saved_in_initrd = -1; /* cache used by in_initrd(): negative means "not determined yet", otherwise the detected or forced result */
  57
  58 size_t page_size(void) {
  59         static thread_local size_t pgsz = 0;
  60         long r;
  61
  62         if (_likely_(pgsz > 0))
  63                 return pgsz;
  64
  65         r = sysconf(_SC_PAGESIZE);
  66         assert(r > 0);
  67
  68         pgsz = (size_t) r;
  69         return pgsz;
  70 }
  71
  72 bool plymouth_running(void) {
  73         return access("/run/plymouth/pid", F_OK) >= 0;
  74 }
  75
  76 bool display_is_local(const char *display) {
  77         assert(display);
  78
  79         return
  80                 display[0] == ':' &&
  81                 display[1] >= '0' &&
  82                 display[1] <= '9';
  83 }
  84
  85 int socket_from_display(const char *display, char **path) {
  86         size_t k;
  87         char *f, *c;
  88
  89         assert(display);
  90         assert(path);
  91
  92         if (!display_is_local(display))
  93                 return -EINVAL;
  94
  95         k = strspn(display+1, "0123456789");
  96
  97         f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
  98         if (!f)
  99                 return -ENOMEM;
 100
 101         c = stpcpy(f, "/tmp/.X11-unix/X");
 102         memcpy(c, display+1, k);
 103         c[k] = 0;
 104
 105         *path = f;
 106
 107         return 0;
 108 }
 109
 110 bool kexec_loaded(void) {
 111        _cleanup_free_ char *s = NULL;
 112
 113        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
 114                return false;
 115
 116        return s[0] == '1';
 117 }
 118
 119 int prot_from_flags(int flags) {
 120
 121         switch (flags & O_ACCMODE) {
 122
 123         case O_RDONLY:
 124                 return PROT_READ;
 125
 126         case O_WRONLY:
 127                 return PROT_WRITE;
 128
 129         case O_RDWR:
 130                 return PROT_READ|PROT_WRITE;
 131
 132         default:
 133                 return -EINVAL;
 134         }
 135 }
 136
 137 bool in_initrd(void) {
 138         struct statfs s;
 139
 140         if (saved_in_initrd >= 0)
 141                 return saved_in_initrd;
 142
 143         /* We make two checks here:
 144          *
 145          * 1. the flag file /etc/initrd-release must exist
 146          * 2. the root file system must be a memory file system
 147          *
 148          * The second check is extra paranoia, since misdetecting an
 149          * initrd can have bad consequences due the initrd
 150          * emptying when transititioning to the main systemd.
 151          */
 152
 153         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 154                           statfs("/", &s) >= 0 &&
 155                           is_temporary_fs(&s);
 156
 157         return saved_in_initrd;
 158 }
 159
 160 void in_initrd_force(bool value) {
 161         saved_in_initrd = value;
 162 }
 163
 164 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 165 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 166                  int (*compar) (const void *, const void *, void *), void *arg) {
 167         size_t l, u, idx;
 168         const void *p;
 169         int comparison;
 170
 171         assert(!size_multiply_overflow(nmemb, size));
 172
 173         l = 0;
 174         u = nmemb;
 175         while (l < u) {
 176                 idx = (l + u) / 2;
 177                 p = (const uint8_t*) base + idx * size;
 178                 comparison = compar(key, p, arg);
 179                 if (comparison < 0)
 180                         u = idx;
 181                 else if (comparison > 0)
 182                         l = idx + 1;
 183                 else
 184                         return (void *)p;
 185         }
 186         return NULL;
 187 }
 188
 189 int on_ac_power(void) {
 190         bool found_offline = false, found_online = false;
 191         _cleanup_closedir_ DIR *d = NULL;
 192         struct dirent *de;
 193
 194         d = opendir("/sys/class/power_supply");
 195         if (!d)
 196                 return errno == ENOENT ? true : -errno;
 197
 198         FOREACH_DIRENT(de, d, return -errno) {
 199                 _cleanup_close_ int fd = -1, device = -1;
 200                 char contents[6];
 201                 ssize_t n;
 202
 203                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 204                 if (device < 0) {
 205                         if (IN_SET(errno, ENOENT, ENOTDIR))
 206                                 continue;
 207
 208                         return -errno;
 209                 }
 210
 211                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 212                 if (fd < 0) {
 213                         if (errno == ENOENT)
 214                                 continue;
 215
 216                         return -errno;
 217                 }
 218
 219                 n = read(fd, contents, sizeof(contents));
 220                 if (n < 0)
 221                         return -errno;
 222
 223                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 224                         continue;
 225
 226                 safe_close(fd);
 227                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 228                 if (fd < 0) {
 229                         if (errno == ENOENT)
 230                                 continue;
 231
 232                         return -errno;
 233                 }
 234
 235                 n = read(fd, contents, sizeof(contents));
 236                 if (n < 0)
 237                         return -errno;
 238
 239                 if (n != 2 || contents[1] != '\n')
 240                         return -EIO;
 241
 242                 if (contents[0] == '1') {
 243                         found_online = true;
 244                         break;
 245                 } else if (contents[0] == '0')
 246                         found_offline = true;
 247                 else
 248                         return -EIO;
 249         }
 250
 251         return found_online || !found_offline;
 252 }
 253
 254 int container_get_leader(const char *machine, pid_t *pid) {
 255         _cleanup_free_ char *s = NULL, *class = NULL;
 256         const char *p;
 257         pid_t leader;
 258         int r;
 259
 260         assert(machine);
 261         assert(pid);
 262
 263         if (!machine_name_is_valid(machine))
 264                 return -EINVAL;
 265
 266         p = strjoina("/run/systemd/machines/", machine);
 267         r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 268         if (r == -ENOENT)
 269                 return -EHOSTDOWN;
 270         if (r < 0)
 271                 return r;
 272         if (!s)
 273                 return -EIO;
 274
 275         if (!streq_ptr(class, "container"))
 276                 return -EIO;
 277
 278         r = parse_pid(s, &leader);
 279         if (r < 0)
 280                 return r;
 281         if (leader <= 1)
 282                 return -EIO;
 283
 284         *pid = leader;
 285         return 0;
 286 }
 287
 288 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 289         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 290         int rfd = -1;
 291
 292         assert(pid >= 0);
 293
 294         if (mntns_fd) {
 295                 const char *mntns;
 296
 297                 mntns = procfs_file_alloca(pid, "ns/mnt");
 298                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 299                 if (mntnsfd < 0)
 300                         return -errno;
 301         }
 302
 303         if (pidns_fd) {
 304                 const char *pidns;
 305
 306                 pidns = procfs_file_alloca(pid, "ns/pid");
 307                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 308                 if (pidnsfd < 0)
 309                         return -errno;
 310         }
 311
 312         if (netns_fd) {
 313                 const char *netns;
 314
 315                 netns = procfs_file_alloca(pid, "ns/net");
 316                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 317                 if (netnsfd < 0)
 318                         return -errno;
 319         }
 320
 321         if (userns_fd) {
 322                 const char *userns;
 323
 324                 userns = procfs_file_alloca(pid, "ns/user");
 325                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 326                 if (usernsfd < 0 && errno != ENOENT)
 327                         return -errno;
 328         }
 329
 330         if (root_fd) {
 331                 const char *root;
 332
 333                 root = procfs_file_alloca(pid, "root");
 334                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 335                 if (rfd < 0)
 336                         return -errno;
 337         }
 338
 339         if (pidns_fd)
 340                 *pidns_fd = pidnsfd;
 341
 342         if (mntns_fd)
 343                 *mntns_fd = mntnsfd;
 344
 345         if (netns_fd)
 346                 *netns_fd = netnsfd;
 347
 348         if (userns_fd)
 349                 *userns_fd = usernsfd;
 350
 351         if (root_fd)
 352                 *root_fd = rfd;
 353
 354         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 355
 356         return 0;
 357 }
 358
 359 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 360         if (userns_fd >= 0) {
 361                 /* Can't setns to your own userns, since then you could
 362                  * escalate from non-root to root in your own namespace, so
 363                  * check if namespaces equal before attempting to enter. */
 364                 _cleanup_free_ char *userns_fd_path = NULL;
 365                 int r;
 366                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 367                         return -ENOMEM;
 368
 369                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 370                 if (r < 0)
 371                         return r;
 372                 if (r)
 373                         userns_fd = -1;
 374         }
 375
 376         if (pidns_fd >= 0)
 377                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 378                         return -errno;
 379
 380         if (mntns_fd >= 0)
 381                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 382                         return -errno;
 383
 384         if (netns_fd >= 0)
 385                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 386                         return -errno;
 387
 388         if (userns_fd >= 0)
 389                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 390                         return -errno;
 391
 392         if (root_fd >= 0) {
 393                 if (fchdir(root_fd) < 0)
 394                         return -errno;
 395
 396                 if (chroot(".") < 0)
 397                         return -errno;
 398         }
 399
 400         return reset_uid_gid();
 401 }
 402
 403 uint64_t physical_memory(void) {
 404         _cleanup_free_ char *root = NULL, *value = NULL;
 405         uint64_t mem, lim;
 406         size_t ps;
 407         long sc;
 408
 409         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 410          * memory.
 411          *
 412          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 413          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 414
 415         sc = sysconf(_SC_PHYS_PAGES);
 416         assert(sc > 0);
 417
 418         ps = page_size();
 419         mem = (uint64_t) sc * (uint64_t) ps;
 420
 421         if (cg_get_root_path(&root) < 0)
 422                 return mem;
 423
 424         if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
 425                 return mem;
 426
 427         if (safe_atou64(value, &lim) < 0)
 428                 return mem;
 429
 430         /* Make sure the limit is a multiple of our own page size */
 431         lim /= ps;
 432         lim *= ps;
 433
 434         return MIN(mem, lim);
 435 }
 436
 437 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 438         uint64_t p, m, ps, r;
 439
 440         assert(max > 0);
 441
 442         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 443          * the result is a multiple of the page size (rounds down). */
 444
 445         ps = page_size();
 446         assert(ps > 0);
 447
 448         p = physical_memory() / ps;
 449         assert(p > 0);
 450
 451         m = p * v;
 452         if (m / p != v)
 453                 return UINT64_MAX;
 454
 455         m /= max;
 456
 457         r = m * ps;
 458         if (r / ps != m)
 459                 return UINT64_MAX;
 460
 461         return r;
 462 }
 463
 464 uint64_t system_tasks_max(void) {
 465
 466         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 467         _cleanup_free_ char *root = NULL;
 468
 469         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 470          * limit:
 471          *
 472          * a) the maximum tasks value the kernel allows on this architecture
 473          * b) the cgroups pids_max attribute for the system
 474          * c) the kernel's configured maximum PID value
 475          *
 476          * And then pick the smallest of the three */
 477
 478         (void) procfs_tasks_get_limit(&a);
 479
 480         if (cg_get_root_path(&root) >= 0) {
 481                 _cleanup_free_ char *value = NULL;
 482
 483                 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
 484                         (void) safe_atou64(value, &b);
 485         }
 486
 487         return MIN3(TASKS_MAX,
 488                     a <= 0 ? TASKS_MAX : a,
 489                     b <= 0 ? TASKS_MAX : b);
 490 }
 491
 492 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 493         uint64_t t, m;
 494
 495         assert(max > 0);
 496
 497         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 498          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 499
 500         t = system_tasks_max();
 501         assert(t > 0);
 502
 503         m = t * v;
 504         if (m / t != v) /* overflow? */
 505                 return UINT64_MAX;
 506
 507         return m / max;
 508 }
 509
 510 int version(void) {
 511         puts(PACKAGE_STRING "\n"
 512              SYSTEMD_FEATURES);
 513         return 0;
 514 }
 515
 516 /* This is a direct translation of str_verscmp from boot.c */
 517 static bool is_digit(int c) {
 518         return c >= '0' && c <= '9';
 519 }
 520
 521 static int c_order(int c) {
 522         if (c == 0 || is_digit(c))
 523                 return 0;
 524
 525         if ((c >= 'a') && (c <= 'z'))
 526                 return c;
 527
 528         return c + 0x10000;
 529 }
 530
 531 int str_verscmp(const char *s1, const char *s2) {
 532         const char *os1, *os2;
 533
 534         assert(s1);
 535         assert(s2);
 536
 537         os1 = s1;
 538         os2 = s2;
 539
 540         while (*s1 || *s2) {
 541                 int first;
 542
 543                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 544                         int order;
 545
 546                         order = c_order(*s1) - c_order(*s2);
 547                         if (order != 0)
 548                                 return order;
 549                         s1++;
 550                         s2++;
 551                 }
 552
 553                 while (*s1 == '0')
 554                         s1++;
 555                 while (*s2 == '0')
 556                         s2++;
 557
 558                 first = 0;
 559                 while (is_digit(*s1) && is_digit(*s2)) {
 560                         if (first == 0)
 561                                 first = *s1 - *s2;
 562                         s1++;
 563                         s2++;
 564                 }
 565
 566                 if (is_digit(*s1))
 567                         return 1;
 568                 if (is_digit(*s2))
 569                         return -1;
 570
 571                 if (first != 0)
 572                         return first;
 573         }
 574
 575         return strcmp(os1, os2);
 576 }
 577
 578 /* Turn off core dumps but only if we're running outside of a container. */
 579 void disable_coredumps(void) {
 580         int r;
 581
 582         if (detect_container() > 0)
 583                 return;
 584
 585         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
 586         if (r < 0)
 587                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 588 }