src/basic/util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <alloca.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <sched.h>
  24 #include <signal.h>
  25 #include <stdarg.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <sys/mman.h>
  30 #include <sys/prctl.h>
  31 #include <sys/statfs.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/types.h>
  34 #include <unistd.h>
  35
  36 #include "alloc-util.h"
  37 #include "build.h"
  38 #include "cgroup-util.h"
  39 #include "def.h"
  40 #include "dirent-util.h"
  41 #include "fd-util.h"
  42 #include "fileio.h"
  43 #include "format-util.h"
  44 #include "hashmap.h"
  45 #include "hostname-util.h"
  46 #include "log.h"
  47 #include "macro.h"
  48 #include "missing.h"
  49 #include "parse-util.h"
  50 #include "path-util.h"
  51 #include "process-util.h"
  52 #include "set.h"
  53 #include "signal-util.h"
  54 #include "stat-util.h"
  55 #include "string-util.h"
  56 #include "strv.h"
  57 #include "time-util.h"
  58 #include "umask-util.h"
  59 #include "user-util.h"
  60 #include "util.h"
  61
/* Process-wide copies of argc/argv. Nothing in this part of the file assigns them
 * (presumably they are filled in by the program's entry point — TODO confirm against callers). */
int saved_argc = 0;
char **saved_argv = NULL;
/* Cached answer of in_initrd(): negative means "not determined yet", otherwise it is used as a boolean.
 * in_initrd_force() overwrites it directly. */
static int saved_in_initrd = -1;
  65
  66 size_t page_size(void) {
  67         static thread_local size_t pgsz = 0;
  68         long r;
  69
  70         if (_likely_(pgsz > 0))
  71                 return pgsz;
  72
  73         r = sysconf(_SC_PAGESIZE);
  74         assert(r > 0);
  75
  76         pgsz = (size_t) r;
  77         return pgsz;
  78 }
  79
  80 bool plymouth_running(void) {
  81         return access("/run/plymouth/pid", F_OK) >= 0;
  82 }
  83
  84 bool display_is_local(const char *display) {
  85         assert(display);
  86
  87         return
  88                 display[0] == ':' &&
  89                 display[1] >= '0' &&
  90                 display[1] <= '9';
  91 }
  92
  93 int socket_from_display(const char *display, char **path) {
  94         size_t k;
  95         char *f, *c;
  96
  97         assert(display);
  98         assert(path);
  99
 100         if (!display_is_local(display))
 101                 return -EINVAL;
 102
 103         k = strspn(display+1, "0123456789");
 104
 105         f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
 106         if (!f)
 107                 return -ENOMEM;
 108
 109         c = stpcpy(f, "/tmp/.X11-unix/X");
 110         memcpy(c, display+1, k);
 111         c[k] = 0;
 112
 113         *path = f;
 114
 115         return 0;
 116 }
 117
 118 int block_get_whole_disk(dev_t d, dev_t *ret) {
 119         char *p, *s;
 120         int r;
 121         unsigned n, m;
 122
 123         assert(ret);
 124
 125         /* If it has a queue this is good enough for us */
 126         if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
 127                 return -ENOMEM;
 128
 129         r = access(p, F_OK);
 130         free(p);
 131
 132         if (r >= 0) {
 133                 *ret = d;
 134                 return 0;
 135         }
 136
 137         /* If it is a partition find the originating device */
 138         if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
 139                 return -ENOMEM;
 140
 141         r = access(p, F_OK);
 142         free(p);
 143
 144         if (r < 0)
 145                 return -ENOENT;
 146
 147         /* Get parent dev_t */
 148         if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
 149                 return -ENOMEM;
 150
 151         r = read_one_line_file(p, &s);
 152         free(p);
 153
 154         if (r < 0)
 155                 return r;
 156
 157         r = sscanf(s, "%u:%u", &m, &n);
 158         free(s);
 159
 160         if (r != 2)
 161                 return -EINVAL;
 162
 163         /* Only return this if it is really good enough for us. */
 164         if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
 165                 return -ENOMEM;
 166
 167         r = access(p, F_OK);
 168         free(p);
 169
 170         if (r >= 0) {
 171                 *ret = makedev(m, n);
 172                 return 0;
 173         }
 174
 175         return -ENOENT;
 176 }
 177
 178 bool kexec_loaded(void) {
 179        bool loaded = false;
 180        char *s;
 181
 182        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) {
 183                if (s[0] == '1')
 184                        loaded = true;
 185                free(s);
 186        }
 187        return loaded;
 188 }
 189
 190 int prot_from_flags(int flags) {
 191
 192         switch (flags & O_ACCMODE) {
 193
 194         case O_RDONLY:
 195                 return PROT_READ;
 196
 197         case O_WRONLY:
 198                 return PROT_WRITE;
 199
 200         case O_RDWR:
 201                 return PROT_READ|PROT_WRITE;
 202
 203         default:
 204                 return -EINVAL;
 205         }
 206 }
 207
 208 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
 209         bool stdout_is_tty, stderr_is_tty;
 210         pid_t parent_pid, agent_pid;
 211         sigset_t ss, saved_ss;
 212         unsigned n, i;
 213         va_list ap;
 214         char **l;
 215
 216         assert(pid);
 217         assert(path);
 218
 219         /* Spawns a temporary TTY agent, making sure it goes away when
 220          * we go away */
 221
 222         parent_pid = getpid();
 223
 224         /* First we temporarily block all signals, so that the new
 225          * child has them blocked initially. This way, we can be sure
 226          * that SIGTERMs are not lost we might send to the agent. */
 227         assert_se(sigfillset(&ss) >= 0);
 228         assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
 229
 230         agent_pid = fork();
 231         if (agent_pid < 0) {
 232                 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
 233                 return -errno;
 234         }
 235
 236         if (agent_pid != 0) {
 237                 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
 238                 *pid = agent_pid;
 239                 return 0;
 240         }
 241
 242         /* In the child:
 243          *
 244          * Make sure the agent goes away when the parent dies */
 245         if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
 246                 _exit(EXIT_FAILURE);
 247
 248         /* Make sure we actually can kill the agent, if we need to, in
 249          * case somebody invoked us from a shell script that trapped
 250          * SIGTERM or so... */
 251         (void) reset_all_signal_handlers();
 252         (void) reset_signal_mask();
 253
 254         /* Check whether our parent died before we were able
 255          * to set the death signal and unblock the signals */
 256         if (getppid() != parent_pid)
 257                 _exit(EXIT_SUCCESS);
 258
 259         /* Don't leak fds to the agent */
 260         close_all_fds(except, n_except);
 261
 262         stdout_is_tty = isatty(STDOUT_FILENO);
 263         stderr_is_tty = isatty(STDERR_FILENO);
 264
 265         if (!stdout_is_tty || !stderr_is_tty) {
 266                 int fd;
 267
 268                 /* Detach from stdout/stderr. and reopen
 269                  * /dev/tty for them. This is important to
 270                  * ensure that when systemctl is started via
 271                  * popen() or a similar call that expects to
 272                  * read EOF we actually do generate EOF and
 273                  * not delay this indefinitely by because we
 274                  * keep an unused copy of stdin around. */
 275                 fd = open("/dev/tty", O_WRONLY);
 276                 if (fd < 0) {
 277                         log_error_errno(errno, "Failed to open /dev/tty: %m");
 278                         _exit(EXIT_FAILURE);
 279                 }
 280
 281                 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
 282                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 283                         _exit(EXIT_FAILURE);
 284                 }
 285
 286                 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
 287                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 288                         _exit(EXIT_FAILURE);
 289                 }
 290
 291                 if (fd > STDERR_FILENO)
 292                         close(fd);
 293         }
 294
 295         /* Count arguments */
 296         va_start(ap, path);
 297         for (n = 0; va_arg(ap, char*); n++)
 298                 ;
 299         va_end(ap);
 300
 301         /* Allocate strv */
 302         l = alloca(sizeof(char *) * (n + 1));
 303
 304         /* Fill in arguments */
 305         va_start(ap, path);
 306         for (i = 0; i <= n; i++)
 307                 l[i] = va_arg(ap, char*);
 308         va_end(ap);
 309
 310         execv(path, l);
 311         _exit(EXIT_FAILURE);
 312 }
 313
 314 bool in_initrd(void) {
 315         struct statfs s;
 316
 317         if (saved_in_initrd >= 0)
 318                 return saved_in_initrd;
 319
 320         /* We make two checks here:
 321          *
 322          * 1. the flag file /etc/initrd-release must exist
 323          * 2. the root file system must be a memory file system
 324          *
 325          * The second check is extra paranoia, since misdetecting an
 326          * initrd can have bad consequences due the initrd
 327          * emptying when transititioning to the main systemd.
 328          */
 329
 330         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 331                           statfs("/", &s) >= 0 &&
 332                           is_temporary_fs(&s);
 333
 334         return saved_in_initrd;
 335 }
 336
 337 void in_initrd_force(bool value) {
 338         saved_in_initrd = value;
 339 }
 340
 341 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 342 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 343                  int (*compar) (const void *, const void *, void *), void *arg) {
 344         size_t l, u, idx;
 345         const void *p;
 346         int comparison;
 347
 348         l = 0;
 349         u = nmemb;
 350         while (l < u) {
 351                 idx = (l + u) / 2;
 352                 p = (const char *) base + idx * size;
 353                 comparison = compar(key, p, arg);
 354                 if (comparison < 0)
 355                         u = idx;
 356                 else if (comparison > 0)
 357                         l = idx + 1;
 358                 else
 359                         return (void *)p;
 360         }
 361         return NULL;
 362 }
 363
 364 int on_ac_power(void) {
 365         bool found_offline = false, found_online = false;
 366         _cleanup_closedir_ DIR *d = NULL;
 367         struct dirent *de;
 368
 369         d = opendir("/sys/class/power_supply");
 370         if (!d)
 371                 return errno == ENOENT ? true : -errno;
 372
 373         FOREACH_DIRENT(de, d, return -errno) {
 374                 _cleanup_close_ int fd = -1, device = -1;
 375                 char contents[6];
 376                 ssize_t n;
 377
 378                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 379                 if (device < 0) {
 380                         if (errno == ENOENT || errno == ENOTDIR)
 381                                 continue;
 382
 383                         return -errno;
 384                 }
 385
 386                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 387                 if (fd < 0) {
 388                         if (errno == ENOENT)
 389                                 continue;
 390
 391                         return -errno;
 392                 }
 393
 394                 n = read(fd, contents, sizeof(contents));
 395                 if (n < 0)
 396                         return -errno;
 397
 398                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 399                         continue;
 400
 401                 safe_close(fd);
 402                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 403                 if (fd < 0) {
 404                         if (errno == ENOENT)
 405                                 continue;
 406
 407                         return -errno;
 408                 }
 409
 410                 n = read(fd, contents, sizeof(contents));
 411                 if (n < 0)
 412                         return -errno;
 413
 414                 if (n != 2 || contents[1] != '\n')
 415                         return -EIO;
 416
 417                 if (contents[0] == '1') {
 418                         found_online = true;
 419                         break;
 420                 } else if (contents[0] == '0')
 421                         found_offline = true;
 422                 else
 423                         return -EIO;
 424         }
 425
 426         return found_online || !found_offline;
 427 }
 428
 429 int container_get_leader(const char *machine, pid_t *pid) {
 430         _cleanup_free_ char *s = NULL, *class = NULL;
 431         const char *p;
 432         pid_t leader;
 433         int r;
 434
 435         assert(machine);
 436         assert(pid);
 437
 438         if (!machine_name_is_valid(machine))
 439                 return -EINVAL;
 440
 441         p = strjoina("/run/systemd/machines/", machine);
 442         r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 443         if (r == -ENOENT)
 444                 return -EHOSTDOWN;
 445         if (r < 0)
 446                 return r;
 447         if (!s)
 448                 return -EIO;
 449
 450         if (!streq_ptr(class, "container"))
 451                 return -EIO;
 452
 453         r = parse_pid(s, &leader);
 454         if (r < 0)
 455                 return r;
 456         if (leader <= 1)
 457                 return -EIO;
 458
 459         *pid = leader;
 460         return 0;
 461 }
 462
 463 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 464         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 465         int rfd = -1;
 466
 467         assert(pid >= 0);
 468
 469         if (mntns_fd) {
 470                 const char *mntns;
 471
 472                 mntns = procfs_file_alloca(pid, "ns/mnt");
 473                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 474                 if (mntnsfd < 0)
 475                         return -errno;
 476         }
 477
 478         if (pidns_fd) {
 479                 const char *pidns;
 480
 481                 pidns = procfs_file_alloca(pid, "ns/pid");
 482                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 483                 if (pidnsfd < 0)
 484                         return -errno;
 485         }
 486
 487         if (netns_fd) {
 488                 const char *netns;
 489
 490                 netns = procfs_file_alloca(pid, "ns/net");
 491                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 492                 if (netnsfd < 0)
 493                         return -errno;
 494         }
 495
 496         if (userns_fd) {
 497                 const char *userns;
 498
 499                 userns = procfs_file_alloca(pid, "ns/user");
 500                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 501                 if (usernsfd < 0 && errno != ENOENT)
 502                         return -errno;
 503         }
 504
 505         if (root_fd) {
 506                 const char *root;
 507
 508                 root = procfs_file_alloca(pid, "root");
 509                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 510                 if (rfd < 0)
 511                         return -errno;
 512         }
 513
 514         if (pidns_fd)
 515                 *pidns_fd = pidnsfd;
 516
 517         if (mntns_fd)
 518                 *mntns_fd = mntnsfd;
 519
 520         if (netns_fd)
 521                 *netns_fd = netnsfd;
 522
 523         if (userns_fd)
 524                 *userns_fd = usernsfd;
 525
 526         if (root_fd)
 527                 *root_fd = rfd;
 528
 529         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 530
 531         return 0;
 532 }
 533
 534 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 535         if (userns_fd >= 0) {
 536                 /* Can't setns to your own userns, since then you could
 537                  * escalate from non-root to root in your own namespace, so
 538                  * check if namespaces equal before attempting to enter. */
 539                 _cleanup_free_ char *userns_fd_path = NULL;
 540                 int r;
 541                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 542                         return -ENOMEM;
 543
 544                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 545                 if (r < 0)
 546                         return r;
 547                 if (r)
 548                         userns_fd = -1;
 549         }
 550
 551         if (pidns_fd >= 0)
 552                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 553                         return -errno;
 554
 555         if (mntns_fd >= 0)
 556                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 557                         return -errno;
 558
 559         if (netns_fd >= 0)
 560                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 561                         return -errno;
 562
 563         if (userns_fd >= 0)
 564                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 565                         return -errno;
 566
 567         if (root_fd >= 0) {
 568                 if (fchdir(root_fd) < 0)
 569                         return -errno;
 570
 571                 if (chroot(".") < 0)
 572                         return -errno;
 573         }
 574
 575         return reset_uid_gid();
 576 }
 577
 578 uint64_t physical_memory(void) {
 579         _cleanup_free_ char *root = NULL, *value = NULL;
 580         uint64_t mem, lim;
 581         size_t ps;
 582         long sc;
 583
 584         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 585          * memory.
 586          *
 587          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 588          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 589
 590         sc = sysconf(_SC_PHYS_PAGES);
 591         assert(sc > 0);
 592
 593         ps = page_size();
 594         mem = (uint64_t) sc * (uint64_t) ps;
 595
 596         if (cg_get_root_path(&root) < 0)
 597                 return mem;
 598
 599         if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
 600                 return mem;
 601
 602         if (safe_atou64(value, &lim) < 0)
 603                 return mem;
 604
 605         /* Make sure the limit is a multiple of our own page size */
 606         lim /= ps;
 607         lim *= ps;
 608
 609         return MIN(mem, lim);
 610 }
 611
 612 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 613         uint64_t p, m, ps, r;
 614
 615         assert(max > 0);
 616
 617         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 618          * the result is a multiple of the page size (rounds down). */
 619
 620         ps = page_size();
 621         assert(ps > 0);
 622
 623         p = physical_memory() / ps;
 624         assert(p > 0);
 625
 626         m = p * v;
 627         if (m / p != v)
 628                 return UINT64_MAX;
 629
 630         m /= max;
 631
 632         r = m * ps;
 633         if (r / ps != m)
 634                 return UINT64_MAX;
 635
 636         return r;
 637 }
 638
 639 uint64_t system_tasks_max(void) {
 640
 641 #if SIZEOF_PID_T == 4
 642 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
 643 #elif SIZEOF_PID_T == 2
 644 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
 645 #else
 646 #error "Unknown pid_t size"
 647 #endif
 648
 649         _cleanup_free_ char *value = NULL, *root = NULL;
 650         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 651
 652         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 653          * limit:
 654          *
 655          * a) the maximum value for the pid_t type
 656          * b) the cgroups pids_max attribute for the system
 657          * c) the kernel's configure maximum PID value
 658          *
 659          * And then pick the smallest of the three */
 660
 661         if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
 662                 (void) safe_atou64(value, &a);
 663
 664         if (cg_get_root_path(&root) >= 0) {
 665                 value = mfree(value);
 666
 667                 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
 668                         (void) safe_atou64(value, &b);
 669         }
 670
 671         return MIN3(TASKS_MAX,
 672                     a <= 0 ? TASKS_MAX : a,
 673                     b <= 0 ? TASKS_MAX : b);
 674 }
 675
 676 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 677         uint64_t t, m;
 678
 679         assert(max > 0);
 680
 681         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 682          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 683
 684         t = system_tasks_max();
 685         assert(t > 0);
 686
 687         m = t * v;
 688         if (m / t != v) /* overflow? */
 689                 return UINT64_MAX;
 690
 691         return m / max;
 692 }
 693
 694 int update_reboot_parameter_and_warn(const char *param) {
 695         int r;
 696
 697         if (isempty(param)) {
 698                 if (unlink("/run/systemd/reboot-param") < 0) {
 699                         if (errno == ENOENT)
 700                                 return 0;
 701
 702                         return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
 703                 }
 704
 705                 return 0;
 706         }
 707
 708         RUN_WITH_UMASK(0022) {
 709                 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
 710                 if (r < 0)
 711                         return log_warning_errno(r, "Failed to write reboot parameter file: %m");
 712         }
 713
 714         return 0;
 715 }
 716
 717 int version(void) {
 718         puts(PACKAGE_STRING "\n"
 719              SYSTEMD_FEATURES);
 720         return 0;
 721 }