]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
tree-wide: introduce new safe_fork() helper and port everything over
[thirdparty/systemd.git] / src / basic / util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <alloca.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <sched.h>
25 #include <signal.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <sys/types.h>
35 #include <unistd.h>
36
37 #include "alloc-util.h"
38 #include "btrfs-util.h"
39 #include "build.h"
40 #include "cgroup-util.h"
41 #include "def.h"
42 #include "device-nodes.h"
43 #include "dirent-util.h"
44 #include "fd-util.h"
45 #include "fileio.h"
46 #include "format-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "log.h"
50 #include "macro.h"
51 #include "missing.h"
52 #include "parse-util.h"
53 #include "path-util.h"
54 #include "process-util.h"
55 #include "set.h"
56 #include "signal-util.h"
57 #include "stat-util.h"
58 #include "string-util.h"
59 #include "strv.h"
60 #include "time-util.h"
61 #include "umask-util.h"
62 #include "user-util.h"
63 #include "util.h"
64
/* Process-wide argument vector and count. Both start out unset; nothing in this part of the file assigns
 * them (presumably the program's startup code does -- confirm against the callers). */
char **saved_argv = NULL;
int saved_argc = 0;

/* Cache for in_initrd(): negative while the answer is still unknown, otherwise the boolean result. */
static int saved_in_initrd = -1;
68
/* Returns the page size reported by sysconf(_SC_PAGESIZE). The lookup happens at most once per thread; the
 * result is kept in a thread-local variable and returned directly on later calls. */
size_t page_size(void) {
        static thread_local size_t cached = 0;

        if (_likely_(cached > 0))
                return cached;

        long sz = sysconf(_SC_PAGESIZE);
        assert(sz > 0);

        return cached = (size_t) sz;
}
82
/* Reports whether /run/plymouth/pid exists, which is taken as the sign that Plymouth is running. */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
86
87 bool display_is_local(const char *display) {
88 assert(display);
89
90 return
91 display[0] == ':' &&
92 display[1] >= '0' &&
93 display[1] <= '9';
94 }
95
96 int socket_from_display(const char *display, char **path) {
97 size_t k;
98 char *f, *c;
99
100 assert(display);
101 assert(path);
102
103 if (!display_is_local(display))
104 return -EINVAL;
105
106 k = strspn(display+1, "0123456789");
107
108 f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
109 if (!f)
110 return -ENOMEM;
111
112 c = stpcpy(f, "/tmp/.X11-unix/X");
113 memcpy(c, display+1, k);
114 c[k] = 0;
115
116 *path = f;
117
118 return 0;
119 }
120
121 int block_get_whole_disk(dev_t d, dev_t *ret) {
122 char p[SYS_BLOCK_PATH_MAX("/partition")];
123 _cleanup_free_ char *s = NULL;
124 int r;
125 unsigned n, m;
126
127 assert(ret);
128
129 /* If it has a queue this is good enough for us */
130 xsprintf_sys_block_path(p, "/queue", d);
131 if (access(p, F_OK) >= 0) {
132 *ret = d;
133 return 0;
134 }
135
136 /* If it is a partition find the originating device */
137 xsprintf_sys_block_path(p, "/partition", d);
138 if (access(p, F_OK) < 0)
139 return -ENOENT;
140
141 /* Get parent dev_t */
142 xsprintf_sys_block_path(p, "/../dev", d);
143 r = read_one_line_file(p, &s);
144 if (r < 0)
145 return r;
146
147 r = sscanf(s, "%u:%u", &m, &n);
148 if (r != 2)
149 return -EINVAL;
150
151 /* Only return this if it is really good enough for us. */
152 xsprintf_sys_block_path(p, "/queue", makedev(m, n));
153 if (access(p, F_OK) < 0)
154 return -ENOENT;
155
156 *ret = makedev(m, n);
157 return 0;
158 }
159
160 bool kexec_loaded(void) {
161 _cleanup_free_ char *s = NULL;
162
163 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
164 return false;
165
166 return s[0] == '1';
167 }
168
/* Maps the access mode contained in open(2) flags to the corresponding mmap(2) protection bits. Only the
 * O_ACCMODE part of flags is looked at. Returns -EINVAL if the access mode is none of O_RDONLY, O_WRONLY
 * or O_RDWR. */
int prot_from_flags(int flags) {
        int mode = flags & O_ACCMODE;

        if (mode == O_RDONLY)
                return PROT_READ;

        if (mode == O_WRONLY)
                return PROT_WRITE;

        if (mode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
186
187 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
188 bool stdout_is_tty, stderr_is_tty;
189 pid_t agent_pid;
190 unsigned n, i;
191 va_list ap;
192 char **l;
193 int r;
194
195 assert(pid);
196 assert(path);
197
198 /* Spawns a temporary TTY agent, making sure it goes away when
199 * we go away */
200
201 r = safe_fork_full("(sd-agent)", except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, &agent_pid);
202 if (r < 0)
203 return r;
204 if (r > 0)
205 return 0;
206
207 /* In the child: */
208
209 stdout_is_tty = isatty(STDOUT_FILENO);
210 stderr_is_tty = isatty(STDERR_FILENO);
211
212 if (!stdout_is_tty || !stderr_is_tty) {
213 int fd;
214
215 /* Detach from stdout/stderr. and reopen
216 * /dev/tty for them. This is important to
217 * ensure that when systemctl is started via
218 * popen() or a similar call that expects to
219 * read EOF we actually do generate EOF and
220 * not delay this indefinitely by because we
221 * keep an unused copy of stdin around. */
222 fd = open("/dev/tty", O_WRONLY);
223 if (fd < 0) {
224 log_error_errno(errno, "Failed to open /dev/tty: %m");
225 _exit(EXIT_FAILURE);
226 }
227
228 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
229 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
230 _exit(EXIT_FAILURE);
231 }
232
233 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
234 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
235 _exit(EXIT_FAILURE);
236 }
237
238 if (fd > STDERR_FILENO)
239 close(fd);
240 }
241
242 /* Count arguments */
243 va_start(ap, path);
244 for (n = 0; va_arg(ap, char*); n++)
245 ;
246 va_end(ap);
247
248 /* Allocate strv */
249 l = alloca(sizeof(char *) * (n + 1));
250
251 /* Fill in arguments */
252 va_start(ap, path);
253 for (i = 0; i <= n; i++)
254 l[i] = va_arg(ap, char*);
255 va_end(ap);
256
257 execv(path, l);
258 _exit(EXIT_FAILURE);
259 }
260
261 bool in_initrd(void) {
262 struct statfs s;
263
264 if (saved_in_initrd >= 0)
265 return saved_in_initrd;
266
267 /* We make two checks here:
268 *
269 * 1. the flag file /etc/initrd-release must exist
270 * 2. the root file system must be a memory file system
271 *
272 * The second check is extra paranoia, since misdetecting an
273 * initrd can have bad consequences due the initrd
274 * emptying when transititioning to the main systemd.
275 */
276
277 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
278 statfs("/", &s) >= 0 &&
279 is_temporary_fs(&s);
280
281 return saved_in_initrd;
282 }
283
284 void in_initrd_force(bool value) {
285 saved_in_initrd = value;
286 }
287
/* hey glibc, APIs with callbacks without a user pointer are so useless */

/* Binary search like bsearch(3), but with an additional opaque pointer that is handed through to the
 * comparison function.
 *
 * key    -- the item to look for; passed as first argument to compar
 * base   -- start of the array, which must be sorted consistently with compar
 * nmemb  -- number of array elements
 * size   -- size of one element in bytes
 * compar -- returns <0, 0 or >0 if key sorts before, equal to or after the element in its second argument
 * arg    -- passed as third argument to compar
 *
 * Returns a pointer to a matching element, or NULL if there is none (always the case for nmemb == 0, in
 * which case compar is never invoked). */
void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                 int (*compar) (const void *, const void *, void *), void *arg) {
        size_t l, u, idx;
        const void *p;
        int comparison;

        l = 0;
        u = nmemb;
        while (l < u) {
                /* Not (l + u) / 2: the sum may wrap around for very large arrays */
                idx = l + (u - l) / 2;
                p = (const char *) base + idx * size;
                comparison = compar(key, p, arg);
                if (comparison < 0)
                        u = idx;
                else if (comparison > 0)
                        l = idx + 1;
                else
                        return (void *)p;
        }
        return NULL;
}
310
311 int on_ac_power(void) {
312 bool found_offline = false, found_online = false;
313 _cleanup_closedir_ DIR *d = NULL;
314 struct dirent *de;
315
316 d = opendir("/sys/class/power_supply");
317 if (!d)
318 return errno == ENOENT ? true : -errno;
319
320 FOREACH_DIRENT(de, d, return -errno) {
321 _cleanup_close_ int fd = -1, device = -1;
322 char contents[6];
323 ssize_t n;
324
325 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
326 if (device < 0) {
327 if (IN_SET(errno, ENOENT, ENOTDIR))
328 continue;
329
330 return -errno;
331 }
332
333 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
334 if (fd < 0) {
335 if (errno == ENOENT)
336 continue;
337
338 return -errno;
339 }
340
341 n = read(fd, contents, sizeof(contents));
342 if (n < 0)
343 return -errno;
344
345 if (n != 6 || memcmp(contents, "Mains\n", 6))
346 continue;
347
348 safe_close(fd);
349 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
350 if (fd < 0) {
351 if (errno == ENOENT)
352 continue;
353
354 return -errno;
355 }
356
357 n = read(fd, contents, sizeof(contents));
358 if (n < 0)
359 return -errno;
360
361 if (n != 2 || contents[1] != '\n')
362 return -EIO;
363
364 if (contents[0] == '1') {
365 found_online = true;
366 break;
367 } else if (contents[0] == '0')
368 found_offline = true;
369 else
370 return -EIO;
371 }
372
373 return found_online || !found_offline;
374 }
375
376 int container_get_leader(const char *machine, pid_t *pid) {
377 _cleanup_free_ char *s = NULL, *class = NULL;
378 const char *p;
379 pid_t leader;
380 int r;
381
382 assert(machine);
383 assert(pid);
384
385 if (!machine_name_is_valid(machine))
386 return -EINVAL;
387
388 p = strjoina("/run/systemd/machines/", machine);
389 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
390 if (r == -ENOENT)
391 return -EHOSTDOWN;
392 if (r < 0)
393 return r;
394 if (!s)
395 return -EIO;
396
397 if (!streq_ptr(class, "container"))
398 return -EIO;
399
400 r = parse_pid(s, &leader);
401 if (r < 0)
402 return r;
403 if (leader <= 1)
404 return -EIO;
405
406 *pid = leader;
407 return 0;
408 }
409
410 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
411 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
412 int rfd = -1;
413
414 assert(pid >= 0);
415
416 if (mntns_fd) {
417 const char *mntns;
418
419 mntns = procfs_file_alloca(pid, "ns/mnt");
420 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
421 if (mntnsfd < 0)
422 return -errno;
423 }
424
425 if (pidns_fd) {
426 const char *pidns;
427
428 pidns = procfs_file_alloca(pid, "ns/pid");
429 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
430 if (pidnsfd < 0)
431 return -errno;
432 }
433
434 if (netns_fd) {
435 const char *netns;
436
437 netns = procfs_file_alloca(pid, "ns/net");
438 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
439 if (netnsfd < 0)
440 return -errno;
441 }
442
443 if (userns_fd) {
444 const char *userns;
445
446 userns = procfs_file_alloca(pid, "ns/user");
447 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
448 if (usernsfd < 0 && errno != ENOENT)
449 return -errno;
450 }
451
452 if (root_fd) {
453 const char *root;
454
455 root = procfs_file_alloca(pid, "root");
456 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
457 if (rfd < 0)
458 return -errno;
459 }
460
461 if (pidns_fd)
462 *pidns_fd = pidnsfd;
463
464 if (mntns_fd)
465 *mntns_fd = mntnsfd;
466
467 if (netns_fd)
468 *netns_fd = netnsfd;
469
470 if (userns_fd)
471 *userns_fd = usernsfd;
472
473 if (root_fd)
474 *root_fd = rfd;
475
476 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
477
478 return 0;
479 }
480
481 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
482 if (userns_fd >= 0) {
483 /* Can't setns to your own userns, since then you could
484 * escalate from non-root to root in your own namespace, so
485 * check if namespaces equal before attempting to enter. */
486 _cleanup_free_ char *userns_fd_path = NULL;
487 int r;
488 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
489 return -ENOMEM;
490
491 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
492 if (r < 0)
493 return r;
494 if (r)
495 userns_fd = -1;
496 }
497
498 if (pidns_fd >= 0)
499 if (setns(pidns_fd, CLONE_NEWPID) < 0)
500 return -errno;
501
502 if (mntns_fd >= 0)
503 if (setns(mntns_fd, CLONE_NEWNS) < 0)
504 return -errno;
505
506 if (netns_fd >= 0)
507 if (setns(netns_fd, CLONE_NEWNET) < 0)
508 return -errno;
509
510 if (userns_fd >= 0)
511 if (setns(userns_fd, CLONE_NEWUSER) < 0)
512 return -errno;
513
514 if (root_fd >= 0) {
515 if (fchdir(root_fd) < 0)
516 return -errno;
517
518 if (chroot(".") < 0)
519 return -errno;
520 }
521
522 return reset_uid_gid();
523 }
524
525 uint64_t physical_memory(void) {
526 _cleanup_free_ char *root = NULL, *value = NULL;
527 uint64_t mem, lim;
528 size_t ps;
529 long sc;
530
531 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
532 * memory.
533 *
534 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
535 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
536
537 sc = sysconf(_SC_PHYS_PAGES);
538 assert(sc > 0);
539
540 ps = page_size();
541 mem = (uint64_t) sc * (uint64_t) ps;
542
543 if (cg_get_root_path(&root) < 0)
544 return mem;
545
546 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
547 return mem;
548
549 if (safe_atou64(value, &lim) < 0)
550 return mem;
551
552 /* Make sure the limit is a multiple of our own page size */
553 lim /= ps;
554 lim *= ps;
555
556 return MIN(mem, lim);
557 }
558
/* Computes physical_memory() * v / max, i.e. a fraction of the physical memory size. The computation is
 * done in units of pages, hence the result is a multiple of the page size (rounded down). max must not
 * be zero. Returns UINT64_MAX if one of the multiplications overflows. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t pgsz, n_pages, scaled, bytes;

        assert(max > 0);

        pgsz = page_size();
        assert(pgsz > 0);

        n_pages = physical_memory() / pgsz;
        assert(n_pages > 0);

        /* Multiply first, and detect wrap-around by dividing again */
        scaled = n_pages * v;
        if (scaled / n_pages != v)
                return UINT64_MAX;

        scaled /= max;

        /* Convert pages back to bytes, with the same overflow check */
        bytes = scaled * pgsz;
        if (bytes / pgsz != scaled)
                return UINT64_MAX;

        return bytes;
}
585
586 uint64_t system_tasks_max(void) {
587
588 #if SIZEOF_PID_T == 4
589 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
590 #elif SIZEOF_PID_T == 2
591 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
592 #else
593 #error "Unknown pid_t size"
594 #endif
595
596 _cleanup_free_ char *value = NULL, *root = NULL;
597 uint64_t a = TASKS_MAX, b = TASKS_MAX;
598
599 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
600 * limit:
601 *
602 * a) the maximum value for the pid_t type
603 * b) the cgroups pids_max attribute for the system
604 * c) the kernel's configure maximum PID value
605 *
606 * And then pick the smallest of the three */
607
608 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
609 (void) safe_atou64(value, &a);
610
611 if (cg_get_root_path(&root) >= 0) {
612 value = mfree(value);
613
614 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
615 (void) safe_atou64(value, &b);
616 }
617
618 return MIN3(TASKS_MAX,
619 a <= 0 ? TASKS_MAX : a,
620 b <= 0 ? TASKS_MAX : b);
621 }
622
/* Multiplies the system's maximum number of tasks by the fraction v/max. With max == 100 this computes
 * percentages relative to system_tasks_max(). max must not be zero. Returns UINT64_MAX if the
 * multiplication overflows. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit, product;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        /* Detect wrap-around by undoing the multiplication */
        product = limit * v;
        if (product / limit != v)
                return UINT64_MAX;

        return product / max;
}
640
641 int update_reboot_parameter_and_warn(const char *param) {
642 int r;
643
644 if (isempty(param)) {
645 if (unlink("/run/systemd/reboot-param") < 0) {
646 if (errno == ENOENT)
647 return 0;
648
649 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
650 }
651
652 return 0;
653 }
654
655 RUN_WITH_UMASK(0022) {
656 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
657 if (r < 0)
658 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
659 }
660
661 return 0;
662 }
663
664 int version(void) {
665 puts(PACKAGE_STRING "\n"
666 SYSTEMD_FEATURES);
667 return 0;
668 }
669
670 int get_block_device(const char *path, dev_t *dev) {
671 struct stat st;
672 struct statfs sfs;
673
674 assert(path);
675 assert(dev);
676
677 /* Get's the block device directly backing a file system. If
678 * the block device is encrypted, returns the device mapper
679 * block device. */
680
681 if (lstat(path, &st))
682 return -errno;
683
684 if (major(st.st_dev) != 0) {
685 *dev = st.st_dev;
686 return 1;
687 }
688
689 if (statfs(path, &sfs) < 0)
690 return -errno;
691
692 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
693 return btrfs_get_block_device(path, dev);
694
695 return 0;
696 }
697
698 int get_block_device_harder(const char *path, dev_t *dev) {
699 _cleanup_closedir_ DIR *d = NULL;
700 _cleanup_free_ char *t = NULL;
701 char p[SYS_BLOCK_PATH_MAX("/slaves")];
702 struct dirent *de, *found = NULL;
703 const char *q;
704 unsigned maj, min;
705 dev_t dt;
706 int r;
707
708 assert(path);
709 assert(dev);
710
711 /* Gets the backing block device for a file system, and
712 * handles LUKS encrypted file systems, looking for its
713 * immediate parent, if there is one. */
714
715 r = get_block_device(path, &dt);
716 if (r <= 0)
717 return r;
718
719 xsprintf_sys_block_path(p, "/slaves", dt);
720 d = opendir(p);
721 if (!d) {
722 if (errno == ENOENT)
723 goto fallback;
724
725 return -errno;
726 }
727
728 FOREACH_DIRENT_ALL(de, d, return -errno) {
729
730 if (dot_or_dot_dot(de->d_name))
731 continue;
732
733 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
734 continue;
735
736 if (found) {
737 _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
738
739 /* We found a device backed by multiple other devices. We don't really support automatic
740 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
741 * two backing devices: the data partition and the hash partition. We are fine with such
742 * setups, however, only if both partitions are on the same physical device. Hence, let's
743 * verify this. */
744
745 u = strjoin(p, "/", de->d_name, "/../dev");
746 if (!u)
747 return -ENOMEM;
748
749 v = strjoin(p, "/", found->d_name, "/../dev");
750 if (!v)
751 return -ENOMEM;
752
753 r = read_one_line_file(u, &a);
754 if (r < 0) {
755 log_debug_errno(r, "Failed to read %s: %m", u);
756 goto fallback;
757 }
758
759 r = read_one_line_file(v, &b);
760 if (r < 0) {
761 log_debug_errno(r, "Failed to read %s: %m", v);
762 goto fallback;
763 }
764
765 /* Check if the parent device is the same. If not, then the two backing devices are on
766 * different physical devices, and we don't support that. */
767 if (!streq(a, b))
768 goto fallback;
769 }
770
771 found = de;
772 }
773
774 if (!found)
775 goto fallback;
776
777 q = strjoina(p, "/", found->d_name, "/dev");
778
779 r = read_one_line_file(q, &t);
780 if (r == -ENOENT)
781 goto fallback;
782 if (r < 0)
783 return r;
784
785 if (sscanf(t, "%u:%u", &maj, &min) != 2)
786 return -EINVAL;
787
788 if (maj == 0)
789 goto fallback;
790
791 *dev = makedev(maj, min);
792 return 1;
793
794 fallback:
795 *dev = dt;
796 return 1;
797 }