]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <sys/types.h>
37 #include "alloc-util.h"
38 #include "btrfs-util.h"
40 #include "cgroup-util.h"
42 #include "device-nodes.h"
43 #include "dirent-util.h"
46 #include "format-util.h"
48 #include "hostname-util.h"
52 #include "parse-util.h"
53 #include "path-util.h"
54 #include "process-util.h"
56 #include "signal-util.h"
57 #include "stat-util.h"
58 #include "string-util.h"
60 #include "time-util.h"
61 #include "umask-util.h"
62 #include "user-util.h"
66 char **saved_argv
= NULL
;
67 static int saved_in_initrd
= -1;
69 size_t page_size(void) {
70 static thread_local
size_t pgsz
= 0;
73 if (_likely_(pgsz
> 0))
76 r
= sysconf(_SC_PAGESIZE
);
83 bool plymouth_running(void) {
84 return access("/run/plymouth/pid", F_OK
) >= 0;
87 bool display_is_local(const char *display
) {
96 int socket_from_display(const char *display
, char **path
) {
103 if (!display_is_local(display
))
106 k
= strspn(display
+1, "0123456789");
108 f
= new(char, STRLEN("/tmp/.X11-unix/X") + k
+ 1);
112 c
= stpcpy(f
, "/tmp/.X11-unix/X");
113 memcpy(c
, display
+1, k
);
121 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
122 char p
[SYS_BLOCK_PATH_MAX("/partition")];
123 _cleanup_free_
char *s
= NULL
;
129 /* If it has a queue this is good enough for us */
130 xsprintf_sys_block_path(p
, "/queue", d
);
131 if (access(p
, F_OK
) >= 0) {
136 /* If it is a partition find the originating device */
137 xsprintf_sys_block_path(p
, "/partition", d
);
138 if (access(p
, F_OK
) < 0)
141 /* Get parent dev_t */
142 xsprintf_sys_block_path(p
, "/../dev", d
);
143 r
= read_one_line_file(p
, &s
);
147 r
= sscanf(s
, "%u:%u", &m
, &n
);
151 /* Only return this if it is really good enough for us. */
152 xsprintf_sys_block_path(p
, "/queue", makedev(m
, n
));
153 if (access(p
, F_OK
) < 0)
156 *ret
= makedev(m
, n
);
160 bool kexec_loaded(void) {
161 _cleanup_free_
char *s
= NULL
;
163 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) < 0)
169 int prot_from_flags(int flags
) {
171 switch (flags
& O_ACCMODE
) {
180 return PROT_READ
|PROT_WRITE
;
187 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
188 bool stdout_is_tty
, stderr_is_tty
;
189 pid_t parent_pid
, agent_pid
;
190 sigset_t ss
, saved_ss
;
198 /* Spawns a temporary TTY agent, making sure it goes away when
201 parent_pid
= getpid_cached();
203 /* First we temporarily block all signals, so that the new
204 * child has them blocked initially. This way, we can be sure
205 * that SIGTERMs we might send to the agent are not lost. */
206 assert_se(sigfillset(&ss
) >= 0);
207 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
211 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
215 if (agent_pid
!= 0) {
216 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
223 * Make sure the agent goes away when the parent dies */
224 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
227 /* Make sure we actually can kill the agent, if we need to, in
228 * case somebody invoked us from a shell script that trapped
229 * SIGTERM or so... */
230 (void) reset_all_signal_handlers();
231 (void) reset_signal_mask();
233 /* Check whether our parent died before we were able
234 * to set the death signal and unblock the signals */
235 if (getppid() != parent_pid
)
238 /* Don't leak fds to the agent */
239 close_all_fds(except
, n_except
);
241 stdout_is_tty
= isatty(STDOUT_FILENO
);
242 stderr_is_tty
= isatty(STDERR_FILENO
);
244 if (!stdout_is_tty
|| !stderr_is_tty
) {
247 /* Detach from stdout/stderr and reopen
248 * /dev/tty for them. This is important to
249 * ensure that when systemctl is started via
250 * popen() or a similar call that expects to
251 * read EOF we actually do generate EOF and
252 * not delay this indefinitely because we
253 * keep an unused copy of stdin around. */
254 fd
= open("/dev/tty", O_WRONLY
);
256 log_error_errno(errno
, "Failed to open /dev/tty: %m");
260 if (!stdout_is_tty
&& dup2(fd
, STDOUT_FILENO
) < 0) {
261 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
265 if (!stderr_is_tty
&& dup2(fd
, STDERR_FILENO
) < 0) {
266 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
270 if (fd
> STDERR_FILENO
)
274 /* Count arguments */
276 for (n
= 0; va_arg(ap
, char*); n
++)
281 l
= alloca(sizeof(char *) * (n
+ 1));
283 /* Fill in arguments */
285 for (i
= 0; i
<= n
; i
++)
286 l
[i
] = va_arg(ap
, char*);
293 bool in_initrd(void) {
296 if (saved_in_initrd
>= 0)
297 return saved_in_initrd
;
299 /* We make two checks here:
301 * 1. the flag file /etc/initrd-release must exist
302 * 2. the root file system must be a memory file system
304 * The second check is extra paranoia, since misdetecting an
305 * initrd can have bad consequences due to the initrd
306 * emptying when transitioning to the main systemd.
309 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
310 statfs("/", &s
) >= 0 &&
313 return saved_in_initrd
;
316 void in_initrd_force(bool value
) {
317 saved_in_initrd
= value
;
320 /* hey glibc, APIs with callbacks without a user pointer are so useless */
321 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
322 int (*compar
) (const void *, const void *, void *), void *arg
) {
331 p
= (const char *) base
+ idx
* size
;
332 comparison
= compar(key
, p
, arg
);
335 else if (comparison
> 0)
343 int on_ac_power(void) {
344 bool found_offline
= false, found_online
= false;
345 _cleanup_closedir_
DIR *d
= NULL
;
348 d
= opendir("/sys/class/power_supply");
350 return errno
== ENOENT
? true : -errno
;
352 FOREACH_DIRENT(de
, d
, return -errno
) {
353 _cleanup_close_
int fd
= -1, device
= -1;
357 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
359 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
365 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
373 n
= read(fd
, contents
, sizeof(contents
));
377 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
381 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
389 n
= read(fd
, contents
, sizeof(contents
));
393 if (n
!= 2 || contents
[1] != '\n')
396 if (contents
[0] == '1') {
399 } else if (contents
[0] == '0')
400 found_offline
= true;
405 return found_online
|| !found_offline
;
408 int container_get_leader(const char *machine
, pid_t
*pid
) {
409 _cleanup_free_
char *s
= NULL
, *class = NULL
;
417 if (!machine_name_is_valid(machine
))
420 p
= strjoina("/run/systemd/machines/", machine
);
421 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
429 if (!streq_ptr(class, "container"))
432 r
= parse_pid(s
, &leader
);
442 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
443 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
451 mntns
= procfs_file_alloca(pid
, "ns/mnt");
452 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
460 pidns
= procfs_file_alloca(pid
, "ns/pid");
461 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
469 netns
= procfs_file_alloca(pid
, "ns/net");
470 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
478 userns
= procfs_file_alloca(pid
, "ns/user");
479 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
480 if (usernsfd
< 0 && errno
!= ENOENT
)
487 root
= procfs_file_alloca(pid
, "root");
488 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
503 *userns_fd
= usernsfd
;
508 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
513 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
514 if (userns_fd
>= 0) {
515 /* Can't setns to your own userns, since then you could
516 * escalate from non-root to root in your own namespace, so
517 * check whether the namespaces are equal before attempting to enter. */
518 _cleanup_free_
char *userns_fd_path
= NULL
;
520 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
523 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
531 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
535 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
539 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
543 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
547 if (fchdir(root_fd
) < 0)
554 return reset_uid_gid();
557 uint64_t physical_memory(void) {
558 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
563 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
566 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
567 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
569 sc
= sysconf(_SC_PHYS_PAGES
);
573 mem
= (uint64_t) sc
* (uint64_t) ps
;
575 if (cg_get_root_path(&root
) < 0)
578 if (cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
))
581 if (safe_atou64(value
, &lim
) < 0)
584 /* Make sure the limit is a multiple of our own page size */
588 return MIN(mem
, lim
);
591 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
592 uint64_t p
, m
, ps
, r
;
596 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
597 * the result is a multiple of the page size (rounds down). */
602 p
= physical_memory() / ps
;
618 uint64_t system_tasks_max(void) {
620 #if SIZEOF_PID_T == 4
621 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
622 #elif SIZEOF_PID_T == 2
623 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
625 #error "Unknown pid_t size"
628 _cleanup_free_
char *value
= NULL
, *root
= NULL
;
629 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
631 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
634 * a) the maximum value for the pid_t type
635 * b) the cgroups pids_max attribute for the system
636 * c) the kernel's configured maximum PID value
638 * And then pick the smallest of the three */
640 if (read_one_line_file("/proc/sys/kernel/pid_max", &value
) >= 0)
641 (void) safe_atou64(value
, &a
);
643 if (cg_get_root_path(&root
) >= 0) {
644 value
= mfree(value
);
646 if (cg_get_attribute("pids", root
, "pids.max", &value
) >= 0)
647 (void) safe_atou64(value
, &b
);
650 return MIN3(TASKS_MAX
,
651 a
<= 0 ? TASKS_MAX
: a
,
652 b
<= 0 ? TASKS_MAX
: b
);
655 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
660 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
661 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
663 t
= system_tasks_max();
667 if (m
/ t
!= v
) /* overflow? */
673 int update_reboot_parameter_and_warn(const char *param
) {
676 if (isempty(param
)) {
677 if (unlink("/run/systemd/reboot-param") < 0) {
681 return log_warning_errno(errno
, "Failed to unlink reboot parameter file: %m");
687 RUN_WITH_UMASK(0022) {
688 r
= write_string_file("/run/systemd/reboot-param", param
, WRITE_STRING_FILE_CREATE
);
690 return log_warning_errno(r
, "Failed to write reboot parameter file: %m");
697 puts(PACKAGE_STRING
"\n"
702 int get_block_device(const char *path
, dev_t
*dev
) {
709 /* Gets the block device directly backing a file system. If
710 * the block device is encrypted, returns the device mapper
713 if (lstat(path
, &st
))
716 if (major(st
.st_dev
) != 0) {
721 if (statfs(path
, &sfs
) < 0)
724 if (F_TYPE_EQUAL(sfs
.f_type
, BTRFS_SUPER_MAGIC
))
725 return btrfs_get_block_device(path
, dev
);
730 int get_block_device_harder(const char *path
, dev_t
*dev
) {
731 _cleanup_closedir_
DIR *d
= NULL
;
732 _cleanup_free_
char *t
= NULL
;
733 char p
[SYS_BLOCK_PATH_MAX("/slaves")];
734 struct dirent
*de
, *found
= NULL
;
743 /* Gets the backing block device for a file system, and
744 * handles LUKS encrypted file systems, looking for its
745 * immediate parent, if there is one. */
747 r
= get_block_device(path
, &dt
);
751 xsprintf_sys_block_path(p
, "/slaves", dt
);
760 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
762 if (dot_or_dot_dot(de
->d_name
))
765 if (!IN_SET(de
->d_type
, DT_LNK
, DT_UNKNOWN
))
769 _cleanup_free_
char *u
= NULL
, *v
= NULL
, *a
= NULL
, *b
= NULL
;
771 /* We found a device backed by multiple other devices. We don't really support automatic
772 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
773 * two backing devices: the data partition and the hash partition. We are fine with such
774 * setups, however, only if both partitions are on the same physical device. Hence, let's
777 u
= strjoin(p
, "/", de
->d_name
, "/../dev");
781 v
= strjoin(p
, "/", found
->d_name
, "/../dev");
785 r
= read_one_line_file(u
, &a
);
787 log_debug_errno(r
, "Failed to read %s: %m", u
);
791 r
= read_one_line_file(v
, &b
);
793 log_debug_errno(r
, "Failed to read %s: %m", v
);
797 /* Check if the parent device is the same. If not, then the two backing devices are on
798 * different physical devices, and we don't support that. */
809 q
= strjoina(p
, "/", found
->d_name
, "/dev");
811 r
= read_one_line_file(q
, &t
);
817 if (sscanf(t
, "%u:%u", &maj
, &min
) != 2)
823 *dev
= makedev(maj
, min
);