]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <sys/types.h>
37 #include "alloc-util.h"
38 #include "btrfs-util.h"
40 #include "cgroup-util.h"
42 #include "dirent-util.h"
45 #include "format-util.h"
47 #include "hostname-util.h"
51 #include "parse-util.h"
52 #include "path-util.h"
53 #include "process-util.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-util.h"
59 #include "time-util.h"
60 #include "umask-util.h"
61 #include "user-util.h"
65 char **saved_argv
= NULL
;
66 static int saved_in_initrd
= -1;
68 size_t page_size(void) {
69 static thread_local
size_t pgsz
= 0;
72 if (_likely_(pgsz
> 0))
75 r
= sysconf(_SC_PAGESIZE
);
82 bool plymouth_running(void) {
83 return access("/run/plymouth/pid", F_OK
) >= 0;
86 bool display_is_local(const char *display
) {
95 int socket_from_display(const char *display
, char **path
) {
102 if (!display_is_local(display
))
105 k
= strspn(display
+1, "0123456789");
107 f
= new(char, strlen("/tmp/.X11-unix/X") + k
+ 1);
111 c
= stpcpy(f
, "/tmp/.X11-unix/X");
112 memcpy(c
, display
+1, k
);
120 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
127 /* If it has a queue this is good enough for us */
128 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", major(d
), minor(d
)) < 0)
139 /* If it is a partition find the originating device */
140 if (asprintf(&p
, "/sys/dev/block/%u:%u/partition", major(d
), minor(d
)) < 0)
149 /* Get parent dev_t */
150 if (asprintf(&p
, "/sys/dev/block/%u:%u/../dev", major(d
), minor(d
)) < 0)
153 r
= read_one_line_file(p
, &s
);
159 r
= sscanf(s
, "%u:%u", &m
, &n
);
165 /* Only return this if it is really good enough for us. */
166 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", m
, n
) < 0)
173 *ret
= makedev(m
, n
);
180 bool kexec_loaded(void) {
181 _cleanup_free_
char *s
= NULL
;
183 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) < 0)
189 int prot_from_flags(int flags
) {
191 switch (flags
& O_ACCMODE
) {
200 return PROT_READ
|PROT_WRITE
;
207 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
208 bool stdout_is_tty
, stderr_is_tty
;
209 pid_t parent_pid
, agent_pid
;
210 sigset_t ss
, saved_ss
;
218 /* Spawns a temporary TTY agent, making sure it goes away when
221 parent_pid
= getpid_cached();
223 /* First we temporarily block all signals, so that the new
224 * child has them blocked initially. This way, we can be sure
225 * that SIGTERMs we might send to the agent are not lost. */
226 assert_se(sigfillset(&ss
) >= 0);
227 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
231 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
235 if (agent_pid
!= 0) {
236 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
243 * Make sure the agent goes away when the parent dies */
244 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
247 /* Make sure we actually can kill the agent, if we need to, in
248 * case somebody invoked us from a shell script that trapped
249 * SIGTERM or so... */
250 (void) reset_all_signal_handlers();
251 (void) reset_signal_mask();
253 /* Check whether our parent died before we were able
254 * to set the death signal and unblock the signals */
255 if (getppid() != parent_pid
)
258 /* Don't leak fds to the agent */
259 close_all_fds(except
, n_except
);
261 stdout_is_tty
= isatty(STDOUT_FILENO
);
262 stderr_is_tty
= isatty(STDERR_FILENO
);
264 if (!stdout_is_tty
|| !stderr_is_tty
) {
267 /* Detach from stdout/stderr and reopen
268 * /dev/tty for them. This is important to
269 * ensure that when systemctl is started via
270 * popen() or a similar call that expects to
271 * read EOF we actually do generate EOF and
272 * not delay this indefinitely because we
273 * keep an unused copy of stdin around. */
274 fd
= open("/dev/tty", O_WRONLY
);
276 log_error_errno(errno
, "Failed to open /dev/tty: %m");
280 if (!stdout_is_tty
&& dup2(fd
, STDOUT_FILENO
) < 0) {
281 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
285 if (!stderr_is_tty
&& dup2(fd
, STDERR_FILENO
) < 0) {
286 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
290 if (fd
> STDERR_FILENO
)
294 /* Count arguments */
296 for (n
= 0; va_arg(ap
, char*); n
++)
301 l
= alloca(sizeof(char *) * (n
+ 1));
303 /* Fill in arguments */
305 for (i
= 0; i
<= n
; i
++)
306 l
[i
] = va_arg(ap
, char*);
313 bool in_initrd(void) {
316 if (saved_in_initrd
>= 0)
317 return saved_in_initrd
;
319 /* We make two checks here:
321 * 1. the flag file /etc/initrd-release must exist
322 * 2. the root file system must be a memory file system
324 * The second check is extra paranoia, since misdetecting an
325 * initrd can have bad consequences due to the initrd
326 * emptying when transitioning to the main systemd.
329 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
330 statfs("/", &s
) >= 0 &&
333 return saved_in_initrd
;
336 void in_initrd_force(bool value
) {
337 saved_in_initrd
= value
;
340 /* hey glibc, APIs with callbacks without a user pointer are so useless */
341 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
342 int (*compar
) (const void *, const void *, void *), void *arg
) {
351 p
= (const char *) base
+ idx
* size
;
352 comparison
= compar(key
, p
, arg
);
355 else if (comparison
> 0)
363 int on_ac_power(void) {
364 bool found_offline
= false, found_online
= false;
365 _cleanup_closedir_
DIR *d
= NULL
;
368 d
= opendir("/sys/class/power_supply");
370 return errno
== ENOENT
? true : -errno
;
372 FOREACH_DIRENT(de
, d
, return -errno
) {
373 _cleanup_close_
int fd
= -1, device
= -1;
377 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
379 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
385 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
393 n
= read(fd
, contents
, sizeof(contents
));
397 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
401 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
409 n
= read(fd
, contents
, sizeof(contents
));
413 if (n
!= 2 || contents
[1] != '\n')
416 if (contents
[0] == '1') {
419 } else if (contents
[0] == '0')
420 found_offline
= true;
425 return found_online
|| !found_offline
;
428 int container_get_leader(const char *machine
, pid_t
*pid
) {
429 _cleanup_free_
char *s
= NULL
, *class = NULL
;
437 if (!machine_name_is_valid(machine
))
440 p
= strjoina("/run/systemd/machines/", machine
);
441 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
449 if (!streq_ptr(class, "container"))
452 r
= parse_pid(s
, &leader
);
462 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
463 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
471 mntns
= procfs_file_alloca(pid
, "ns/mnt");
472 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
480 pidns
= procfs_file_alloca(pid
, "ns/pid");
481 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
489 netns
= procfs_file_alloca(pid
, "ns/net");
490 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
498 userns
= procfs_file_alloca(pid
, "ns/user");
499 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
500 if (usernsfd
< 0 && errno
!= ENOENT
)
507 root
= procfs_file_alloca(pid
, "root");
508 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
523 *userns_fd
= usernsfd
;
528 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
533 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
534 if (userns_fd
>= 0) {
535 /* Can't setns to your own userns, since then you could
536 * escalate from non-root to root in your own namespace, so
537 * check if namespaces equal before attempting to enter. */
538 _cleanup_free_
char *userns_fd_path
= NULL
;
540 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
543 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
551 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
555 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
559 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
563 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
567 if (fchdir(root_fd
) < 0)
574 return reset_uid_gid();
577 uint64_t physical_memory(void) {
578 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
583 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
586 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
587 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
589 sc
= sysconf(_SC_PHYS_PAGES
);
593 mem
= (uint64_t) sc
* (uint64_t) ps
;
595 if (cg_get_root_path(&root
) < 0)
598 if (cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
))
601 if (safe_atou64(value
, &lim
) < 0)
604 /* Make sure the limit is a multiple of our own page size */
608 return MIN(mem
, lim
);
611 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
612 uint64_t p
, m
, ps
, r
;
616 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
617 * the result is a multiple of the page size (rounds down). */
622 p
= physical_memory() / ps
;
638 uint64_t system_tasks_max(void) {
640 #if SIZEOF_PID_T == 4
641 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
642 #elif SIZEOF_PID_T == 2
643 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
645 #error "Unknown pid_t size"
648 _cleanup_free_
char *value
= NULL
, *root
= NULL
;
649 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
651 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
654 * a) the maximum value for the pid_t type
655 * b) the cgroups pids_max attribute for the system
656 * c) the kernel's configured maximum PID value
658 * And then pick the smallest of the three */
660 if (read_one_line_file("/proc/sys/kernel/pid_max", &value
) >= 0)
661 (void) safe_atou64(value
, &a
);
663 if (cg_get_root_path(&root
) >= 0) {
664 value
= mfree(value
);
666 if (cg_get_attribute("pids", root
, "pids.max", &value
) >= 0)
667 (void) safe_atou64(value
, &b
);
670 return MIN3(TASKS_MAX
,
671 a
<= 0 ? TASKS_MAX
: a
,
672 b
<= 0 ? TASKS_MAX
: b
);
675 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
680 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
681 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
683 t
= system_tasks_max();
687 if (m
/ t
!= v
) /* overflow? */
693 int update_reboot_parameter_and_warn(const char *param
) {
696 if (isempty(param
)) {
697 if (unlink("/run/systemd/reboot-param") < 0) {
701 return log_warning_errno(errno
, "Failed to unlink reboot parameter file: %m");
707 RUN_WITH_UMASK(0022) {
708 r
= write_string_file("/run/systemd/reboot-param", param
, WRITE_STRING_FILE_CREATE
);
710 return log_warning_errno(r
, "Failed to write reboot parameter file: %m");
717 puts(PACKAGE_STRING
"\n"
722 int get_block_device(const char *path
, dev_t
*dev
) {
729 /* Gets the block device directly backing a file system. If
730 * the block device is encrypted, returns the device mapper
733 if (lstat(path
, &st
))
736 if (major(st
.st_dev
) != 0) {
741 if (statfs(path
, &sfs
) < 0)
744 if (F_TYPE_EQUAL(sfs
.f_type
, BTRFS_SUPER_MAGIC
))
745 return btrfs_get_block_device(path
, dev
);
750 int get_block_device_harder(const char *path
, dev_t
*dev
) {
751 _cleanup_closedir_
DIR *d
= NULL
;
752 _cleanup_free_
char *p
= NULL
, *t
= NULL
;
753 struct dirent
*de
, *found
= NULL
;
762 /* Gets the backing block device for a file system, and
763 * handles LUKS encrypted file systems, looking for its
764 * immediate parent, if there is one. */
766 r
= get_block_device(path
, &dt
);
770 if (asprintf(&p
, "/sys/dev/block/%u:%u/slaves", major(dt
), minor(dt
)) < 0)
781 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
783 if (dot_or_dot_dot(de
->d_name
))
786 if (!IN_SET(de
->d_type
, DT_LNK
, DT_UNKNOWN
))
790 _cleanup_free_
char *u
= NULL
, *v
= NULL
, *a
= NULL
, *b
= NULL
;
792 /* We found a device backed by multiple other devices. We don't really support automatic
793 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
794 * two backing devices: the data partition and the hash partition. We are fine with such
795 * setups, however, only if both partitions are on the same physical device. Hence, let's
798 u
= strjoin(p
, "/", de
->d_name
, "/../dev");
802 v
= strjoin(p
, "/", found
->d_name
, "/../dev");
806 r
= read_one_line_file(u
, &a
);
808 log_debug_errno(r
, "Failed to read %s: %m", u
);
812 r
= read_one_line_file(v
, &b
);
814 log_debug_errno(r
, "Failed to read %s: %m", v
);
818 /* Check if the parent device is the same. If not, then the two backing devices are on
819 * different physical devices, and we don't support that. */
830 q
= strjoina(p
, "/", found
->d_name
, "/dev");
832 r
= read_one_line_file(q
, &t
);
838 if (sscanf(t
, "%u:%u", &maj
, &min
) != 2)
844 *dev
= makedev(maj
, min
);