]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 #include <sys/prctl.h>
31 #include <sys/statfs.h>
32 #include <sys/sysmacros.h>
33 #include <sys/types.h>
36 #include "alloc-util.h"
37 #include "btrfs-util.h"
39 #include "cgroup-util.h"
41 #include "dirent-util.h"
44 #include "format-util.h"
46 #include "hostname-util.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "process-util.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-util.h"
58 #include "time-util.h"
59 #include "umask-util.h"
60 #include "user-util.h"
64 char **saved_argv
= NULL
;
65 static int saved_in_initrd
= -1;
67 size_t page_size(void) {
68 static thread_local
size_t pgsz
= 0;
71 if (_likely_(pgsz
> 0))
74 r
= sysconf(_SC_PAGESIZE
);
81 bool plymouth_running(void) {
82 return access("/run/plymouth/pid", F_OK
) >= 0;
85 bool display_is_local(const char *display
) {
94 int socket_from_display(const char *display
, char **path
) {
101 if (!display_is_local(display
))
104 k
= strspn(display
+1, "0123456789");
106 f
= new(char, strlen("/tmp/.X11-unix/X") + k
+ 1);
110 c
= stpcpy(f
, "/tmp/.X11-unix/X");
111 memcpy(c
, display
+1, k
);
119 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
126 /* If it has a queue this is good enough for us */
127 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", major(d
), minor(d
)) < 0)
138 /* If it is a partition find the originating device */
139 if (asprintf(&p
, "/sys/dev/block/%u:%u/partition", major(d
), minor(d
)) < 0)
148 /* Get parent dev_t */
149 if (asprintf(&p
, "/sys/dev/block/%u:%u/../dev", major(d
), minor(d
)) < 0)
152 r
= read_one_line_file(p
, &s
);
158 r
= sscanf(s
, "%u:%u", &m
, &n
);
164 /* Only return this if it is really good enough for us. */
165 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", m
, n
) < 0)
172 *ret
= makedev(m
, n
);
179 bool kexec_loaded(void) {
180 _cleanup_free_
char *s
= NULL
;
182 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) < 0)
188 int prot_from_flags(int flags
) {
190 switch (flags
& O_ACCMODE
) {
199 return PROT_READ
|PROT_WRITE
;
206 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
207 bool stdout_is_tty
, stderr_is_tty
;
208 pid_t parent_pid
, agent_pid
;
209 sigset_t ss
, saved_ss
;
217 /* Spawns a temporary TTY agent, making sure it goes away when
220 parent_pid
= getpid_cached();
222 /* First we temporarily block all signals, so that the new
223 * child has them blocked initially. This way, we can be sure
224 * that SIGTERMs we might send to the agent are not lost. */
225 assert_se(sigfillset(&ss
) >= 0);
226 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
230 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
234 if (agent_pid
!= 0) {
235 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
242 * Make sure the agent goes away when the parent dies */
243 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
246 /* Make sure we actually can kill the agent, if we need to, in
247 * case somebody invoked us from a shell script that trapped
248 * SIGTERM or so... */
249 (void) reset_all_signal_handlers();
250 (void) reset_signal_mask();
252 /* Check whether our parent died before we were able
253 * to set the death signal and unblock the signals */
254 if (getppid() != parent_pid
)
257 /* Don't leak fds to the agent */
258 close_all_fds(except
, n_except
);
260 stdout_is_tty
= isatty(STDOUT_FILENO
);
261 stderr_is_tty
= isatty(STDERR_FILENO
);
263 if (!stdout_is_tty
|| !stderr_is_tty
) {
266 /* Detach from stdout/stderr and reopen
267 * /dev/tty for them. This is important to
268 * ensure that when systemctl is started via
269 * popen() or a similar call that expects to
270 * read EOF we actually do generate EOF and
271 * not delay this indefinitely because we
272 * keep an unused copy of stdin around. */
273 fd
= open("/dev/tty", O_WRONLY
);
275 log_error_errno(errno
, "Failed to open /dev/tty: %m");
279 if (!stdout_is_tty
&& dup2(fd
, STDOUT_FILENO
) < 0) {
280 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
284 if (!stderr_is_tty
&& dup2(fd
, STDERR_FILENO
) < 0) {
285 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
289 if (fd
> STDERR_FILENO
)
293 /* Count arguments */
295 for (n
= 0; va_arg(ap
, char*); n
++)
300 l
= alloca(sizeof(char *) * (n
+ 1));
302 /* Fill in arguments */
304 for (i
= 0; i
<= n
; i
++)
305 l
[i
] = va_arg(ap
, char*);
312 bool in_initrd(void) {
315 if (saved_in_initrd
>= 0)
316 return saved_in_initrd
;
318 /* We make two checks here:
320 * 1. the flag file /etc/initrd-release must exist
321 * 2. the root file system must be a memory file system
323 * The second check is extra paranoia, since misdetecting an
324 * initrd can have bad consequences due to the initrd
325 * emptying when transitioning to the main systemd.
328 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
329 statfs("/", &s
) >= 0 &&
332 return saved_in_initrd
;
335 void in_initrd_force(bool value
) {
336 saved_in_initrd
= value
;
339 /* hey glibc, APIs with callbacks without a user pointer are so useless */
340 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
341 int (*compar
) (const void *, const void *, void *), void *arg
) {
350 p
= (const char *) base
+ idx
* size
;
351 comparison
= compar(key
, p
, arg
);
354 else if (comparison
> 0)
362 int on_ac_power(void) {
363 bool found_offline
= false, found_online
= false;
364 _cleanup_closedir_
DIR *d
= NULL
;
367 d
= opendir("/sys/class/power_supply");
369 return errno
== ENOENT
? true : -errno
;
371 FOREACH_DIRENT(de
, d
, return -errno
) {
372 _cleanup_close_
int fd
= -1, device
= -1;
376 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
378 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
384 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
392 n
= read(fd
, contents
, sizeof(contents
));
396 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
400 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
408 n
= read(fd
, contents
, sizeof(contents
));
412 if (n
!= 2 || contents
[1] != '\n')
415 if (contents
[0] == '1') {
418 } else if (contents
[0] == '0')
419 found_offline
= true;
424 return found_online
|| !found_offline
;
427 int container_get_leader(const char *machine
, pid_t
*pid
) {
428 _cleanup_free_
char *s
= NULL
, *class = NULL
;
436 if (!machine_name_is_valid(machine
))
439 p
= strjoina("/run/systemd/machines/", machine
);
440 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
448 if (!streq_ptr(class, "container"))
451 r
= parse_pid(s
, &leader
);
461 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
462 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
470 mntns
= procfs_file_alloca(pid
, "ns/mnt");
471 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
479 pidns
= procfs_file_alloca(pid
, "ns/pid");
480 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
488 netns
= procfs_file_alloca(pid
, "ns/net");
489 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
497 userns
= procfs_file_alloca(pid
, "ns/user");
498 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
499 if (usernsfd
< 0 && errno
!= ENOENT
)
506 root
= procfs_file_alloca(pid
, "root");
507 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
522 *userns_fd
= usernsfd
;
527 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
532 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
533 if (userns_fd
>= 0) {
534 /* Can't setns to your own userns, since then you could
535 * escalate from non-root to root in your own namespace, so
536 * check if namespaces equal before attempting to enter. */
537 _cleanup_free_
char *userns_fd_path
= NULL
;
539 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
542 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
550 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
554 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
558 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
562 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
566 if (fchdir(root_fd
) < 0)
573 return reset_uid_gid();
576 uint64_t physical_memory(void) {
577 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
582 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
585 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
586 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
588 sc
= sysconf(_SC_PHYS_PAGES
);
592 mem
= (uint64_t) sc
* (uint64_t) ps
;
594 if (cg_get_root_path(&root
) < 0)
597 if (cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
))
600 if (safe_atou64(value
, &lim
) < 0)
603 /* Make sure the limit is a multiple of our own page size */
607 return MIN(mem
, lim
);
610 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
611 uint64_t p
, m
, ps
, r
;
615 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
616 * the result is a multiple of the page size (rounds down). */
621 p
= physical_memory() / ps
;
637 uint64_t system_tasks_max(void) {
639 #if SIZEOF_PID_T == 4
640 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
641 #elif SIZEOF_PID_T == 2
642 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
644 #error "Unknown pid_t size"
647 _cleanup_free_
char *value
= NULL
, *root
= NULL
;
648 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
650 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
653 * a) the maximum value for the pid_t type
654 * b) the cgroups pids_max attribute for the system
655 * c) the kernel's configured maximum PID value
657 * And then pick the smallest of the three */
659 if (read_one_line_file("/proc/sys/kernel/pid_max", &value
) >= 0)
660 (void) safe_atou64(value
, &a
);
662 if (cg_get_root_path(&root
) >= 0) {
663 value
= mfree(value
);
665 if (cg_get_attribute("pids", root
, "pids.max", &value
) >= 0)
666 (void) safe_atou64(value
, &b
);
669 return MIN3(TASKS_MAX
,
670 a
<= 0 ? TASKS_MAX
: a
,
671 b
<= 0 ? TASKS_MAX
: b
);
674 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
679 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
680 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
682 t
= system_tasks_max();
686 if (m
/ t
!= v
) /* overflow? */
692 int update_reboot_parameter_and_warn(const char *param
) {
695 if (isempty(param
)) {
696 if (unlink("/run/systemd/reboot-param") < 0) {
700 return log_warning_errno(errno
, "Failed to unlink reboot parameter file: %m");
706 RUN_WITH_UMASK(0022) {
707 r
= write_string_file("/run/systemd/reboot-param", param
, WRITE_STRING_FILE_CREATE
);
709 return log_warning_errno(r
, "Failed to write reboot parameter file: %m");
716 puts(PACKAGE_STRING
"\n"
721 int get_block_device(const char *path
, dev_t
*dev
) {
728 /* Gets the block device directly backing a file system. If
729 * the block device is encrypted, returns the device mapper
732 if (lstat(path
, &st
))
735 if (major(st
.st_dev
) != 0) {
740 if (statfs(path
, &sfs
) < 0)
743 if (F_TYPE_EQUAL(sfs
.f_type
, BTRFS_SUPER_MAGIC
))
744 return btrfs_get_block_device(path
, dev
);
749 int get_block_device_harder(const char *path
, dev_t
*dev
) {
750 _cleanup_closedir_
DIR *d
= NULL
;
751 _cleanup_free_
char *p
= NULL
, *t
= NULL
;
752 struct dirent
*de
, *found
= NULL
;
761 /* Gets the backing block device for a file system, and
762 * handles LUKS encrypted file systems, looking for its
763 * immediate parent, if there is one. */
765 r
= get_block_device(path
, &dt
);
769 if (asprintf(&p
, "/sys/dev/block/%u:%u/slaves", major(dt
), minor(dt
)) < 0)
780 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
782 if (dot_or_dot_dot(de
->d_name
))
785 if (!IN_SET(de
->d_type
, DT_LNK
, DT_UNKNOWN
))
789 _cleanup_free_
char *u
= NULL
, *v
= NULL
, *a
= NULL
, *b
= NULL
;
791 /* We found a device backed by multiple other devices. We don't really support automatic
792 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
793 * two backing devices: the data partition and the hash partition. We are fine with such
794 * setups, however, only if both partitions are on the same physical device. Hence, let's
797 u
= strjoin(p
, "/", de
->d_name
, "/../dev");
801 v
= strjoin(p
, "/", found
->d_name
, "/../dev");
805 r
= read_one_line_file(u
, &a
);
807 log_debug_errno(r
, "Failed to read %s: %m", u
);
811 r
= read_one_line_file(v
, &b
);
813 log_debug_errno(r
, "Failed to read %s: %m", v
);
817 /* Check if the parent device is the same. If not, then the two backing devices are on
818 * different physical devices, and we don't support that. */
829 q
= strjoina(p
, "/", found
->d_name
, "/dev");
831 r
= read_one_line_file(q
, &t
);
837 if (sscanf(t
, "%u:%u", &maj
, &min
) != 2)
843 *dev
= makedev(maj
, min
);