]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 #include <sys/prctl.h>
31 #include <sys/statfs.h>
32 #include <sys/sysmacros.h>
33 #include <sys/types.h>
36 #include "alloc-util.h"
37 #include "btrfs-util.h"
39 #include "cgroup-util.h"
41 #include "dirent-util.h"
44 #include "format-util.h"
46 #include "hostname-util.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "process-util.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-util.h"
58 #include "time-util.h"
59 #include "umask-util.h"
60 #include "user-util.h"
64 char **saved_argv
= NULL
;
65 static int saved_in_initrd
= -1;
67 size_t page_size(void) {
68 static thread_local
size_t pgsz
= 0;
71 if (_likely_(pgsz
> 0))
74 r
= sysconf(_SC_PAGESIZE
);
81 bool plymouth_running(void) {
82 return access("/run/plymouth/pid", F_OK
) >= 0;
85 bool display_is_local(const char *display
) {
94 int socket_from_display(const char *display
, char **path
) {
101 if (!display_is_local(display
))
104 k
= strspn(display
+1, "0123456789");
106 f
= new(char, strlen("/tmp/.X11-unix/X") + k
+ 1);
110 c
= stpcpy(f
, "/tmp/.X11-unix/X");
111 memcpy(c
, display
+1, k
);
119 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
126 /* If it has a queue this is good enough for us */
127 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", major(d
), minor(d
)) < 0)
138 /* If it is a partition find the originating device */
139 if (asprintf(&p
, "/sys/dev/block/%u:%u/partition", major(d
), minor(d
)) < 0)
148 /* Get parent dev_t */
149 if (asprintf(&p
, "/sys/dev/block/%u:%u/../dev", major(d
), minor(d
)) < 0)
152 r
= read_one_line_file(p
, &s
);
158 r
= sscanf(s
, "%u:%u", &m
, &n
);
164 /* Only return this if it is really good enough for us. */
165 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", m
, n
) < 0)
172 *ret
= makedev(m
, n
);
179 bool kexec_loaded(void) {
183 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) >= 0) {
191 int prot_from_flags(int flags
) {
193 switch (flags
& O_ACCMODE
) {
202 return PROT_READ
|PROT_WRITE
;
209 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
210 bool stdout_is_tty
, stderr_is_tty
;
211 pid_t parent_pid
, agent_pid
;
212 sigset_t ss
, saved_ss
;
220 /* Spawns a temporary TTY agent, making sure it goes away when
223 parent_pid
= getpid_cached();
225 /* First we temporarily block all signals, so that the new
226 * child has them blocked initially. This way, we can be sure
227 * that SIGTERMs we might send to the agent are not lost. */
228 assert_se(sigfillset(&ss
) >= 0);
229 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
233 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
237 if (agent_pid
!= 0) {
238 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
245 * Make sure the agent goes away when the parent dies */
246 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
249 /* Make sure we actually can kill the agent, if we need to, in
250 * case somebody invoked us from a shell script that trapped
251 * SIGTERM or so... */
252 (void) reset_all_signal_handlers();
253 (void) reset_signal_mask();
255 /* Check whether our parent died before we were able
256 * to set the death signal and unblock the signals */
257 if (getppid() != parent_pid
)
260 /* Don't leak fds to the agent */
261 close_all_fds(except
, n_except
);
263 stdout_is_tty
= isatty(STDOUT_FILENO
);
264 stderr_is_tty
= isatty(STDERR_FILENO
);
266 if (!stdout_is_tty
|| !stderr_is_tty
) {
269 /* Detach from stdout/stderr and reopen
270 * /dev/tty for them. This is important to
271 * ensure that when systemctl is started via
272 * popen() or a similar call that expects to
273 * read EOF we actually do generate EOF and
274 * not delay this indefinitely because we
275 * keep an unused copy of stdin around. */
276 fd
= open("/dev/tty", O_WRONLY
);
278 log_error_errno(errno
, "Failed to open /dev/tty: %m");
282 if (!stdout_is_tty
&& dup2(fd
, STDOUT_FILENO
) < 0) {
283 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
287 if (!stderr_is_tty
&& dup2(fd
, STDERR_FILENO
) < 0) {
288 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
292 if (fd
> STDERR_FILENO
)
296 /* Count arguments */
298 for (n
= 0; va_arg(ap
, char*); n
++)
303 l
= alloca(sizeof(char *) * (n
+ 1));
305 /* Fill in arguments */
307 for (i
= 0; i
<= n
; i
++)
308 l
[i
] = va_arg(ap
, char*);
315 bool in_initrd(void) {
318 if (saved_in_initrd
>= 0)
319 return saved_in_initrd
;
321 /* We make two checks here:
323 * 1. the flag file /etc/initrd-release must exist
324 * 2. the root file system must be a memory file system
326 * The second check is extra paranoia, since misdetecting an
327 * initrd can have bad consequences due to the initrd
328 * emptying when transitioning to the main systemd.
331 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
332 statfs("/", &s
) >= 0 &&
335 return saved_in_initrd
;
338 void in_initrd_force(bool value
) {
339 saved_in_initrd
= value
;
342 /* hey glibc, APIs with callbacks without a user pointer are so useless */
343 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
344 int (*compar
) (const void *, const void *, void *), void *arg
) {
353 p
= (const char *) base
+ idx
* size
;
354 comparison
= compar(key
, p
, arg
);
357 else if (comparison
> 0)
365 int on_ac_power(void) {
366 bool found_offline
= false, found_online
= false;
367 _cleanup_closedir_
DIR *d
= NULL
;
370 d
= opendir("/sys/class/power_supply");
372 return errno
== ENOENT
? true : -errno
;
374 FOREACH_DIRENT(de
, d
, return -errno
) {
375 _cleanup_close_
int fd
= -1, device
= -1;
379 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
381 if (errno
== ENOENT
|| errno
== ENOTDIR
)
387 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
395 n
= read(fd
, contents
, sizeof(contents
));
399 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
403 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
411 n
= read(fd
, contents
, sizeof(contents
));
415 if (n
!= 2 || contents
[1] != '\n')
418 if (contents
[0] == '1') {
421 } else if (contents
[0] == '0')
422 found_offline
= true;
427 return found_online
|| !found_offline
;
430 int container_get_leader(const char *machine
, pid_t
*pid
) {
431 _cleanup_free_
char *s
= NULL
, *class = NULL
;
439 if (!machine_name_is_valid(machine
))
442 p
= strjoina("/run/systemd/machines/", machine
);
443 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
451 if (!streq_ptr(class, "container"))
454 r
= parse_pid(s
, &leader
);
464 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
465 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
473 mntns
= procfs_file_alloca(pid
, "ns/mnt");
474 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
482 pidns
= procfs_file_alloca(pid
, "ns/pid");
483 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
491 netns
= procfs_file_alloca(pid
, "ns/net");
492 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
500 userns
= procfs_file_alloca(pid
, "ns/user");
501 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
502 if (usernsfd
< 0 && errno
!= ENOENT
)
509 root
= procfs_file_alloca(pid
, "root");
510 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
525 *userns_fd
= usernsfd
;
530 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
535 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
536 if (userns_fd
>= 0) {
537 /* Can't setns to your own userns, since then you could
538 * escalate from non-root to root in your own namespace, so
539 * check if namespaces equal before attempting to enter. */
540 _cleanup_free_
char *userns_fd_path
= NULL
;
542 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
545 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
553 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
557 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
561 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
565 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
569 if (fchdir(root_fd
) < 0)
576 return reset_uid_gid();
579 uint64_t physical_memory(void) {
580 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
585 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
588 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
589 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
591 sc
= sysconf(_SC_PHYS_PAGES
);
595 mem
= (uint64_t) sc
* (uint64_t) ps
;
597 if (cg_get_root_path(&root
) < 0)
600 if (cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
))
603 if (safe_atou64(value
, &lim
) < 0)
606 /* Make sure the limit is a multiple of our own page size */
610 return MIN(mem
, lim
);
613 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
614 uint64_t p
, m
, ps
, r
;
618 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
619 * the result is a multiple of the page size (rounds down). */
624 p
= physical_memory() / ps
;
640 uint64_t system_tasks_max(void) {
642 #if SIZEOF_PID_T == 4
643 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
644 #elif SIZEOF_PID_T == 2
645 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
647 #error "Unknown pid_t size"
650 _cleanup_free_
char *value
= NULL
, *root
= NULL
;
651 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
653 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
656 * a) the maximum value for the pid_t type
657 * b) the cgroups pids_max attribute for the system
658 * c) the kernel's configured maximum PID value
660 * And then pick the smallest of the three */
662 if (read_one_line_file("/proc/sys/kernel/pid_max", &value
) >= 0)
663 (void) safe_atou64(value
, &a
);
665 if (cg_get_root_path(&root
) >= 0) {
666 value
= mfree(value
);
668 if (cg_get_attribute("pids", root
, "pids.max", &value
) >= 0)
669 (void) safe_atou64(value
, &b
);
672 return MIN3(TASKS_MAX
,
673 a
<= 0 ? TASKS_MAX
: a
,
674 b
<= 0 ? TASKS_MAX
: b
);
677 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
682 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
683 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
685 t
= system_tasks_max();
689 if (m
/ t
!= v
) /* overflow? */
695 int update_reboot_parameter_and_warn(const char *param
) {
698 if (isempty(param
)) {
699 if (unlink("/run/systemd/reboot-param") < 0) {
703 return log_warning_errno(errno
, "Failed to unlink reboot parameter file: %m");
709 RUN_WITH_UMASK(0022) {
710 r
= write_string_file("/run/systemd/reboot-param", param
, WRITE_STRING_FILE_CREATE
);
712 return log_warning_errno(r
, "Failed to write reboot parameter file: %m");
719 puts(PACKAGE_STRING
"\n"
724 int get_block_device(const char *path
, dev_t
*dev
) {
731 /* Gets the block device directly backing a file system. If
732 * the block device is encrypted, returns the device mapper
735 if (lstat(path
, &st
))
738 if (major(st
.st_dev
) != 0) {
743 if (statfs(path
, &sfs
) < 0)
746 if (F_TYPE_EQUAL(sfs
.f_type
, BTRFS_SUPER_MAGIC
))
747 return btrfs_get_block_device(path
, dev
);
752 int get_block_device_harder(const char *path
, dev_t
*dev
) {
753 _cleanup_closedir_
DIR *d
= NULL
;
754 _cleanup_free_
char *p
= NULL
, *t
= NULL
;
755 struct dirent
*de
, *found
= NULL
;
764 /* Gets the backing block device for a file system, and
765 * handles LUKS encrypted file systems, looking for its
766 * immediate parent, if there is one. */
768 r
= get_block_device(path
, &dt
);
772 if (asprintf(&p
, "/sys/dev/block/%u:%u/slaves", major(dt
), minor(dt
)) < 0)
783 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
785 if (dot_or_dot_dot(de
->d_name
))
788 if (!IN_SET(de
->d_type
, DT_LNK
, DT_UNKNOWN
))
792 _cleanup_free_
char *u
= NULL
, *v
= NULL
, *a
= NULL
, *b
= NULL
;
794 /* We found a device backed by multiple other devices. We don't really support automatic
795 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
796 * two backing devices: the data partition and the hash partition. We are fine with such
797 * setups, however, only if both partitions are on the same physical device. Hence, let's
800 u
= strjoin(p
, "/", de
->d_name
, "/../dev");
804 v
= strjoin(p
, "/", found
->d_name
, "/../dev");
808 r
= read_one_line_file(u
, &a
);
810 log_debug_errno(r
, "Failed to read %s: %m", u
);
814 r
= read_one_line_file(v
, &b
);
816 log_debug_errno(r
, "Failed to read %s: %m", v
);
820 /* Check if the parent device is the same. If not, then the two backing devices are on
821 * different physical devices, and we don't support that. */
832 q
= strjoina(p
, "/", found
->d_name
, "/dev");
834 r
= read_one_line_file(q
, &t
);
840 if (sscanf(t
, "%u:%u", &maj
, &min
) != 2)
846 *dev
= makedev(maj
, min
);