1 /* SPDX-License-Identifier: LGPL-2.1+ */
13 #include <sys/prctl.h>
14 #include <sys/statfs.h>
15 #include <sys/sysmacros.h>
16 #include <sys/types.h>
19 #include "alloc-util.h"
20 #include "btrfs-util.h"
22 #include "cgroup-util.h"
24 #include "device-nodes.h"
25 #include "dirent-util.h"
29 #include "format-util.h"
31 #include "hostname-util.h"
35 #include "parse-util.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "procfs-util.h"
40 #include "signal-util.h"
41 #include "stat-util.h"
42 #include "string-util.h"
44 #include "time-util.h"
45 #include "umask-util.h"
46 #include "user-util.h"
/* Process-wide argument vector pointer. It starts out NULL; nothing in the
 * visible part of this file assigns it.
 * NOTE(review): presumably set once by the program's entry point — confirm. */
char **saved_argv = NULL;

/* Cached answer for in_initrd(): a negative value means "not determined yet",
 * any value >= 0 is returned as the cached result. */
static int saved_in_initrd = -1;
/* page_size(): reports a page size as a size_t.
 *
 * Visible pieces: a thread-local cache variable (pgsz, initially 0) that is
 * tested with _likely_(pgsz > 0), and a sysconf(_SC_PAGESIZE) query whose
 * result is stored in r.
 *
 * NOTE(review): the statements between these pieces (the early return, any
 * validation of r, the store into pgsz and the final return) are not part of
 * this excerpt — confirm against the complete file. */
54 size_t page_size(void) {
55 static thread_local
size_t pgsz
= 0;
58 if (_likely_(pgsz
> 0))
61 r
= sysconf(_SC_PAGESIZE
);
/* Reports whether Plymouth appears to be running.
 *
 * The only evidence consulted is the existence of /run/plymouth/pid, probed
 * with access(2) and F_OK. Any access() failure (not just ENOENT) therefore
 * yields false.
 *
 * Returns: true if the PID file is accessible, false otherwise. */
bool plymouth_running(void) {
        return access("/run/plymouth/pid", F_OK) >= 0;
}
/* display_is_local(): takes a display string and returns a bool.
 *
 * NOTE(review): only the signature is present in this excerpt; the test that
 * decides what counts as "local" is not visible here — confirm against the
 * complete file before relying on any particular syntax being accepted. */
72 bool display_is_local(const char *display
) {
/* kexec_loaded(): returns a bool derived from /sys/kernel/kexec_loaded.
 *
 * Visible pieces: the file's first line is read into s via
 * read_one_line_file(); s is declared _cleanup_free_, and a negative return
 * from the read is treated as a separate case.
 *
 * NOTE(review): the statement taken on read failure and the expression that
 * interprets the line are not part of this excerpt. */
81 bool kexec_loaded(void) {
82 _cleanup_free_
char *s
= NULL
;
84 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) < 0)
/* prot_from_flags(): maps open(2)-style flags to PROT_* protection bits.
 *
 * The switch inspects only the access-mode bits (flags & O_ACCMODE). The one
 * branch visible here returns PROT_READ|PROT_WRITE.
 *
 * NOTE(review): the case labels and the other return statements are not part
 * of this excerpt, so which access mode yields which result cannot be
 * confirmed from here. */
90 int prot_from_flags(int flags
) {
92 switch (flags
& O_ACCMODE
) {
101 return PROT_READ
|PROT_WRITE
;
/* in_initrd(): reports whether we are running inside an initrd, caching the
 * answer in saved_in_initrd.
 *
 * Visible behaviour:
 *  - a cached value >= 0 is returned immediately;
 *  - $SYSTEMD_IN_INITRD is parsed with getenv_bool_secure(); a parse error
 *    other than -ENXIO is logged at debug level and ignored;
 *  - the cache is set either from that variable (r > 0) or from the
 *    combination of /etc/initrd-release existing and a successful statfs("/")
 *    plus a further condition.
 *
 * NOTE(review): the condition that selects between the two assignments and the
 * final term of the && chain are not part of this excerpt. */
108 bool in_initrd(void) {
112 if (saved_in_initrd
>= 0)
113 return saved_in_initrd
;
115 /* We make two checks here:
117 * 1. the flag file /etc/initrd-release must exist
118 * 2. the root file system must be a memory file system
120 * The second check is extra paranoia, since misdetecting an
121 * initrd can have bad consequences due to the initrd
122 * emptying when transitioning to the main systemd.
125 r
= getenv_bool_secure("SYSTEMD_IN_INITRD");
126 if (r
< 0 && r
!= -ENXIO
)
127 log_debug_errno(r
, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
130 saved_in_initrd
= r
> 0;
132 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
133 statfs("/", &s
) >= 0 &&
136 return saved_in_initrd
;
139 void in_initrd_force(bool value
) {
140 saved_in_initrd
= value
;
/* xbsearch_r(): searches an array for key using a comparison callback that
 * receives an extra user pointer (arg).
 *
 * key:    element to look for, passed as first argument to compar
 * base:   start of the array, addressed as bytes
 * nmemb:  number of elements
 * size:   size of one element in bytes
 * compar: three-argument comparator, called as compar(key, p, arg)
 * arg:    opaque pointer forwarded to compar
 *
 * Visible pieces: an assertion that nmemb * size does not overflow, the
 * address of candidate element idx computed as base + idx * size, and a branch
 * on the sign of the comparison result.
 *
 * NOTE(review): the loop, the bounds update and the return statements are not
 * part of this excerpt; presumably a binary search over a sorted array —
 * confirm against the complete file. */
143 /* hey glibc, APIs with callbacks without a user pointer are so useless */
144 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
145 __compar_d_fn_t compar
, void *arg
) {
150 assert(!size_multiply_overflow(nmemb
, size
));
156 p
= (const uint8_t*) base
+ idx
* size
;
157 comparison
= compar(key
, p
, arg
);
160 else if (comparison
> 0)
/* on_ac_power(): decides whether the machine is running on mains power by
 * scanning /sys/class/power_supply.
 *
 * Visible behaviour:
 *  - one return path yields true when errno is ENOENT and -errno otherwise
 *    (presumably the failed-opendir() case — the guarding test is not visible);
 *  - for every directory entry, the entry is opened as a directory and its
 *    "type" attribute is read; only content that is exactly the 6 bytes
 *    "Mains\n" passes the check;
 *  - the "online" attribute is then read and must be exactly 2 bytes ending in
 *    '\n'; a leading '1' and a leading '0' are distinguished, the latter
 *    setting found_offline;
 *  - the result is found_online || !found_offline, i.e. with no offline mains
 *    supply found the answer is true.
 *
 * Returns: a boolean as int, or a negative errno-style value (see the
 * -errno return paths).
 *
 * NOTE(review): the error handling after each openat()/read() and the
 * statement executed for '1' are not part of this excerpt. */
168 int on_ac_power(void) {
169 bool found_offline
= false, found_online
= false;
170 _cleanup_closedir_
DIR *d
= NULL
;
173 d
= opendir("/sys/class/power_supply");
175 return errno
== ENOENT
? true : -errno
;
177 FOREACH_DIRENT(de
, d
, return -errno
) {
178 _cleanup_close_
int fd
= -1, device
= -1;
182 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
184 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
190 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
198 n
= read(fd
, contents
, sizeof(contents
));
202 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
206 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
214 n
= read(fd
, contents
, sizeof(contents
));
218 if (n
!= 2 || contents
[1] != '\n')
221 if (contents
[0] == '1') {
224 } else if (contents
[0] == '0')
225 found_offline
= true;
230 return found_online
|| !found_offline
;
/* container_get_leader(): looks up the leader PID of a named machine and
 * reports it through *pid.
 *
 * machine: machine name; the literal ".host" is handled as a special case
 * pid:     output location for the leader PID
 *
 * Visible behaviour:
 *  - names other than ".host" must pass machine_name_is_valid();
 *  - the state file /run/systemd/machines/<machine> is parsed with
 *    parse_env_file();
 *  - the entry's class must equal "container" (streq_ptr, so a missing class
 *    does not match);
 *  - the string s is converted with parse_pid() into leader.
 *
 * NOTE(review): the keys requested from parse_env_file(), the value produced
 * for ".host", and all error returns are not part of this excerpt. */
233 int container_get_leader(const char *machine
, pid_t
*pid
) {
234 _cleanup_free_
char *s
= NULL
, *class = NULL
;
242 if (streq(machine
, ".host")) {
247 if (!machine_name_is_valid(machine
))
250 p
= strjoina("/run/systemd/machines/", machine
);
251 r
= parse_env_file(NULL
, p
,
261 if (!streq_ptr(class, "container"))
264 r
= parse_pid(s
, &leader
);
/* namespace_open(): opens file descriptors for the namespaces and the root
 * directory of process pid.
 *
 * pid:        process whose /proc entries are opened
 * pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd: output locations for the
 *             respective descriptors
 *
 * Visible behaviour:
 *  - ns/mnt, ns/pid, ns/net and ns/user under the process's procfs directory
 *    are opened O_RDONLY|O_NOCTTY|O_CLOEXEC; "root" is additionally opened
 *    with O_DIRECTORY;
 *  - for the user namespace a failed open is only singled out when errno is
 *    not ENOENT, i.e. a missing ns/user file is tolerated;
 *  - usernsfd is handed to the caller through *userns_fd;
 *  - finally all local namespace descriptors are reset to -1, presumably so
 *    the _cleanup_close_ handlers do not close descriptors now owned by the
 *    caller — confirm against the macro's definition.
 *
 * NOTE(review): the conditions deciding which descriptors are requested, the
 * remaining output assignments and the error returns are not part of this
 * excerpt. */
274 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
275 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
283 mntns
= procfs_file_alloca(pid
, "ns/mnt");
284 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
292 pidns
= procfs_file_alloca(pid
, "ns/pid");
293 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
301 netns
= procfs_file_alloca(pid
, "ns/net");
302 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
310 userns
= procfs_file_alloca(pid
, "ns/user");
311 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
312 if (usernsfd
< 0 && errno
!= ENOENT
)
319 root
= procfs_file_alloca(pid
, "root");
320 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
335 *userns_fd
= usernsfd
;
340 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
/* namespace_enter(): joins the namespaces referred to by the given descriptors
 * and switches to the given root directory.
 *
 * pidns_fd, mntns_fd, netns_fd, userns_fd: namespace descriptors for setns()
 * root_fd: directory descriptor passed to fchdir()
 *
 * Visible behaviour:
 *  - when userns_fd >= 0, /proc/self/fd/<userns_fd> is compared against
 *    /proc/self/ns/user with files_same() before anything is entered (see the
 *    inline comment for the reason);
 *  - setns() is called in the order PID, mount, network, user namespace;
 *  - fchdir(root_fd) follows;
 *  - the function ends by returning the result of reset_uid_gid().
 *
 * NOTE(review): the guards around each setns() call, the handling of the
 * files_same() result, any chroot step and the error returns are not part of
 * this excerpt. */
345 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
346 if (userns_fd
>= 0) {
347 /* Can't setns to your own userns, since then you could
348 * escalate from non-root to root in your own namespace, so
349 * check if namespaces equal before attempting to enter. */
350 _cleanup_free_
char *userns_fd_path
= NULL
;
352 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
355 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
363 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
367 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
371 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
375 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
379 if (fchdir(root_fd
) < 0)
386 return reset_uid_gid();
/* physical_memory(): returns the amount of memory available, in bytes, as the
 * smaller of the physically reported amount and a cgroup memory limit.
 *
 * Visible behaviour:
 *  - the physical amount is sysconf(_SC_PHYS_PAGES) multiplied by ps, both
 *    widened to uint64_t before the multiplication;
 *  - the root cgroup path and the unified-hierarchy mode are queried; failures
 *    are logged at debug level and the cgroup limit is ignored;
 *  - the limit is read from "memory.max" or "memory.limit_in_bytes" of the
 *    "memory" controller; the literal value "max" and a parsed value of
 *    UINT64_MAX are special-cased;
 *  - the final result is MIN(mem, lim).
 *
 * NOTE(review): where ps comes from, which attribute is chosen in which mode,
 * what the special cases return, and the page-size rounding announced by the
 * last inline comment are not part of this excerpt. */
389 uint64_t physical_memory(void) {
390 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
396 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
399 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
400 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
402 sc
= sysconf(_SC_PHYS_PAGES
);
406 mem
= (uint64_t) sc
* (uint64_t) ps
;
408 r
= cg_get_root_path(&root
);
410 log_debug_errno(r
, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
414 r
= cg_all_unified();
416 log_debug_errno(r
, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
420 r
= cg_get_attribute("memory", root
, "memory.max", &value
);
422 log_debug_errno(r
, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
426 if (streq(value
, "max"))
429 r
= cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
);
431 log_debug_errno(r
, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
436 r
= safe_atou64(value
, &lim
);
438 log_debug_errno(r
, "Failed to parse cgroup memory limit '%s', ignoring: %m", value
);
441 if (lim
== UINT64_MAX
)
444 /* Make sure the limit is a multiple of our own page size */
448 return MIN(mem
, lim
);
/* physical_memory_scale(): scales the physical memory size by the fraction
 * v / max (see the inline comment for the documented contract, including the
 * UINT64_MAX overflow result and the rounding down to a page multiple).
 *
 * v:   numerator of the fraction
 * max: denominator of the fraction
 *
 * Visible pieces: the physical memory size is first converted into a page
 * count, p = physical_memory() / ps.
 *
 * NOTE(review): the origin of ps, the multiplication and division steps, and
 * the overflow checks are not part of this excerpt. */
451 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
452 uint64_t p
, m
, ps
, r
;
456 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
457 * the result is a multiple of the page size (rounds down). */
462 p
= physical_memory() / ps
;
/* system_tasks_max(): returns the maximum number of tasks for this system as
 * the smallest of TASKS_MAX and two runtime-discovered limits.
 *
 * Visible behaviour:
 *  - a and b both start at TASKS_MAX;
 *  - a is filled in by procfs_tasks_get_limit();
 *  - b is parsed with safe_atou64() from the "pids.max" attribute of the root
 *    cgroup in the "pids" controller, unless that attribute reads "max";
 *  - every failure is only logged at debug level and otherwise ignored;
 *  - in the final MIN3() a value of zero in a or b is replaced by TASKS_MAX
 *    (both are unsigned, so "<= 0" can only mean zero).
 *
 * NOTE(review): the conditions guarding each log call are not part of this
 * excerpt. */
478 uint64_t system_tasks_max(void) {
480 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
481 _cleanup_free_
char *root
= NULL
;
484 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
487 * a) the maximum tasks value the kernel allows on this architecture
488 * b) the cgroups pids_max attribute for the system
489 * c) the kernel's configured maximum PID value
491 * And then pick the smallest of the three */
493 r
= procfs_tasks_get_limit(&a
);
495 log_debug_errno(r
, "Failed to read maximum number of tasks from /proc, ignoring: %m");
497 r
= cg_get_root_path(&root
);
499 log_debug_errno(r
, "Failed to determine cgroup root path, ignoring: %m");
501 _cleanup_free_
char *value
= NULL
;
503 r
= cg_get_attribute("pids", root
, "pids.max", &value
);
505 log_debug_errno(r
, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
506 else if (!streq(value
, "max")) {
507 r
= safe_atou64(value
, &b
);
509 log_debug_errno(r
, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
513 return MIN3(TASKS_MAX
,
514 a
<= 0 ? TASKS_MAX
: a
,
515 b
<= 0 ? TASKS_MAX
: b
);
/* system_tasks_max_scale(): scales the system task limit by the fraction
 * v / max (see the inline comment for the documented contract, including the
 * UINT64_MAX overflow result).
 *
 * v:   numerator of the fraction
 * max: denominator of the fraction
 *
 * Visible pieces: t is obtained from system_tasks_max(), and an overflow test
 * checks that dividing the product m by t gives back v.
 *
 * NOTE(review): the computation of m, the division by max and the return
 * statements are not part of this excerpt. */
518 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
523 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
524 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
526 t
= system_tasks_max();
530 if (m
/ t
!= v
) /* overflow? */
537 puts(PACKAGE_STRING
"\n"
/* This is a direct translation of str_verscmp from boot.c */

/* Returns true if c is one of the ASCII characters '0'..'9'.
 *
 * The test is a plain range comparison, so it does not depend on the locale
 * and is well defined for any int value, including 0 and negative values. */
static bool is_digit(int c) {
        return c >= '0' && c <= '9';
}
/* c_order(): maps a character to an int rank used when ordering the non-digit
 * parts of version strings (it is called from str_verscmp()).
 *
 * Visible pieces: NUL and digits form one class, lowercase ASCII letters
 * 'a'..'z' form another.
 *
 * NOTE(review): the values returned for each class, and for all remaining
 * characters, are not part of this excerpt. */
547 static int c_order(int c
) {
548 if (c
== 0 || is_digit(c
))
551 if ((c
>= 'a') && (c
<= 'z'))
/* str_verscmp(): compares two version strings.
 *
 * s1, s2: the NUL-terminated strings to compare
 *
 * Visible behaviour:
 *  - the original pointers are kept in os1/os2;
 *  - a first loop runs while either string is at a non-digit, non-NUL
 *    character, comparing positions by c_order(*s1) - c_order(*s2);
 *  - a second loop runs while both strings are at a digit;
 *  - the last visible return falls back to strcmp() on the original strings.
 *
 * NOTE(review): the enclosing outer loop, the numeric comparison of digit
 * runs and the early returns are not part of this excerpt. */
557 int str_verscmp(const char *s1
, const char *s2
) {
558 const char *os1
, *os2
;
569 while ((*s1
&& !is_digit(*s1
)) || (*s2
&& !is_digit(*s2
))) {
572 order
= c_order(*s1
) - c_order(*s2
);
585 while (is_digit(*s1
) && is_digit(*s2
)) {
601 return strcmp(os1
, os2
);
/* disable_coredumps(): best-effort switch-off of core dumps.
 *
 * Visible behaviour:
 *  - detect_container() > 0 is singled out first (per the comment below, the
 *    function only acts outside of a container);
 *  - "|/bin/false" is written to /proc/sys/kernel/core_pattern with
 *    WRITE_STRING_FILE_DISABLE_BUFFER;
 *  - a failed write is only logged at debug level; nothing is returned.
 *
 * NOTE(review): the statement taken in the container case and the condition
 * guarding the log call are not part of this excerpt. */
604 /* Turn off core dumps but only if we're running outside of a container. */
605 void disable_coredumps(void) {
608 if (detect_container() > 0)
611 r
= write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER
);
613 log_debug_errno(r
, "Failed to turn off coredumps, ignoring: %m");