1 /* SPDX-License-Identifier: LGPL-2.1+ */
13 #include <sys/prctl.h>
14 #include <sys/statfs.h>
15 #include <sys/sysmacros.h>
16 #include <sys/types.h>
19 #include "alloc-util.h"
20 #include "btrfs-util.h"
22 #include "cgroup-util.h"
24 #include "device-nodes.h"
25 #include "dirent-util.h"
30 #include "format-util.h"
32 #include "hostname-util.h"
36 #include "parse-util.h"
37 #include "path-util.h"
38 #include "process-util.h"
39 #include "procfs-util.h"
41 #include "signal-util.h"
42 #include "stat-util.h"
43 #include "string-util.h"
45 #include "time-util.h"
46 #include "umask-util.h"
47 #include "user-util.h"
52 char **saved_argv
= NULL
;
53 static int saved_in_initrd
= -1;
55 size_t page_size(void) {
56 static thread_local
size_t pgsz
= 0;
59 if (_likely_(pgsz
> 0))
62 r
= sysconf(_SC_PAGESIZE
);
69 bool plymouth_running(void) {
70 return access("/run/plymouth/pid", F_OK
) >= 0;
73 bool display_is_local(const char *display
) {
82 bool kexec_loaded(void) {
83 _cleanup_free_
char *s
= NULL
;
85 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) < 0)
91 int prot_from_flags(int flags
) {
93 switch (flags
& O_ACCMODE
) {
102 return PROT_READ
|PROT_WRITE
;
109 bool in_initrd(void) {
113 if (saved_in_initrd
>= 0)
114 return saved_in_initrd
;
116 /* We make two checks here:
118 * 1. the flag file /etc/initrd-release must exist
119 * 2. the root file system must be a memory file system
121 * The second check is extra paranoia, since misdetecting an
122 * initrd can have bad consequences due to the initrd
123 * emptying when transitioning to the main systemd.
126 r
= getenv_bool_secure("SYSTEMD_IN_INITRD");
127 if (r
< 0 && r
!= -ENXIO
)
128 log_debug_errno(r
, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
131 saved_in_initrd
= r
> 0;
133 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
134 statfs("/", &s
) >= 0 &&
137 return saved_in_initrd
;
140 void in_initrd_force(bool value
) {
141 saved_in_initrd
= value
;
144 /* hey glibc, APIs with callbacks without a user pointer are so useless */
145 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
146 __compar_d_fn_t compar
, void *arg
) {
151 assert(!size_multiply_overflow(nmemb
, size
));
157 p
= (const uint8_t*) base
+ idx
* size
;
158 comparison
= compar(key
, p
, arg
);
161 else if (comparison
> 0)
169 bool memeqzero(const void *data
, size_t length
) {
170 /* Does the buffer consist entirely of NULs?
171 * Copied from https://github.com/systemd/casync/, copied in turn from
172 * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
173 * which is licensed CC-0.
176 const uint8_t *p
= data
;
179 /* Check first 16 bytes manually */
180 for (i
= 0; i
< 16; i
++, length
--) {
187 /* Now we know first 16 bytes are NUL, memcmp with self. */
188 return memcmp(data
, p
+ i
, length
) == 0;
191 int on_ac_power(void) {
192 bool found_offline
= false, found_online
= false;
193 _cleanup_closedir_
DIR *d
= NULL
;
196 d
= opendir("/sys/class/power_supply");
198 return errno
== ENOENT
? true : -errno
;
200 FOREACH_DIRENT(de
, d
, return -errno
) {
201 _cleanup_close_
int fd
= -1, device
= -1;
205 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
207 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
213 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
221 n
= read(fd
, contents
, sizeof(contents
));
225 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
229 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
237 n
= read(fd
, contents
, sizeof(contents
));
241 if (n
!= 2 || contents
[1] != '\n')
244 if (contents
[0] == '1') {
247 } else if (contents
[0] == '0')
248 found_offline
= true;
253 return found_online
|| !found_offline
;
256 int container_get_leader(const char *machine
, pid_t
*pid
) {
257 _cleanup_free_
char *s
= NULL
, *class = NULL
;
265 if (streq(machine
, ".host")) {
270 if (!machine_name_is_valid(machine
))
273 p
= strjoina("/run/systemd/machines/", machine
);
274 r
= parse_env_file(NULL
, p
,
284 if (!streq_ptr(class, "container"))
287 r
= parse_pid(s
, &leader
);
297 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
298 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
306 mntns
= procfs_file_alloca(pid
, "ns/mnt");
307 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
315 pidns
= procfs_file_alloca(pid
, "ns/pid");
316 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
324 netns
= procfs_file_alloca(pid
, "ns/net");
325 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
333 userns
= procfs_file_alloca(pid
, "ns/user");
334 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
335 if (usernsfd
< 0 && errno
!= ENOENT
)
342 root
= procfs_file_alloca(pid
, "root");
343 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
358 *userns_fd
= usernsfd
;
363 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
368 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
369 if (userns_fd
>= 0) {
370 /* Can't setns to your own userns, since then you could
371 * escalate from non-root to root in your own namespace, so
372 * check if the namespaces are equal before attempting to enter. */
373 _cleanup_free_
char *userns_fd_path
= NULL
;
375 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
378 r
= files_same(userns_fd_path
, "/proc/self/ns/user", 0);
386 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
390 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
394 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
398 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
402 if (fchdir(root_fd
) < 0)
409 return reset_uid_gid();
412 uint64_t physical_memory(void) {
413 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
419 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
422 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
423 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
425 sc
= sysconf(_SC_PHYS_PAGES
);
429 mem
= (uint64_t) sc
* (uint64_t) ps
;
431 r
= cg_get_root_path(&root
);
433 log_debug_errno(r
, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
437 r
= cg_all_unified();
439 log_debug_errno(r
, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
443 r
= cg_get_attribute("memory", root
, "memory.max", &value
);
445 log_debug_errno(r
, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
449 if (streq(value
, "max"))
452 r
= cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
);
454 log_debug_errno(r
, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
459 r
= safe_atou64(value
, &lim
);
461 log_debug_errno(r
, "Failed to parse cgroup memory limit '%s', ignoring: %m", value
);
464 if (lim
== UINT64_MAX
)
467 /* Make sure the limit is a multiple of our own page size */
471 return MIN(mem
, lim
);
474 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
475 uint64_t p
, m
, ps
, r
;
479 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
480 * the result is a multiple of the page size (rounds down). */
485 p
= physical_memory() / ps
;
501 uint64_t system_tasks_max(void) {
503 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
504 _cleanup_free_
char *root
= NULL
;
507 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
510 * a) the maximum tasks value the kernel allows on this architecture
511 * b) the cgroups pids_max attribute for the system
512 * c) the kernel's configured maximum PID value
514 * And then pick the smallest of the three */
516 r
= procfs_tasks_get_limit(&a
);
518 log_debug_errno(r
, "Failed to read maximum number of tasks from /proc, ignoring: %m");
520 r
= cg_get_root_path(&root
);
522 log_debug_errno(r
, "Failed to determine cgroup root path, ignoring: %m");
524 _cleanup_free_
char *value
= NULL
;
526 r
= cg_get_attribute("pids", root
, "pids.max", &value
);
528 log_debug_errno(r
, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
529 else if (!streq(value
, "max")) {
530 r
= safe_atou64(value
, &b
);
532 log_debug_errno(r
, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
536 return MIN3(TASKS_MAX
,
537 a
<= 0 ? TASKS_MAX
: a
,
538 b
<= 0 ? TASKS_MAX
: b
);
541 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
546 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
547 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
549 t
= system_tasks_max();
553 if (m
/ t
!= v
) /* overflow? */
560 puts(PACKAGE_STRING
"\n"
565 /* This is a direct translation of str_verscmp from boot.c */
566 static bool is_digit(int c
) {
567 return c
>= '0' && c
<= '9';
570 static int c_order(int c
) {
571 if (c
== 0 || is_digit(c
))
574 if ((c
>= 'a') && (c
<= 'z'))
580 int str_verscmp(const char *s1
, const char *s2
) {
581 const char *os1
, *os2
;
592 while ((*s1
&& !is_digit(*s1
)) || (*s2
&& !is_digit(*s2
))) {
595 order
= c_order(*s1
) - c_order(*s2
);
608 while (is_digit(*s1
) && is_digit(*s2
)) {
624 return strcmp(os1
, os2
);
627 /* Turn off core dumps but only if we're running outside of a container. */
628 void disable_coredumps(void) {
631 if (detect_container() > 0)
634 r
= write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER
);
636 log_debug_errno(r
, "Failed to turn off coredumps, ignoring: %m");