]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/util.c
pkgconfig: define variables relative to ${prefix}/${rootprefix}/${sysconfdir}
[thirdparty/systemd.git] / src / basic / util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <alloca.h>
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <sched.h>
7 #include <signal.h>
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/mman.h>
13 #include <sys/prctl.h>
14 #include <sys/statfs.h>
15 #include <sys/sysmacros.h>
16 #include <sys/types.h>
17 #include <unistd.h>
18
19 #include "alloc-util.h"
20 #include "btrfs-util.h"
21 #include "build.h"
22 #include "cgroup-util.h"
23 #include "def.h"
24 #include "device-nodes.h"
25 #include "dirent-util.h"
26 #include "env-util.h"
27 #include "fd-util.h"
28 #include "fileio.h"
29 #include "format-util.h"
30 #include "hashmap.h"
31 #include "hostname-util.h"
32 #include "log.h"
33 #include "macro.h"
34 #include "missing.h"
35 #include "parse-util.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "procfs-util.h"
39 #include "set.h"
40 #include "signal-util.h"
41 #include "stat-util.h"
42 #include "string-util.h"
43 #include "strv.h"
44 #include "time-util.h"
45 #include "umask-util.h"
46 #include "user-util.h"
47 #include "util.h"
48 #include "virt.h"
49
/* Process-wide copies of the program's argument count and vector. Nothing in this part of the file
 * writes them; presumably they are filled in by program startup code elsewhere (TODO confirm). */
int saved_argc = 0;
char **saved_argv = NULL;

/* Cached answer of in_initrd(): negative means "not determined yet", otherwise 0 or 1. Can be
 * overridden through in_initrd_force(). */
static int saved_in_initrd = -1;
53
/* Returns the page size as reported by sysconf(_SC_PAGESIZE). The value is queried once per thread and
 * served from a thread-local cache afterwards. */
size_t page_size(void) {
        static thread_local size_t cached = 0;

        if (!_likely_(cached > 0)) {
                long sz;

                /* First call on this thread: ask the system and remember the answer */
                sz = sysconf(_SC_PAGESIZE);
                assert(sz > 0);

                cached = (size_t) sz;
        }

        return cached;
}
67
/* Returns true if the file /run/plymouth/pid exists (as far as access() can tell), false otherwise. */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
71
/* Returns true if the display string starts with a colon immediately followed by a decimal digit
 * (e.g. ":0"). The argument must not be NULL. */
bool display_is_local(const char *display) {
        assert(display);

        if (display[0] != ':')
                return false;

        /* display[1] is at worst the terminating NUL here, which is not a digit */
        return display[1] >= '0' && display[1] <= '9';
}
80
81 bool kexec_loaded(void) {
82 _cleanup_free_ char *s = NULL;
83
84 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
85 return false;
86
87 return s[0] == '1';
88 }
89
/* Translates the access mode bits of open() flags into the matching mmap() protection bits:
 * O_RDONLY → PROT_READ, O_WRONLY → PROT_WRITE, O_RDWR → PROT_READ|PROT_WRITE. Any other access mode
 * yields -EINVAL. Flags outside of O_ACCMODE are ignored. */
int prot_from_flags(int flags) {
        int accmode = flags & O_ACCMODE;

        if (accmode == O_RDONLY)
                return PROT_READ;

        if (accmode == O_WRONLY)
                return PROT_WRITE;

        if (accmode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
107
108 bool in_initrd(void) {
109 struct statfs s;
110 int r;
111
112 if (saved_in_initrd >= 0)
113 return saved_in_initrd;
114
115 /* We make two checks here:
116 *
117 * 1. the flag file /etc/initrd-release must exist
118 * 2. the root file system must be a memory file system
119 *
120 * The second check is extra paranoia, since misdetecting an
121 * initrd can have bad consequences due the initrd
122 * emptying when transititioning to the main systemd.
123 */
124
125 r = getenv_bool_secure("SYSTEMD_IN_INITRD");
126 if (r < 0 && r != -ENXIO)
127 log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
128
129 if (r >= 0)
130 saved_in_initrd = r > 0;
131 else
132 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
133 statfs("/", &s) >= 0 &&
134 is_temporary_fs(&s);
135
136 return saved_in_initrd;
137 }
138
139 void in_initrd_force(bool value) {
140 saved_in_initrd = value;
141 }
142
143 /* hey glibc, APIs with callbacks without a user pointer are so useless */
144 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
145 __compar_d_fn_t compar, void *arg) {
146 size_t l, u, idx;
147 const void *p;
148 int comparison;
149
150 assert(!size_multiply_overflow(nmemb, size));
151
152 l = 0;
153 u = nmemb;
154 while (l < u) {
155 idx = (l + u) / 2;
156 p = (const uint8_t*) base + idx * size;
157 comparison = compar(key, p, arg);
158 if (comparison < 0)
159 u = idx;
160 else if (comparison > 0)
161 l = idx + 1;
162 else
163 return (void *)p;
164 }
165 return NULL;
166 }
167
168 int on_ac_power(void) {
169 bool found_offline = false, found_online = false;
170 _cleanup_closedir_ DIR *d = NULL;
171 struct dirent *de;
172
173 d = opendir("/sys/class/power_supply");
174 if (!d)
175 return errno == ENOENT ? true : -errno;
176
177 FOREACH_DIRENT(de, d, return -errno) {
178 _cleanup_close_ int fd = -1, device = -1;
179 char contents[6];
180 ssize_t n;
181
182 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
183 if (device < 0) {
184 if (IN_SET(errno, ENOENT, ENOTDIR))
185 continue;
186
187 return -errno;
188 }
189
190 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
191 if (fd < 0) {
192 if (errno == ENOENT)
193 continue;
194
195 return -errno;
196 }
197
198 n = read(fd, contents, sizeof(contents));
199 if (n < 0)
200 return -errno;
201
202 if (n != 6 || memcmp(contents, "Mains\n", 6))
203 continue;
204
205 safe_close(fd);
206 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
207 if (fd < 0) {
208 if (errno == ENOENT)
209 continue;
210
211 return -errno;
212 }
213
214 n = read(fd, contents, sizeof(contents));
215 if (n < 0)
216 return -errno;
217
218 if (n != 2 || contents[1] != '\n')
219 return -EIO;
220
221 if (contents[0] == '1') {
222 found_online = true;
223 break;
224 } else if (contents[0] == '0')
225 found_offline = true;
226 else
227 return -EIO;
228 }
229
230 return found_online || !found_offline;
231 }
232
233 int container_get_leader(const char *machine, pid_t *pid) {
234 _cleanup_free_ char *s = NULL, *class = NULL;
235 const char *p;
236 pid_t leader;
237 int r;
238
239 assert(machine);
240 assert(pid);
241
242 if (streq(machine, ".host")) {
243 *pid = 1;
244 return 0;
245 }
246
247 if (!machine_name_is_valid(machine))
248 return -EINVAL;
249
250 p = strjoina("/run/systemd/machines/", machine);
251 r = parse_env_file(NULL, p,
252 "LEADER", &s,
253 "CLASS", &class);
254 if (r == -ENOENT)
255 return -EHOSTDOWN;
256 if (r < 0)
257 return r;
258 if (!s)
259 return -EIO;
260
261 if (!streq_ptr(class, "container"))
262 return -EIO;
263
264 r = parse_pid(s, &leader);
265 if (r < 0)
266 return r;
267 if (leader <= 1)
268 return -EIO;
269
270 *pid = leader;
271 return 0;
272 }
273
274 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
275 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
276 int rfd = -1;
277
278 assert(pid >= 0);
279
280 if (mntns_fd) {
281 const char *mntns;
282
283 mntns = procfs_file_alloca(pid, "ns/mnt");
284 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
285 if (mntnsfd < 0)
286 return -errno;
287 }
288
289 if (pidns_fd) {
290 const char *pidns;
291
292 pidns = procfs_file_alloca(pid, "ns/pid");
293 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
294 if (pidnsfd < 0)
295 return -errno;
296 }
297
298 if (netns_fd) {
299 const char *netns;
300
301 netns = procfs_file_alloca(pid, "ns/net");
302 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
303 if (netnsfd < 0)
304 return -errno;
305 }
306
307 if (userns_fd) {
308 const char *userns;
309
310 userns = procfs_file_alloca(pid, "ns/user");
311 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
312 if (usernsfd < 0 && errno != ENOENT)
313 return -errno;
314 }
315
316 if (root_fd) {
317 const char *root;
318
319 root = procfs_file_alloca(pid, "root");
320 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
321 if (rfd < 0)
322 return -errno;
323 }
324
325 if (pidns_fd)
326 *pidns_fd = pidnsfd;
327
328 if (mntns_fd)
329 *mntns_fd = mntnsfd;
330
331 if (netns_fd)
332 *netns_fd = netnsfd;
333
334 if (userns_fd)
335 *userns_fd = usernsfd;
336
337 if (root_fd)
338 *root_fd = rfd;
339
340 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
341
342 return 0;
343 }
344
345 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
346 if (userns_fd >= 0) {
347 /* Can't setns to your own userns, since then you could
348 * escalate from non-root to root in your own namespace, so
349 * check if namespaces equal before attempting to enter. */
350 _cleanup_free_ char *userns_fd_path = NULL;
351 int r;
352 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
353 return -ENOMEM;
354
355 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
356 if (r < 0)
357 return r;
358 if (r)
359 userns_fd = -1;
360 }
361
362 if (pidns_fd >= 0)
363 if (setns(pidns_fd, CLONE_NEWPID) < 0)
364 return -errno;
365
366 if (mntns_fd >= 0)
367 if (setns(mntns_fd, CLONE_NEWNS) < 0)
368 return -errno;
369
370 if (netns_fd >= 0)
371 if (setns(netns_fd, CLONE_NEWNET) < 0)
372 return -errno;
373
374 if (userns_fd >= 0)
375 if (setns(userns_fd, CLONE_NEWUSER) < 0)
376 return -errno;
377
378 if (root_fd >= 0) {
379 if (fchdir(root_fd) < 0)
380 return -errno;
381
382 if (chroot(".") < 0)
383 return -errno;
384 }
385
386 return reset_uid_gid();
387 }
388
389 uint64_t physical_memory(void) {
390 _cleanup_free_ char *root = NULL, *value = NULL;
391 uint64_t mem, lim;
392 size_t ps;
393 long sc;
394 int r;
395
396 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
397 * memory.
398 *
399 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
400 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
401
402 sc = sysconf(_SC_PHYS_PAGES);
403 assert(sc > 0);
404
405 ps = page_size();
406 mem = (uint64_t) sc * (uint64_t) ps;
407
408 r = cg_get_root_path(&root);
409 if (r < 0) {
410 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
411 return mem;
412 }
413
414 r = cg_all_unified();
415 if (r < 0) {
416 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
417 return mem;
418 }
419 if (r > 0) {
420 r = cg_get_attribute("memory", root, "memory.max", &value);
421 if (r < 0) {
422 log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
423 return mem;
424 }
425
426 if (streq(value, "max"))
427 return mem;
428 } else {
429 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
430 if (r < 0) {
431 log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
432 return mem;
433 }
434 }
435
436 r = safe_atou64(value, &lim);
437 if (r < 0) {
438 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
439 return mem;
440 }
441 if (lim == UINT64_MAX)
442 return mem;
443
444 /* Make sure the limit is a multiple of our own page size */
445 lim /= ps;
446 lim *= ps;
447
448 return MIN(mem, lim);
449 }
450
/* Returns physical_memory() multiplied by v and divided by max, computed in units of pages so that the
 * result is a multiple of the page size (rounded down). Returns UINT64_MAX if the calculation
 * overflows. max must be non-zero. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t pages, scaled, pgsz, bytes;

        assert(max > 0);

        pgsz = page_size();
        assert(pgsz > 0);

        pages = physical_memory() / pgsz;
        assert(pages > 0);

        /* pages × v, with overflow detected by dividing back */
        scaled = pages * v;
        if (scaled / pages != v)
                return UINT64_MAX;

        scaled /= max;

        /* Convert back to bytes, checking for overflow the same way */
        bytes = scaled * pgsz;
        if (bytes / pgsz != scaled)
                return UINT64_MAX;

        return bytes;
}
477
478 uint64_t system_tasks_max(void) {
479
480 uint64_t a = TASKS_MAX, b = TASKS_MAX;
481 _cleanup_free_ char *root = NULL;
482 int r;
483
484 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
485 * limit:
486 *
487 * a) the maximum tasks value the kernel allows on this architecture
488 * b) the cgroups pids_max attribute for the system
489 * c) the kernel's configured maximum PID value
490 *
491 * And then pick the smallest of the three */
492
493 r = procfs_tasks_get_limit(&a);
494 if (r < 0)
495 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
496
497 r = cg_get_root_path(&root);
498 if (r < 0)
499 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
500 else {
501 _cleanup_free_ char *value = NULL;
502
503 r = cg_get_attribute("pids", root, "pids.max", &value);
504 if (r < 0)
505 log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
506 else if (!streq(value, "max")) {
507 r = safe_atou64(value, &b);
508 if (r < 0)
509 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
510 }
511 }
512
513 return MIN3(TASKS_MAX,
514 a <= 0 ? TASKS_MAX : a,
515 b <= 0 ? TASKS_MAX : b);
516 }
517
/* Scales the system's maximum number of tasks by the fraction v/max, i.e. with max == 100 the value v
 * is a percentage of system_tasks_max(). Returns UINT64_MAX if the multiplication overflows. max must
 * be non-zero. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit, product;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        product = limit * v;

        /* Dividing back must yield v again, otherwise the product wrapped around */
        if (product / limit != v)
                return UINT64_MAX;

        return product / max;
}
535
536 int version(void) {
537 puts(PACKAGE_STRING "\n"
538 SYSTEMD_FEATURES);
539 return 0;
540 }
541
/* This is a direct translation of str_verscmp from boot.c */

/* Returns true for the ASCII decimal digits '0'…'9' */
static bool is_digit(int c) {
        return '0' <= c && c <= '9';
}

/* Sort weight of a character outside of numeric runs: NUL and digits weigh nothing, lowercase ASCII
 * letters keep their character value, everything else is placed after all lowercase letters. */
static int c_order(int c) {
        if (c == 0 || is_digit(c))
                return 0;

        return (c >= 'a' && c <= 'z') ? c : c + 0x10000;
}

/* Compares two version strings. Non-numeric parts are compared character by character using
 * c_order(), numeric parts are compared by value with leading zeroes ignored. If this finds no
 * difference the result of a plain strcmp() on the two strings is returned.
 *
 * Returns a negative value, zero, or a positive value if s1 sorts before, equal to, or after s2. */
int str_verscmp(const char *s1, const char *s2) {
        const char *a, *b;

        assert(s1);
        assert(s2);

        a = s1;
        b = s2;

        while (*a || *b) {
                int digit_diff = 0;

                /* Walk the non-numeric part, until both sides are at a digit or at the end */
                for (; (*a && !is_digit(*a)) || (*b && !is_digit(*b)); a++, b++) {
                        int order;

                        order = c_order(*a) - c_order(*b);
                        if (order != 0)
                                return order;
                }

                /* Leading zeroes carry no weight */
                a += strspn(a, "0");
                b += strspn(b, "0");

                /* Walk both numbers in parallel, remembering the first digit that differs */
                for (; is_digit(*a) && is_digit(*b); a++, b++)
                        if (digit_diff == 0)
                                digit_diff = *a - *b;

                /* Whichever side still has digits left is the longer, hence larger number */
                if (is_digit(*a))
                        return 1;
                if (is_digit(*b))
                        return -1;

                /* Same number of digits: the first differing digit decides */
                if (digit_diff != 0)
                        return digit_diff;
        }

        return strcmp(s1, s2);
}
603
604 /* Turn off core dumps but only if we're running outside of a container. */
605 void disable_coredumps(void) {
606 int r;
607
608 if (detect_container() > 0)
609 return;
610
611 r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
612 if (r < 0)
613 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
614 }