]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/util.c
Merge pull request #11827 from keszybz/pkgconfig-variables
[thirdparty/systemd.git] / src / basic / util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
a7334b09 2
11c3a366 3#include <alloca.h>
60918275 4#include <errno.h>
f6c2284a 5#include <fcntl.h>
f6c2284a
LP
6#include <sched.h>
7#include <signal.h>
8#include <stdarg.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
87d2c1ff 12#include <sys/mman.h>
f6c2284a 13#include <sys/prctl.h>
11c3a366
TA
14#include <sys/statfs.h>
15#include <sys/sysmacros.h>
f6c2284a 16#include <sys/types.h>
f6c2284a 17#include <unistd.h>
eef46c37 18
b5efdb8a 19#include "alloc-util.h"
c43b2b9c 20#include "btrfs-util.h"
3f6fd1ba 21#include "build.h"
d9ab2bcf 22#include "cgroup-util.h"
f6c2284a 23#include "def.h"
553e15f2 24#include "device-nodes.h"
cf0fbc49 25#include "dirent-util.h"
686d13b9 26#include "env-file.h"
0307ea49 27#include "env-util.h"
3ffd4af2 28#include "fd-util.h"
f6c2284a 29#include "fileio.h"
f97b34a6 30#include "format-util.h"
f6c2284a
LP
31#include "hashmap.h"
32#include "hostname-util.h"
a9f5d454 33#include "log.h"
f6c2284a
LP
34#include "macro.h"
35#include "missing.h"
6bedfcbb 36#include "parse-util.h"
9eb977db 37#include "path-util.h"
0b452006 38#include "process-util.h"
1e7da35b 39#include "procfs-util.h"
11c3a366 40#include "set.h"
93cc7779 41#include "signal-util.h"
cf0fbc49 42#include "stat-util.h"
07630cea 43#include "string-util.h"
f6c2284a 44#include "strv.h"
93cc7779 45#include "time-util.h"
8612da97 46#include "umask-util.h"
b1d4f8e1 47#include "user-util.h"
4f5dd394 48#include "util.h"
9ce17593 49#include "virt.h"
56cf987f 50
9a0e6896
LP
/* Process-wide copies of the program's argument vector. They start out empty here.
 * NOTE(review): presumably filled in by the program's main() — confirm against callers. */
int saved_argc = 0;
char **saved_argv = NULL;

/* Cached answer of in_initrd(): negative means "not determined yet", otherwise 0 (no) or 1 (yes). */
static int saved_in_initrd = -1;
9086e840 54
37f85e66 55size_t page_size(void) {
ec202eae 56 static thread_local size_t pgsz = 0;
37f85e66 57 long r;
58
87d2c1ff 59 if (_likely_(pgsz > 0))
37f85e66 60 return pgsz;
61
e67f47e5
LP
62 r = sysconf(_SC_PAGESIZE);
63 assert(r > 0);
37f85e66 64
65 pgsz = (size_t) r;
37f85e66 66 return pgsz;
67}
68
a88c8750
TG
/* Reports whether Plymouth appears to be running, judged solely by whether its PID file
 * /run/plymouth/pid exists (access() with F_OK). */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
72
4d6d6518
LP
/* Returns true if the display string starts with ':' immediately followed by a decimal digit
 * (e.g. ":0"), i.e. nothing precedes the colon. The argument must not be NULL. */
bool display_is_local(const char *display) {
        assert(display);

        if (display[0] != ':')
                return false;

        /* display[1] is safe to read: display[0] was ':' and hence not the terminating NUL. */
        return display[1] >= '0' && display[1] <= '9';
}
81
65457142 82bool kexec_loaded(void) {
c47f86e6
ZJS
83 _cleanup_free_ char *s = NULL;
84
85 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
86 return false;
87
88 return s[0] == '1';
65457142 89}
fb9de93d 90
87d2c1ff
LP
/* Translates the access mode contained in a set of open(2) flags into the matching mmap(2)
 * protection bits:
 *
 *   O_RDONLY → PROT_READ
 *   O_WRONLY → PROT_WRITE
 *   O_RDWR   → PROT_READ|PROT_WRITE
 *
 * Any other access mode yields -EINVAL. Bits outside of O_ACCMODE are ignored. */
int prot_from_flags(int flags) {
        int mode = flags & O_ACCMODE;

        if (mode == O_RDONLY)
                return PROT_READ;

        if (mode == O_WRONLY)
                return PROT_WRITE;

        if (mode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
689b9a22 108
9be346c9 109bool in_initrd(void) {
825c6fe5 110 struct statfs s;
0307ea49 111 int r;
8f33b5b8 112
dcd61450
IS
113 if (saved_in_initrd >= 0)
114 return saved_in_initrd;
825c6fe5
LP
115
116 /* We make two checks here:
117 *
118 * 1. the flag file /etc/initrd-release must exist
119 * 2. the root file system must be a memory file system
120 *
121 * The second check is extra paranoia, since misdetecting an
629ff674 122 * initrd can have bad consequences due the initrd
825c6fe5
LP
123 * emptying when transititioning to the main systemd.
124 */
125
0307ea49
ZJS
126 r = getenv_bool_secure("SYSTEMD_IN_INITRD");
127 if (r < 0 && r != -ENXIO)
128 log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
129
130 if (r >= 0)
131 saved_in_initrd = r > 0;
132 else
133 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
134 statfs("/", &s) >= 0 &&
135 is_temporary_fs(&s);
9be346c9 136
dcd61450
IS
137 return saved_in_initrd;
138}
139
140void in_initrd_force(bool value) {
141 saved_in_initrd = value;
9be346c9 142}
069cfc85 143
a9e12476
KS
144/* hey glibc, APIs with callbacks without a user pointer are so useless */
145void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
f0f6d791 146 __compar_d_fn_t compar, void *arg) {
a9e12476
KS
147 size_t l, u, idx;
148 const void *p;
149 int comparison;
150
2901f4b3
LP
151 assert(!size_multiply_overflow(nmemb, size));
152
a9e12476
KS
153 l = 0;
154 u = nmemb;
155 while (l < u) {
156 idx = (l + u) / 2;
2901f4b3 157 p = (const uint8_t*) base + idx * size;
a9e12476
KS
158 comparison = compar(key, p, arg);
159 if (comparison < 0)
160 u = idx;
161 else if (comparison > 0)
162 l = idx + 1;
163 else
164 return (void *)p;
165 }
166 return NULL;
167}
09017585 168
7f6bfc56
ZJS
/* Does the buffer consist entirely of NULs? An empty buffer counts as all-NUL.
 *
 * Copied from https://github.com/systemd/casync/, copied in turn from
 * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
 * which is licensed CC-0. */
bool memeqzero(const void *data, size_t length) {
        const uint8_t *bytes = data;
        size_t head, k;

        /* Inspect up to the first 16 bytes one by one. */
        head = length < 16 ? length : 16;
        for (k = 0; k < head; k++)
                if (bytes[k] != 0)
                        return false;

        if (length <= 16)
                return true;

        /* The first 16 bytes are known to be NUL now. Comparing the buffer against itself shifted
         * by 16 bytes then proves every remaining byte equals a byte already known to be NUL. */
        return memcmp(bytes, bytes + 16, length - 16) == 0;
}
190
240dbaa4
LP
191int on_ac_power(void) {
192 bool found_offline = false, found_online = false;
193 _cleanup_closedir_ DIR *d = NULL;
8fb3f009 194 struct dirent *de;
240dbaa4
LP
195
196 d = opendir("/sys/class/power_supply");
197 if (!d)
6d890034 198 return errno == ENOENT ? true : -errno;
240dbaa4 199
8fb3f009 200 FOREACH_DIRENT(de, d, return -errno) {
240dbaa4
LP
201 _cleanup_close_ int fd = -1, device = -1;
202 char contents[6];
203 ssize_t n;
240dbaa4 204
240dbaa4
LP
205 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
206 if (device < 0) {
3742095b 207 if (IN_SET(errno, ENOENT, ENOTDIR))
240dbaa4
LP
208 continue;
209
210 return -errno;
211 }
212
213 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
214 if (fd < 0) {
215 if (errno == ENOENT)
216 continue;
217
218 return -errno;
219 }
220
221 n = read(fd, contents, sizeof(contents));
222 if (n < 0)
223 return -errno;
224
225 if (n != 6 || memcmp(contents, "Mains\n", 6))
226 continue;
227
03e334a1 228 safe_close(fd);
240dbaa4
LP
229 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
230 if (fd < 0) {
231 if (errno == ENOENT)
232 continue;
233
234 return -errno;
235 }
236
237 n = read(fd, contents, sizeof(contents));
238 if (n < 0)
239 return -errno;
240
241 if (n != 2 || contents[1] != '\n')
242 return -EIO;
243
244 if (contents[0] == '1') {
245 found_online = true;
246 break;
247 } else if (contents[0] == '0')
248 found_offline = true;
249 else
250 return -EIO;
251 }
252
253 return found_online || !found_offline;
254}
fabe5c0e 255
bc9fd78c
LP
256int container_get_leader(const char *machine, pid_t *pid) {
257 _cleanup_free_ char *s = NULL, *class = NULL;
258 const char *p;
259 pid_t leader;
260 int r;
261
262 assert(machine);
263 assert(pid);
264
1e5057b9
LP
265 if (streq(machine, ".host")) {
266 *pid = 1;
267 return 0;
268 }
269
b9a8d250
LP
270 if (!machine_name_is_valid(machine))
271 return -EINVAL;
272
63c372cb 273 p = strjoina("/run/systemd/machines/", machine);
13df9c39
LP
274 r = parse_env_file(NULL, p,
275 "LEADER", &s,
276 "CLASS", &class);
bc9fd78c
LP
277 if (r == -ENOENT)
278 return -EHOSTDOWN;
279 if (r < 0)
280 return r;
281 if (!s)
282 return -EIO;
283
284 if (!streq_ptr(class, "container"))
285 return -EIO;
286
287 r = parse_pid(s, &leader);
288 if (r < 0)
289 return r;
290 if (leader <= 1)
291 return -EIO;
292
293 *pid = leader;
294 return 0;
295}
296
671c3419
RM
297int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
298 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
359a06aa 299 int rfd = -1;
bc9fd78c
LP
300
301 assert(pid >= 0);
bc9fd78c 302
878cd7e9
LP
303 if (mntns_fd) {
304 const char *mntns;
a4475f57 305
878cd7e9
LP
306 mntns = procfs_file_alloca(pid, "ns/mnt");
307 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
308 if (mntnsfd < 0)
309 return -errno;
310 }
bc9fd78c 311
878cd7e9
LP
312 if (pidns_fd) {
313 const char *pidns;
314
315 pidns = procfs_file_alloca(pid, "ns/pid");
316 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
317 if (pidnsfd < 0)
318 return -errno;
319 }
320
321 if (netns_fd) {
322 const char *netns;
323
324 netns = procfs_file_alloca(pid, "ns/net");
325 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
326 if (netnsfd < 0)
327 return -errno;
328 }
329
671c3419
RM
330 if (userns_fd) {
331 const char *userns;
332
333 userns = procfs_file_alloca(pid, "ns/user");
334 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
335 if (usernsfd < 0 && errno != ENOENT)
336 return -errno;
337 }
338
878cd7e9
LP
339 if (root_fd) {
340 const char *root;
341
342 root = procfs_file_alloca(pid, "root");
343 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
344 if (rfd < 0)
345 return -errno;
346 }
347
348 if (pidns_fd)
349 *pidns_fd = pidnsfd;
bc9fd78c 350
878cd7e9
LP
351 if (mntns_fd)
352 *mntns_fd = mntnsfd;
353
354 if (netns_fd)
355 *netns_fd = netnsfd;
356
671c3419
RM
357 if (userns_fd)
358 *userns_fd = usernsfd;
359
878cd7e9
LP
360 if (root_fd)
361 *root_fd = rfd;
362
671c3419 363 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
bc9fd78c
LP
364
365 return 0;
366}
367
671c3419
RM
368int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
369 if (userns_fd >= 0) {
370 /* Can't setns to your own userns, since then you could
371 * escalate from non-root to root in your own namespace, so
372 * check if namespaces equal before attempting to enter. */
373 _cleanup_free_ char *userns_fd_path = NULL;
374 int r;
375 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
376 return -ENOMEM;
377
e3f791a2 378 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
671c3419
RM
379 if (r < 0)
380 return r;
381 if (r)
382 userns_fd = -1;
383 }
bc9fd78c 384
878cd7e9
LP
385 if (pidns_fd >= 0)
386 if (setns(pidns_fd, CLONE_NEWPID) < 0)
387 return -errno;
a4475f57 388
878cd7e9
LP
389 if (mntns_fd >= 0)
390 if (setns(mntns_fd, CLONE_NEWNS) < 0)
391 return -errno;
bc9fd78c 392
878cd7e9
LP
393 if (netns_fd >= 0)
394 if (setns(netns_fd, CLONE_NEWNET) < 0)
395 return -errno;
bc9fd78c 396
671c3419
RM
397 if (userns_fd >= 0)
398 if (setns(userns_fd, CLONE_NEWUSER) < 0)
399 return -errno;
400
878cd7e9
LP
401 if (root_fd >= 0) {
402 if (fchdir(root_fd) < 0)
403 return -errno;
404
405 if (chroot(".") < 0)
406 return -errno;
407 }
bc9fd78c 408
b4da6d6b 409 return reset_uid_gid();
bc9fd78c 410}
bf108e55 411
1c231f56 412uint64_t physical_memory(void) {
d9ab2bcf
LP
413 _cleanup_free_ char *root = NULL, *value = NULL;
414 uint64_t mem, lim;
415 size_t ps;
416 long sc;
bd969ee6 417 int r;
1c231f56 418
d9ab2bcf
LP
419 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
420 * memory.
421 *
422 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
423 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
424
425 sc = sysconf(_SC_PHYS_PAGES);
426 assert(sc > 0);
427
428 ps = page_size();
429 mem = (uint64_t) sc * (uint64_t) ps;
430
bd969ee6
LP
431 r = cg_get_root_path(&root);
432 if (r < 0) {
433 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
d9ab2bcf 434 return mem;
bd969ee6 435 }
d9ab2bcf 436
bd969ee6
LP
437 r = cg_all_unified();
438 if (r < 0) {
439 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
d9ab2bcf 440 return mem;
bd969ee6
LP
441 }
442 if (r > 0) {
443 r = cg_get_attribute("memory", root, "memory.max", &value);
444 if (r < 0) {
445 log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
446 return mem;
447 }
d9ab2bcf 448
bd969ee6
LP
449 if (streq(value, "max"))
450 return mem;
451 } else {
452 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
453 if (r < 0) {
454 log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
455 return mem;
456 }
457 }
458
459 r = safe_atou64(value, &lim);
460 if (r < 0) {
461 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
462 return mem;
463 }
464 if (lim == UINT64_MAX)
d9ab2bcf 465 return mem;
1c231f56 466
d9ab2bcf
LP
467 /* Make sure the limit is a multiple of our own page size */
468 lim /= ps;
469 lim *= ps;
1c231f56 470
d9ab2bcf 471 return MIN(mem, lim);
1c231f56 472}
6db615c1 473
d8cf2ac7
LP
/* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on
 * overflow. On success the result is a multiple of the page size (rounds down). 'max' must be
 * greater than zero. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t pgsz, pages, scaled;

        assert(max > 0);

        pgsz = page_size();
        assert(pgsz > 0);

        /* Do the arithmetic in units of pages, so that the result is page-aligned. */
        pages = physical_memory() / pgsz;
        assert(pages > 0);

        /* Would pages * v wrap around? */
        if (v > UINT64_MAX / pages)
                return UINT64_MAX;

        scaled = pages * v / max;

        /* Would the conversion back to bytes wrap around? */
        if (scaled > UINT64_MAX / pgsz)
                return UINT64_MAX;

        return scaled * pgsz;
}
500
83f8e808
LP
501uint64_t system_tasks_max(void) {
502
83f8e808 503 uint64_t a = TASKS_MAX, b = TASKS_MAX;
1e7da35b 504 _cleanup_free_ char *root = NULL;
0f578ea2 505 int r;
83f8e808
LP
506
507 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
508 * limit:
509 *
f3a367d6 510 * a) the maximum tasks value the kernel allows on this architecture
83f8e808 511 * b) the cgroups pids_max attribute for the system
f3a367d6 512 * c) the kernel's configured maximum PID value
83f8e808
LP
513 *
514 * And then pick the smallest of the three */
515
0f578ea2
LP
516 r = procfs_tasks_get_limit(&a);
517 if (r < 0)
518 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
83f8e808 519
0f578ea2
LP
520 r = cg_get_root_path(&root);
521 if (r < 0)
522 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
523 else {
1e7da35b 524 _cleanup_free_ char *value = NULL;
83f8e808 525
0f578ea2
LP
526 r = cg_get_attribute("pids", root, "pids.max", &value);
527 if (r < 0)
528 log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
529 else if (!streq(value, "max")) {
530 r = safe_atou64(value, &b);
531 if (r < 0)
532 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
533 }
83f8e808
LP
534 }
535
536 return MIN3(TASKS_MAX,
537 a <= 0 ? TASKS_MAX : a,
538 b <= 0 ? TASKS_MAX : b);
539}
540
/* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates
 * percentages relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow.
 * 'max' must be greater than zero. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        /* Would limit * v wrap around? */
        if (v > UINT64_MAX / limit)
                return UINT64_MAX;

        return limit * v / max;
}
558
3f6fd1ba 559int version(void) {
f1028f57 560 puts("systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n"
3f6fd1ba
LP
561 SYSTEMD_FEATURES);
562 return 0;
563}
68c58c67
LP
564
/* This is a direct translation of str_verscmp from boot.c */

/* True for the ASCII decimal digits '0'…'9'. */
static bool is_digit(int c) {
        return '0' <= c && c <= '9';
}

/* Sort weight of a single character in the non-numeric parts of a version string: NUL and digits
 * weigh 0, lowercase ASCII letters weigh their character code, and everything else is shifted up
 * by 0x10000. */
static int c_order(int c) {
        if (c == 0)
                return 0;
        if (is_digit(c))
                return 0;

        if (c >= 'a' && c <= 'z')
                return c;

        return c + 0x10000;
}

/* Compares two version strings. The strings are processed as alternating runs of non-digit and
 * digit characters: non-digit runs are compared character by character using c_order(), digit runs
 * are compared as numbers (leading zeroes are skipped, a longer run of digits wins, otherwise the
 * first differing digit decides). If no difference is found this way, strcmp() decides.
 *
 * Returns a value < 0, == 0 or > 0 if s1 sorts before, equal to or after s2. Neither argument may
 * be NULL. */
int str_verscmp(const char *s1, const char *s2) {
        const char *a, *b;

        assert(s1);
        assert(s2);

        a = s1;
        b = s2;

        while (*a || *b) {
                int digit_diff = 0;

                /* Compare the non-digit parts, as long as at least one side has such a character. */
                for (;;) {
                        bool a_text = *a && !is_digit(*a);
                        bool b_text = *b && !is_digit(*b);
                        int order;

                        if (!a_text && !b_text)
                                break;

                        order = c_order(*a) - c_order(*b);
                        if (order != 0)
                                return order;

                        a++;
                        b++;
                }

                /* Leading zeroes carry no numeric weight. */
                while (*a == '0')
                        a++;
                while (*b == '0')
                        b++;

                /* Walk both numbers in lockstep, remembering the first digit that differs. */
                for (; is_digit(*a) && is_digit(*b); a++, b++)
                        if (digit_diff == 0)
                                digit_diff = *a - *b;

                /* Whoever still has digits left has the larger number. */
                if (is_digit(*a))
                        return 1;
                if (is_digit(*b))
                        return -1;

                if (digit_diff != 0)
                        return digit_diff;
        }

        return strcmp(s1, s2);
}
9ce17593
JK
626
627/* Turn off core dumps but only if we're running outside of a container. */
e557b1a6
LP
628void disable_coredumps(void) {
629 int r;
630
631 if (detect_container() > 0)
632 return;
633
57512c89 634 r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
e557b1a6
LP
635 if (r < 0)
636 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
9ce17593 637}