]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
a7334b09 LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2010 Lennart Poettering | |
a7334b09 LP |
6 | ***/ |
7 | ||
11c3a366 | 8 | #include <alloca.h> |
60918275 | 9 | #include <errno.h> |
f6c2284a | 10 | #include <fcntl.h> |
f6c2284a LP |
11 | #include <sched.h> |
12 | #include <signal.h> | |
13 | #include <stdarg.h> | |
14 | #include <stdio.h> | |
15 | #include <stdlib.h> | |
16 | #include <string.h> | |
87d2c1ff | 17 | #include <sys/mman.h> |
f6c2284a | 18 | #include <sys/prctl.h> |
11c3a366 TA |
19 | #include <sys/statfs.h> |
20 | #include <sys/sysmacros.h> | |
f6c2284a | 21 | #include <sys/types.h> |
f6c2284a | 22 | #include <unistd.h> |
eef46c37 | 23 | |
b5efdb8a | 24 | #include "alloc-util.h" |
c43b2b9c | 25 | #include "btrfs-util.h" |
3f6fd1ba | 26 | #include "build.h" |
d9ab2bcf | 27 | #include "cgroup-util.h" |
f6c2284a | 28 | #include "def.h" |
553e15f2 | 29 | #include "device-nodes.h" |
cf0fbc49 | 30 | #include "dirent-util.h" |
3ffd4af2 | 31 | #include "fd-util.h" |
f6c2284a | 32 | #include "fileio.h" |
f97b34a6 | 33 | #include "format-util.h" |
f6c2284a LP |
34 | #include "hashmap.h" |
35 | #include "hostname-util.h" | |
a9f5d454 | 36 | #include "log.h" |
f6c2284a LP |
37 | #include "macro.h" |
38 | #include "missing.h" | |
6bedfcbb | 39 | #include "parse-util.h" |
9eb977db | 40 | #include "path-util.h" |
0b452006 | 41 | #include "process-util.h" |
1e7da35b | 42 | #include "procfs-util.h" |
11c3a366 | 43 | #include "set.h" |
93cc7779 | 44 | #include "signal-util.h" |
cf0fbc49 | 45 | #include "stat-util.h" |
07630cea | 46 | #include "string-util.h" |
f6c2284a | 47 | #include "strv.h" |
93cc7779 | 48 | #include "time-util.h" |
8612da97 | 49 | #include "umask-util.h" |
b1d4f8e1 | 50 | #include "user-util.h" |
4f5dd394 | 51 | #include "util.h" |
9ce17593 | 52 | #include "virt.h" |
56cf987f | 53 | |
9a0e6896 LP |
/* Copy of the process command line; 0/NULL until something stores it.
 * NOTE(review): presumably filled in by each program's main() — confirm against callers. */
int saved_argc = 0;
char **saved_argv = NULL;

/* Cached answer for in_initrd(): negative means "not determined yet", otherwise the boolean result. */
static int saved_in_initrd = -1;
9086e840 | 57 | |
37f85e66 | 58 | size_t page_size(void) { |
ec202eae | 59 | static thread_local size_t pgsz = 0; |
37f85e66 | 60 | long r; |
61 | ||
87d2c1ff | 62 | if (_likely_(pgsz > 0)) |
37f85e66 | 63 | return pgsz; |
64 | ||
e67f47e5 LP |
65 | r = sysconf(_SC_PAGESIZE); |
66 | assert(r > 0); | |
37f85e66 | 67 | |
68 | pgsz = (size_t) r; | |
37f85e66 | 69 | return pgsz; |
70 | } | |
71 | ||
a88c8750 TG |
/* Returns true if /run/plymouth/pid exists (as checked with access(F_OK)), false otherwise. */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
75 | ||
4d6d6518 LP |
/* Returns true if the display string begins with ':' immediately followed by a decimal digit (e.g. ":0"),
 * false for everything else. The argument must not be NULL. */
bool display_is_local(const char *display) {
        assert(display);

        /* Bail out before looking at the second character, the string might be empty */
        if (display[0] != ':')
                return false;

        return display[1] >= '0' && display[1] <= '9';
}
84 | ||
85 | int socket_from_display(const char *display, char **path) { | |
86 | size_t k; | |
87 | char *f, *c; | |
88 | ||
89 | assert(display); | |
90 | assert(path); | |
91 | ||
92 | if (!display_is_local(display)) | |
93 | return -EINVAL; | |
94 | ||
95 | k = strspn(display+1, "0123456789"); | |
96 | ||
fbd0b64f | 97 | f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1); |
4d6d6518 LP |
98 | if (!f) |
99 | return -ENOMEM; | |
100 | ||
101 | c = stpcpy(f, "/tmp/.X11-unix/X"); | |
102 | memcpy(c, display+1, k); | |
103 | c[k] = 0; | |
104 | ||
105 | *path = f; | |
106 | ||
107 | return 0; | |
108 | } | |
109 | ||
65457142 | 110 | bool kexec_loaded(void) { |
c47f86e6 ZJS |
111 | _cleanup_free_ char *s = NULL; |
112 | ||
113 | if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0) | |
114 | return false; | |
115 | ||
116 | return s[0] == '1'; | |
65457142 | 117 | } |
fb9de93d | 118 | |
87d2c1ff LP |
/* Translates the access mode of a set of open() flags into the matching mmap() protection bits.
 *
 * Returns PROT_READ, PROT_WRITE or PROT_READ|PROT_WRITE for O_RDONLY, O_WRONLY and O_RDWR respectively (all
 * bits outside of O_ACCMODE are ignored), and -EINVAL for any other access mode. */
int prot_from_flags(int flags) {
        int mode = flags & O_ACCMODE;

        if (mode == O_RDONLY)
                return PROT_READ;

        if (mode == O_WRONLY)
                return PROT_WRITE;

        if (mode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
689b9a22 | 136 | |
9be346c9 | 137 | bool in_initrd(void) { |
825c6fe5 | 138 | struct statfs s; |
8f33b5b8 | 139 | |
dcd61450 IS |
140 | if (saved_in_initrd >= 0) |
141 | return saved_in_initrd; | |
825c6fe5 LP |
142 | |
143 | /* We make two checks here: | |
144 | * | |
145 | * 1. the flag file /etc/initrd-release must exist | |
146 | * 2. the root file system must be a memory file system | |
147 | * | |
148 | * The second check is extra paranoia, since misdetecting an | |
629ff674 | 149 | * initrd can have bad consequences due the initrd |
825c6fe5 LP |
150 | * emptying when transititioning to the main systemd. |
151 | */ | |
152 | ||
dcd61450 IS |
153 | saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && |
154 | statfs("/", &s) >= 0 && | |
155 | is_temporary_fs(&s); | |
9be346c9 | 156 | |
dcd61450 IS |
157 | return saved_in_initrd; |
158 | } | |
159 | ||
160 | void in_initrd_force(bool value) { | |
161 | saved_in_initrd = value; | |
9be346c9 | 162 | } |
069cfc85 | 163 | |
a9e12476 KS |
164 | /* hey glibc, APIs with callbacks without a user pointer are so useless */ |
/* Binary search like bsearch(), but with an extra user pointer that is handed through to the comparison
 * function.
 *
 * key    - the element to look for, passed as first argument to compar()
 * base   - start of the array, which must be sorted according to compar()
 * nmemb  - number of elements in the array
 * size   - size of one element in bytes
 * compar - returns < 0, 0 or > 0 if key sorts before, equal to or after the array element
 * arg    - opaque pointer passed as third argument to compar()
 *
 * Returns a pointer to a matching element, or NULL if there is none. */
void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                 int (*compar) (const void *, const void *, void *), void *arg) {
        size_t lo = 0, hi = nmemb;

        assert(!size_multiply_overflow(nmemb, size));

        /* Invariant: a match, if any, lives in the half-open index range [lo, hi) */
        while (lo < hi) {
                const void *elem;
                size_t mid;
                int c;

                /* Compute the midpoint relative to lo: adding lo and hi directly could wrap around for
                 * very large arrays. */
                mid = lo + (hi - lo) / 2;
                elem = (const uint8_t*) base + mid * size;

                c = compar(key, elem, arg);
                if (c == 0)
                        return (void *) elem;

                if (c < 0)
                        hi = mid;
                else
                        lo = mid + 1;
        }

        return NULL;
}
09017585 | 188 | |
240dbaa4 LP |
189 | int on_ac_power(void) { |
190 | bool found_offline = false, found_online = false; | |
191 | _cleanup_closedir_ DIR *d = NULL; | |
8fb3f009 | 192 | struct dirent *de; |
240dbaa4 LP |
193 | |
194 | d = opendir("/sys/class/power_supply"); | |
195 | if (!d) | |
6d890034 | 196 | return errno == ENOENT ? true : -errno; |
240dbaa4 | 197 | |
8fb3f009 | 198 | FOREACH_DIRENT(de, d, return -errno) { |
240dbaa4 LP |
199 | _cleanup_close_ int fd = -1, device = -1; |
200 | char contents[6]; | |
201 | ssize_t n; | |
240dbaa4 | 202 | |
240dbaa4 LP |
203 | device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY); |
204 | if (device < 0) { | |
3742095b | 205 | if (IN_SET(errno, ENOENT, ENOTDIR)) |
240dbaa4 LP |
206 | continue; |
207 | ||
208 | return -errno; | |
209 | } | |
210 | ||
211 | fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY); | |
212 | if (fd < 0) { | |
213 | if (errno == ENOENT) | |
214 | continue; | |
215 | ||
216 | return -errno; | |
217 | } | |
218 | ||
219 | n = read(fd, contents, sizeof(contents)); | |
220 | if (n < 0) | |
221 | return -errno; | |
222 | ||
223 | if (n != 6 || memcmp(contents, "Mains\n", 6)) | |
224 | continue; | |
225 | ||
03e334a1 | 226 | safe_close(fd); |
240dbaa4 LP |
227 | fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY); |
228 | if (fd < 0) { | |
229 | if (errno == ENOENT) | |
230 | continue; | |
231 | ||
232 | return -errno; | |
233 | } | |
234 | ||
235 | n = read(fd, contents, sizeof(contents)); | |
236 | if (n < 0) | |
237 | return -errno; | |
238 | ||
239 | if (n != 2 || contents[1] != '\n') | |
240 | return -EIO; | |
241 | ||
242 | if (contents[0] == '1') { | |
243 | found_online = true; | |
244 | break; | |
245 | } else if (contents[0] == '0') | |
246 | found_offline = true; | |
247 | else | |
248 | return -EIO; | |
249 | } | |
250 | ||
251 | return found_online || !found_offline; | |
252 | } | |
fabe5c0e | 253 | |
bc9fd78c LP |
254 | int container_get_leader(const char *machine, pid_t *pid) { |
255 | _cleanup_free_ char *s = NULL, *class = NULL; | |
256 | const char *p; | |
257 | pid_t leader; | |
258 | int r; | |
259 | ||
260 | assert(machine); | |
261 | assert(pid); | |
262 | ||
b9a8d250 LP |
263 | if (!machine_name_is_valid(machine)) |
264 | return -EINVAL; | |
265 | ||
63c372cb | 266 | p = strjoina("/run/systemd/machines/", machine); |
bc9fd78c LP |
267 | r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL); |
268 | if (r == -ENOENT) | |
269 | return -EHOSTDOWN; | |
270 | if (r < 0) | |
271 | return r; | |
272 | if (!s) | |
273 | return -EIO; | |
274 | ||
275 | if (!streq_ptr(class, "container")) | |
276 | return -EIO; | |
277 | ||
278 | r = parse_pid(s, &leader); | |
279 | if (r < 0) | |
280 | return r; | |
281 | if (leader <= 1) | |
282 | return -EIO; | |
283 | ||
284 | *pid = leader; | |
285 | return 0; | |
286 | } | |
287 | ||
671c3419 RM |
288 | int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { |
289 | _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; | |
359a06aa | 290 | int rfd = -1; |
bc9fd78c LP |
291 | |
292 | assert(pid >= 0); | |
bc9fd78c | 293 | |
878cd7e9 LP |
294 | if (mntns_fd) { |
295 | const char *mntns; | |
a4475f57 | 296 | |
878cd7e9 LP |
297 | mntns = procfs_file_alloca(pid, "ns/mnt"); |
298 | mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
299 | if (mntnsfd < 0) | |
300 | return -errno; | |
301 | } | |
bc9fd78c | 302 | |
878cd7e9 LP |
303 | if (pidns_fd) { |
304 | const char *pidns; | |
305 | ||
306 | pidns = procfs_file_alloca(pid, "ns/pid"); | |
307 | pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
308 | if (pidnsfd < 0) | |
309 | return -errno; | |
310 | } | |
311 | ||
312 | if (netns_fd) { | |
313 | const char *netns; | |
314 | ||
315 | netns = procfs_file_alloca(pid, "ns/net"); | |
316 | netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
317 | if (netnsfd < 0) | |
318 | return -errno; | |
319 | } | |
320 | ||
671c3419 RM |
321 | if (userns_fd) { |
322 | const char *userns; | |
323 | ||
324 | userns = procfs_file_alloca(pid, "ns/user"); | |
325 | usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
326 | if (usernsfd < 0 && errno != ENOENT) | |
327 | return -errno; | |
328 | } | |
329 | ||
878cd7e9 LP |
330 | if (root_fd) { |
331 | const char *root; | |
332 | ||
333 | root = procfs_file_alloca(pid, "root"); | |
334 | rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); | |
335 | if (rfd < 0) | |
336 | return -errno; | |
337 | } | |
338 | ||
339 | if (pidns_fd) | |
340 | *pidns_fd = pidnsfd; | |
bc9fd78c | 341 | |
878cd7e9 LP |
342 | if (mntns_fd) |
343 | *mntns_fd = mntnsfd; | |
344 | ||
345 | if (netns_fd) | |
346 | *netns_fd = netnsfd; | |
347 | ||
671c3419 RM |
348 | if (userns_fd) |
349 | *userns_fd = usernsfd; | |
350 | ||
878cd7e9 LP |
351 | if (root_fd) |
352 | *root_fd = rfd; | |
353 | ||
671c3419 | 354 | pidnsfd = mntnsfd = netnsfd = usernsfd = -1; |
bc9fd78c LP |
355 | |
356 | return 0; | |
357 | } | |
358 | ||
671c3419 RM |
359 | int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { |
360 | if (userns_fd >= 0) { | |
361 | /* Can't setns to your own userns, since then you could | |
362 | * escalate from non-root to root in your own namespace, so | |
363 | * check if namespaces equal before attempting to enter. */ | |
364 | _cleanup_free_ char *userns_fd_path = NULL; | |
365 | int r; | |
366 | if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0) | |
367 | return -ENOMEM; | |
368 | ||
e3f791a2 | 369 | r = files_same(userns_fd_path, "/proc/self/ns/user", 0); |
671c3419 RM |
370 | if (r < 0) |
371 | return r; | |
372 | if (r) | |
373 | userns_fd = -1; | |
374 | } | |
bc9fd78c | 375 | |
878cd7e9 LP |
376 | if (pidns_fd >= 0) |
377 | if (setns(pidns_fd, CLONE_NEWPID) < 0) | |
378 | return -errno; | |
a4475f57 | 379 | |
878cd7e9 LP |
380 | if (mntns_fd >= 0) |
381 | if (setns(mntns_fd, CLONE_NEWNS) < 0) | |
382 | return -errno; | |
bc9fd78c | 383 | |
878cd7e9 LP |
384 | if (netns_fd >= 0) |
385 | if (setns(netns_fd, CLONE_NEWNET) < 0) | |
386 | return -errno; | |
bc9fd78c | 387 | |
671c3419 RM |
388 | if (userns_fd >= 0) |
389 | if (setns(userns_fd, CLONE_NEWUSER) < 0) | |
390 | return -errno; | |
391 | ||
878cd7e9 LP |
392 | if (root_fd >= 0) { |
393 | if (fchdir(root_fd) < 0) | |
394 | return -errno; | |
395 | ||
396 | if (chroot(".") < 0) | |
397 | return -errno; | |
398 | } | |
bc9fd78c | 399 | |
b4da6d6b | 400 | return reset_uid_gid(); |
bc9fd78c | 401 | } |
bf108e55 | 402 | |
1c231f56 | 403 | uint64_t physical_memory(void) { |
d9ab2bcf LP |
404 | _cleanup_free_ char *root = NULL, *value = NULL; |
405 | uint64_t mem, lim; | |
406 | size_t ps; | |
407 | long sc; | |
1c231f56 | 408 | |
d9ab2bcf LP |
409 | /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of |
410 | * memory. | |
411 | * | |
412 | * In order to support containers nicely that have a configured memory limit we'll take the minimum of the | |
413 | * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ | |
414 | ||
415 | sc = sysconf(_SC_PHYS_PAGES); | |
416 | assert(sc > 0); | |
417 | ||
418 | ps = page_size(); | |
419 | mem = (uint64_t) sc * (uint64_t) ps; | |
420 | ||
421 | if (cg_get_root_path(&root) < 0) | |
422 | return mem; | |
423 | ||
424 | if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value)) | |
425 | return mem; | |
426 | ||
427 | if (safe_atou64(value, &lim) < 0) | |
428 | return mem; | |
1c231f56 | 429 | |
d9ab2bcf LP |
430 | /* Make sure the limit is a multiple of our own page size */ |
431 | lim /= ps; | |
432 | lim *= ps; | |
1c231f56 | 433 | |
d9ab2bcf | 434 | return MIN(mem, lim); |
1c231f56 | 435 | } |
6db615c1 | 436 | |
d8cf2ac7 LP |
/* Returns the physical memory size multiplied by v and divided by max, rounded down to a multiple of the page
 * size. Returns UINT64_MAX if the calculation overflows. max must not be zero. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t n_pages, scaled_pages, psz;

        assert(max > 0);

        psz = page_size();
        assert(psz > 0);

        /* Do the math in units of pages, so that the result is page-aligned automatically */
        n_pages = physical_memory() / psz;
        assert(n_pages > 0);

        /* Would n_pages * v overflow? */
        if (v > UINT64_MAX / n_pages)
                return UINT64_MAX;

        scaled_pages = n_pages * v / max;

        /* Would the conversion back to bytes overflow? */
        if (scaled_pages > UINT64_MAX / psz)
                return UINT64_MAX;

        return scaled_pages * psz;
}
463 | ||
83f8e808 LP |
464 | uint64_t system_tasks_max(void) { |
465 | ||
83f8e808 | 466 | uint64_t a = TASKS_MAX, b = TASKS_MAX; |
1e7da35b | 467 | _cleanup_free_ char *root = NULL; |
83f8e808 LP |
468 | |
469 | /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this | |
470 | * limit: | |
471 | * | |
f3a367d6 | 472 | * a) the maximum tasks value the kernel allows on this architecture |
83f8e808 | 473 | * b) the cgroups pids_max attribute for the system |
f3a367d6 | 474 | * c) the kernel's configured maximum PID value |
83f8e808 LP |
475 | * |
476 | * And then pick the smallest of the three */ | |
477 | ||
1e7da35b | 478 | (void) procfs_tasks_get_limit(&a); |
83f8e808 LP |
479 | |
480 | if (cg_get_root_path(&root) >= 0) { | |
1e7da35b | 481 | _cleanup_free_ char *value = NULL; |
83f8e808 LP |
482 | |
483 | if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) | |
484 | (void) safe_atou64(value, &b); | |
485 | } | |
486 | ||
487 | return MIN3(TASKS_MAX, | |
488 | a <= 0 ? TASKS_MAX : a, | |
489 | b <= 0 ? TASKS_MAX : b); | |
490 | } | |
491 | ||
/* Multiplies the system's task limit by the fraction v/max, rounding down. With max == 100 this calculates
 * percentages of the system's maximum number of tasks. Returns UINT64_MAX if the multiplication overflows.
 * max must not be zero. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        /* Would limit * v overflow? */
        if (v > UINT64_MAX / limit)
                return UINT64_MAX;

        return limit * v / max;
}
509 | ||
3f6fd1ba LP |
510 | int version(void) { |
511 | puts(PACKAGE_STRING "\n" | |
512 | SYSTEMD_FEATURES); | |
513 | return 0; | |
514 | } | |
68c58c67 LP |
515 | |
516 | /* This is a direct translation of str_verscmp from boot.c */ | |
/* Returns true if c is one of the ASCII characters '0' to '9'. */
static bool is_digit(int c) {
        return !(c < '0' || c > '9');
}
520 | ||
/* Maps a character to its sort weight for version comparison: NUL and the digits '0'..'9' map to 0, the
 * lowercase letters 'a'..'z' map to their own character value, and everything else maps to its character
 * value plus 0x10000, i.e. sorts after all lowercase letters. */
static int c_order(int c) {
        bool digit = c >= '0' && c <= '9';

        if (c == 0 || digit)
                return 0;

        if (c >= 'a' && c <= 'z')
                return c;

        return c + 0x10000;
}
530 | ||
/* Compares two version strings. The strings are processed as alternating runs of non-digit characters, which
 * are compared by their c_order() weight, and runs of digits, which are compared numerically with leading
 * zeroes ignored. If no difference is found that way the result of strcmp() on the full strings is returned.
 *
 * Returns a negative value, zero or a positive value if s1 sorts before, equal to or after s2. */
int str_verscmp(const char *s1, const char *s2) {
        const char *a, *b;

        assert(s1);
        assert(s2);

        /* a and b are the cursors, s1 and s2 are kept around for the final strcmp() */
        a = s1;
        b = s2;

        while (*a || *b) {
                int digit_diff = 0;

                /* Non-numeric part: compare character by character until both cursors are at a digit
                 * or at the end of their string */
                while ((*a && !is_digit(*a)) || (*b && !is_digit(*b))) {
                        int weight_diff;

                        weight_diff = c_order(*a) - c_order(*b);
                        if (weight_diff != 0)
                                return weight_diff;

                        a++;
                        b++;
                }

                /* Leading zeroes do not change the numeric value, skip them */
                a += strspn(a, "0");
                b += strspn(b, "0");

                /* Numeric part: walk both digit runs in parallel and remember the first difference */
                for (; is_digit(*a) && is_digit(*b); a++, b++)
                        if (digit_diff == 0)
                                digit_diff = *a - *b;

                /* The number with more digits is the larger one */
                if (is_digit(*a))
                        return 1;
                if (is_digit(*b))
                        return -1;

                /* Same number of digits: the first differing digit decides */
                if (digit_diff != 0)
                        return digit_diff;
        }

        return strcmp(s1, s2);
}
9ce17593 JK |
577 | |
578 | /* Turn off core dumps but only if we're running outside of a container. */ | |
e557b1a6 LP |
/* Writes "|/bin/false" to /proc/sys/kernel/core_pattern, unless detect_container() reports that we are
 * running in a container. A failure to write is logged at debug level and otherwise ignored. */
void disable_coredumps(void) {
        int r;

        if (detect_container() > 0)
                return;

        r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
        if (r >= 0)
                return;

        log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
}