]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
a7334b09 LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2010 Lennart Poettering | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
8 | under the terms of the GNU Lesser General Public License as published by |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
a7334b09 LP |
10 | (at your option) any later version. |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 15 | Lesser General Public License for more details. |
a7334b09 | 16 | |
5430f7f2 | 17 | You should have received a copy of the GNU Lesser General Public License |
a7334b09 LP |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
19 | ***/ | |
20 | ||
11c3a366 | 21 | #include <alloca.h> |
60918275 | 22 | #include <errno.h> |
f6c2284a | 23 | #include <fcntl.h> |
f6c2284a LP |
24 | #include <sched.h> |
25 | #include <signal.h> | |
26 | #include <stdarg.h> | |
27 | #include <stdio.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
87d2c1ff | 30 | #include <sys/mman.h> |
f6c2284a | 31 | #include <sys/prctl.h> |
11c3a366 TA |
32 | #include <sys/statfs.h> |
33 | #include <sys/sysmacros.h> | |
f6c2284a | 34 | #include <sys/types.h> |
f6c2284a | 35 | #include <unistd.h> |
eef46c37 | 36 | |
b5efdb8a | 37 | #include "alloc-util.h" |
c43b2b9c | 38 | #include "btrfs-util.h" |
3f6fd1ba | 39 | #include "build.h" |
d9ab2bcf | 40 | #include "cgroup-util.h" |
f6c2284a | 41 | #include "def.h" |
cf0fbc49 | 42 | #include "dirent-util.h" |
3ffd4af2 | 43 | #include "fd-util.h" |
f6c2284a | 44 | #include "fileio.h" |
f97b34a6 | 45 | #include "format-util.h" |
f6c2284a LP |
46 | #include "hashmap.h" |
47 | #include "hostname-util.h" | |
a9f5d454 | 48 | #include "log.h" |
f6c2284a LP |
49 | #include "macro.h" |
50 | #include "missing.h" | |
6bedfcbb | 51 | #include "parse-util.h" |
9eb977db | 52 | #include "path-util.h" |
0b452006 | 53 | #include "process-util.h" |
11c3a366 | 54 | #include "set.h" |
93cc7779 | 55 | #include "signal-util.h" |
cf0fbc49 | 56 | #include "stat-util.h" |
07630cea | 57 | #include "string-util.h" |
f6c2284a | 58 | #include "strv.h" |
93cc7779 | 59 | #include "time-util.h" |
8612da97 | 60 | #include "umask-util.h" |
b1d4f8e1 | 61 | #include "user-util.h" |
4f5dd394 | 62 | #include "util.h" |
56cf987f | 63 | |
/* Globals holding an argument count and argument vector (presumably main()'s, stored by the program's
 * entry point — nothing in this file assigns them). */
int saved_argc = 0;
char **saved_argv = NULL;

/* Cache for in_initrd(): negative while the answer has not been determined yet, otherwise the cached
 * boolean result. May be overridden through in_initrd_force(). */
static int saved_in_initrd = -1;
9086e840 | 67 | |
37f85e66 | 68 | size_t page_size(void) { |
ec202eae | 69 | static thread_local size_t pgsz = 0; |
37f85e66 | 70 | long r; |
71 | ||
87d2c1ff | 72 | if (_likely_(pgsz > 0)) |
37f85e66 | 73 | return pgsz; |
74 | ||
e67f47e5 LP |
75 | r = sysconf(_SC_PAGESIZE); |
76 | assert(r > 0); | |
37f85e66 | 77 | |
78 | pgsz = (size_t) r; | |
37f85e66 | 79 | return pgsz; |
80 | } | |
81 | ||
/* Reports whether Plymouth appears to be running, judged solely by whether /run/plymouth/pid is
 * accessible. */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
85 | ||
/* Returns true if the display string starts with a colon immediately followed by a decimal digit
 * (e.g. ":0"), false otherwise. 'display' must not be NULL. */
bool display_is_local(const char *display) {
        assert(display);

        if (display[0] != ':')
                return false;

        /* Only reached when display[0] is ':' and hence display[1] is within the string (at worst the
         * terminating NUL). */
        return display[1] >= '0' && display[1] <= '9';
}
94 | ||
95 | int socket_from_display(const char *display, char **path) { | |
96 | size_t k; | |
97 | char *f, *c; | |
98 | ||
99 | assert(display); | |
100 | assert(path); | |
101 | ||
102 | if (!display_is_local(display)) | |
103 | return -EINVAL; | |
104 | ||
105 | k = strspn(display+1, "0123456789"); | |
106 | ||
f8294e41 | 107 | f = new(char, strlen("/tmp/.X11-unix/X") + k + 1); |
4d6d6518 LP |
108 | if (!f) |
109 | return -ENOMEM; | |
110 | ||
111 | c = stpcpy(f, "/tmp/.X11-unix/X"); | |
112 | memcpy(c, display+1, k); | |
113 | c[k] = 0; | |
114 | ||
115 | *path = f; | |
116 | ||
117 | return 0; | |
118 | } | |
119 | ||
/* Determines the whole-disk block device that 'd' belongs to by inspecting /sys/dev/block/.
 *
 * If 'd' itself has a "queue" entry it is returned as is. Otherwise, if it has a "partition" entry, the
 * device number found in the "dev" file of its parent directory is returned, provided that parent has
 * a "queue" entry.
 *
 * Returns 0 and stores the result in *ret on success. Returns -ENOENT if no suitable device is found,
 * -ENOMEM on allocation failure, -EINVAL if the parent's "dev" file cannot be parsed, or the error
 * returned by read_one_line_file(). */
int block_get_whole_disk(dev_t d, dev_t *ret) {
        unsigned parent_major, parent_minor;
        char *sysfs_path, *line;
        int k;

        assert(ret);

        /* A device that has a queue is good enough for us: it is the whole disk */
        if (asprintf(&sysfs_path, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = access(sysfs_path, F_OK);
        free(sysfs_path);
        if (k >= 0) {
                *ret = d;
                return 0;
        }

        /* No queue: only partitions are of interest from here on */
        if (asprintf(&sysfs_path, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = access(sysfs_path, F_OK);
        free(sysfs_path);
        if (k < 0)
                return -ENOENT;

        /* Read the device number of the parent device */
        if (asprintf(&sysfs_path, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = read_one_line_file(sysfs_path, &line);
        free(sysfs_path);
        if (k < 0)
                return k;

        k = sscanf(line, "%u:%u", &parent_major, &parent_minor);
        free(line);
        if (k != 2)
                return -EINVAL;

        /* Accept the parent only if it, too, has a queue */
        if (asprintf(&sysfs_path, "/sys/dev/block/%u:%u/queue", parent_major, parent_minor) < 0)
                return -ENOMEM;

        k = access(sysfs_path, F_OK);
        free(sysfs_path);
        if (k < 0)
                return -ENOENT;

        *ret = makedev(parent_major, parent_minor);
        return 0;
}
179 | ||
65457142 | 180 | bool kexec_loaded(void) { |
c47f86e6 ZJS |
181 | _cleanup_free_ char *s = NULL; |
182 | ||
183 | if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0) | |
184 | return false; | |
185 | ||
186 | return s[0] == '1'; | |
65457142 | 187 | } |
fb9de93d | 188 | |
/* Translates the access mode contained in open()-style 'flags' into the matching mmap() protection
 * bits: O_RDONLY -> PROT_READ, O_WRONLY -> PROT_WRITE, O_RDWR -> PROT_READ|PROT_WRITE. All bits outside
 * of O_ACCMODE are ignored. Returns -EINVAL for any other access mode. */
int prot_from_flags(int flags) {
        const int access_mode = flags & O_ACCMODE;

        if (access_mode == O_RDONLY)
                return PROT_READ;

        if (access_mode == O_WRONLY)
                return PROT_WRITE;

        if (access_mode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
689b9a22 | 206 | |
9bdc770c | 207 | int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) { |
6bb92a16 | 208 | bool stdout_is_tty, stderr_is_tty; |
8a7c93d8 LP |
209 | pid_t parent_pid, agent_pid; |
210 | sigset_t ss, saved_ss; | |
6bb92a16 LP |
211 | unsigned n, i; |
212 | va_list ap; | |
213 | char **l; | |
214 | ||
215 | assert(pid); | |
216 | assert(path); | |
217 | ||
6bb92a16 LP |
218 | /* Spawns a temporary TTY agent, making sure it goes away when |
219 | * we go away */ | |
220 | ||
df0ff127 | 221 | parent_pid = getpid_cached(); |
8a7c93d8 LP |
222 | |
223 | /* First we temporarily block all signals, so that the new | |
224 | * child has them blocked initially. This way, we can be sure | |
225 | * that SIGTERMs are not lost we might send to the agent. */ | |
226 | assert_se(sigfillset(&ss) >= 0); | |
227 | assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0); | |
228 | ||
6bb92a16 | 229 | agent_pid = fork(); |
8a7c93d8 LP |
230 | if (agent_pid < 0) { |
231 | assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); | |
6bb92a16 | 232 | return -errno; |
8a7c93d8 | 233 | } |
6bb92a16 LP |
234 | |
235 | if (agent_pid != 0) { | |
8a7c93d8 | 236 | assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); |
6bb92a16 LP |
237 | *pid = agent_pid; |
238 | return 0; | |
239 | } | |
240 | ||
241 | /* In the child: | |
242 | * | |
243 | * Make sure the agent goes away when the parent dies */ | |
244 | if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) | |
245 | _exit(EXIT_FAILURE); | |
246 | ||
8a7c93d8 LP |
247 | /* Make sure we actually can kill the agent, if we need to, in |
248 | * case somebody invoked us from a shell script that trapped | |
249 | * SIGTERM or so... */ | |
ce30c8dc LP |
250 | (void) reset_all_signal_handlers(); |
251 | (void) reset_signal_mask(); | |
8a7c93d8 | 252 | |
6bb92a16 | 253 | /* Check whether our parent died before we were able |
8a7c93d8 | 254 | * to set the death signal and unblock the signals */ |
6bb92a16 LP |
255 | if (getppid() != parent_pid) |
256 | _exit(EXIT_SUCCESS); | |
257 | ||
258 | /* Don't leak fds to the agent */ | |
9bdc770c | 259 | close_all_fds(except, n_except); |
6bb92a16 LP |
260 | |
261 | stdout_is_tty = isatty(STDOUT_FILENO); | |
262 | stderr_is_tty = isatty(STDERR_FILENO); | |
263 | ||
264 | if (!stdout_is_tty || !stderr_is_tty) { | |
8a7c93d8 LP |
265 | int fd; |
266 | ||
6bb92a16 LP |
267 | /* Detach from stdout/stderr. and reopen |
268 | * /dev/tty for them. This is important to | |
269 | * ensure that when systemctl is started via | |
270 | * popen() or a similar call that expects to | |
271 | * read EOF we actually do generate EOF and | |
272 | * not delay this indefinitely by because we | |
273 | * keep an unused copy of stdin around. */ | |
274 | fd = open("/dev/tty", O_WRONLY); | |
275 | if (fd < 0) { | |
56f64d95 | 276 | log_error_errno(errno, "Failed to open /dev/tty: %m"); |
6bb92a16 LP |
277 | _exit(EXIT_FAILURE); |
278 | } | |
279 | ||
94edd38e ZJS |
280 | if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) { |
281 | log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); | |
282 | _exit(EXIT_FAILURE); | |
283 | } | |
6bb92a16 | 284 | |
94edd38e ZJS |
285 | if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) { |
286 | log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); | |
287 | _exit(EXIT_FAILURE); | |
288 | } | |
6bb92a16 | 289 | |
94edd38e | 290 | if (fd > STDERR_FILENO) |
6bb92a16 LP |
291 | close(fd); |
292 | } | |
293 | ||
294 | /* Count arguments */ | |
295 | va_start(ap, path); | |
296 | for (n = 0; va_arg(ap, char*); n++) | |
297 | ; | |
298 | va_end(ap); | |
299 | ||
300 | /* Allocate strv */ | |
301 | l = alloca(sizeof(char *) * (n + 1)); | |
302 | ||
303 | /* Fill in arguments */ | |
304 | va_start(ap, path); | |
305 | for (i = 0; i <= n; i++) | |
306 | l[i] = va_arg(ap, char*); | |
307 | va_end(ap); | |
308 | ||
309 | execv(path, l); | |
310 | _exit(EXIT_FAILURE); | |
311 | } | |
68faf98c | 312 | |
9be346c9 | 313 | bool in_initrd(void) { |
825c6fe5 | 314 | struct statfs s; |
8f33b5b8 | 315 | |
dcd61450 IS |
316 | if (saved_in_initrd >= 0) |
317 | return saved_in_initrd; | |
825c6fe5 LP |
318 | |
319 | /* We make two checks here: | |
320 | * | |
321 | * 1. the flag file /etc/initrd-release must exist | |
322 | * 2. the root file system must be a memory file system | |
323 | * | |
324 | * The second check is extra paranoia, since misdetecting an | |
629ff674 | 325 | * initrd can have bad consequences due the initrd |
825c6fe5 LP |
326 | * emptying when transititioning to the main systemd. |
327 | */ | |
328 | ||
dcd61450 IS |
329 | saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && |
330 | statfs("/", &s) >= 0 && | |
331 | is_temporary_fs(&s); | |
9be346c9 | 332 | |
dcd61450 IS |
333 | return saved_in_initrd; |
334 | } | |
335 | ||
336 | void in_initrd_force(bool value) { | |
337 | saved_in_initrd = value; | |
9be346c9 | 338 | } |
069cfc85 | 339 | |
/* Binary search like bsearch(), but with an additional opaque 'arg' pointer that is handed to the
 * comparison callback as third argument (bsearch() offers no such user pointer).
 *
 * 'base' points to 'nmemb' elements of 'size' bytes each, sorted consistently with 'compar'.
 * compar(key, element, arg) must return < 0, 0 or > 0 if the key sorts before, equal to or after the
 * element. Returns a pointer to a matching element, or NULL if there is none (in particular when
 * nmemb is 0, in which case 'compar' is never called). */
void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                 int (*compar) (const void *, const void *, void *), void *arg) {
        size_t lo = 0, hi = nmemb;

        while (lo < hi) {
                /* Computed this way rather than as (lo + hi) / 2 so that the sum cannot wrap around */
                size_t mid = lo + (hi - lo) / 2;
                const void *candidate = (const char *) base + mid * size;
                int c;

                c = compar(key, candidate, arg);
                if (c == 0)
                        return (void *) candidate;

                if (c < 0)
                        hi = mid;
                else
                        lo = mid + 1;
        }

        return NULL;
}
09017585 | 362 | |
240dbaa4 LP |
363 | int on_ac_power(void) { |
364 | bool found_offline = false, found_online = false; | |
365 | _cleanup_closedir_ DIR *d = NULL; | |
8fb3f009 | 366 | struct dirent *de; |
240dbaa4 LP |
367 | |
368 | d = opendir("/sys/class/power_supply"); | |
369 | if (!d) | |
6d890034 | 370 | return errno == ENOENT ? true : -errno; |
240dbaa4 | 371 | |
8fb3f009 | 372 | FOREACH_DIRENT(de, d, return -errno) { |
240dbaa4 LP |
373 | _cleanup_close_ int fd = -1, device = -1; |
374 | char contents[6]; | |
375 | ssize_t n; | |
240dbaa4 | 376 | |
240dbaa4 LP |
377 | device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY); |
378 | if (device < 0) { | |
3742095b | 379 | if (IN_SET(errno, ENOENT, ENOTDIR)) |
240dbaa4 LP |
380 | continue; |
381 | ||
382 | return -errno; | |
383 | } | |
384 | ||
385 | fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY); | |
386 | if (fd < 0) { | |
387 | if (errno == ENOENT) | |
388 | continue; | |
389 | ||
390 | return -errno; | |
391 | } | |
392 | ||
393 | n = read(fd, contents, sizeof(contents)); | |
394 | if (n < 0) | |
395 | return -errno; | |
396 | ||
397 | if (n != 6 || memcmp(contents, "Mains\n", 6)) | |
398 | continue; | |
399 | ||
03e334a1 | 400 | safe_close(fd); |
240dbaa4 LP |
401 | fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY); |
402 | if (fd < 0) { | |
403 | if (errno == ENOENT) | |
404 | continue; | |
405 | ||
406 | return -errno; | |
407 | } | |
408 | ||
409 | n = read(fd, contents, sizeof(contents)); | |
410 | if (n < 0) | |
411 | return -errno; | |
412 | ||
413 | if (n != 2 || contents[1] != '\n') | |
414 | return -EIO; | |
415 | ||
416 | if (contents[0] == '1') { | |
417 | found_online = true; | |
418 | break; | |
419 | } else if (contents[0] == '0') | |
420 | found_offline = true; | |
421 | else | |
422 | return -EIO; | |
423 | } | |
424 | ||
425 | return found_online || !found_offline; | |
426 | } | |
fabe5c0e | 427 | |
bc9fd78c LP |
428 | int container_get_leader(const char *machine, pid_t *pid) { |
429 | _cleanup_free_ char *s = NULL, *class = NULL; | |
430 | const char *p; | |
431 | pid_t leader; | |
432 | int r; | |
433 | ||
434 | assert(machine); | |
435 | assert(pid); | |
436 | ||
b9a8d250 LP |
437 | if (!machine_name_is_valid(machine)) |
438 | return -EINVAL; | |
439 | ||
63c372cb | 440 | p = strjoina("/run/systemd/machines/", machine); |
bc9fd78c LP |
441 | r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL); |
442 | if (r == -ENOENT) | |
443 | return -EHOSTDOWN; | |
444 | if (r < 0) | |
445 | return r; | |
446 | if (!s) | |
447 | return -EIO; | |
448 | ||
449 | if (!streq_ptr(class, "container")) | |
450 | return -EIO; | |
451 | ||
452 | r = parse_pid(s, &leader); | |
453 | if (r < 0) | |
454 | return r; | |
455 | if (leader <= 1) | |
456 | return -EIO; | |
457 | ||
458 | *pid = leader; | |
459 | return 0; | |
460 | } | |
461 | ||
671c3419 RM |
462 | int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { |
463 | _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; | |
359a06aa | 464 | int rfd = -1; |
bc9fd78c LP |
465 | |
466 | assert(pid >= 0); | |
bc9fd78c | 467 | |
878cd7e9 LP |
468 | if (mntns_fd) { |
469 | const char *mntns; | |
a4475f57 | 470 | |
878cd7e9 LP |
471 | mntns = procfs_file_alloca(pid, "ns/mnt"); |
472 | mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
473 | if (mntnsfd < 0) | |
474 | return -errno; | |
475 | } | |
bc9fd78c | 476 | |
878cd7e9 LP |
477 | if (pidns_fd) { |
478 | const char *pidns; | |
479 | ||
480 | pidns = procfs_file_alloca(pid, "ns/pid"); | |
481 | pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
482 | if (pidnsfd < 0) | |
483 | return -errno; | |
484 | } | |
485 | ||
486 | if (netns_fd) { | |
487 | const char *netns; | |
488 | ||
489 | netns = procfs_file_alloca(pid, "ns/net"); | |
490 | netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
491 | if (netnsfd < 0) | |
492 | return -errno; | |
493 | } | |
494 | ||
671c3419 RM |
495 | if (userns_fd) { |
496 | const char *userns; | |
497 | ||
498 | userns = procfs_file_alloca(pid, "ns/user"); | |
499 | usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
500 | if (usernsfd < 0 && errno != ENOENT) | |
501 | return -errno; | |
502 | } | |
503 | ||
878cd7e9 LP |
504 | if (root_fd) { |
505 | const char *root; | |
506 | ||
507 | root = procfs_file_alloca(pid, "root"); | |
508 | rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); | |
509 | if (rfd < 0) | |
510 | return -errno; | |
511 | } | |
512 | ||
513 | if (pidns_fd) | |
514 | *pidns_fd = pidnsfd; | |
bc9fd78c | 515 | |
878cd7e9 LP |
516 | if (mntns_fd) |
517 | *mntns_fd = mntnsfd; | |
518 | ||
519 | if (netns_fd) | |
520 | *netns_fd = netnsfd; | |
521 | ||
671c3419 RM |
522 | if (userns_fd) |
523 | *userns_fd = usernsfd; | |
524 | ||
878cd7e9 LP |
525 | if (root_fd) |
526 | *root_fd = rfd; | |
527 | ||
671c3419 | 528 | pidnsfd = mntnsfd = netnsfd = usernsfd = -1; |
bc9fd78c LP |
529 | |
530 | return 0; | |
531 | } | |
532 | ||
671c3419 RM |
533 | int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { |
534 | if (userns_fd >= 0) { | |
535 | /* Can't setns to your own userns, since then you could | |
536 | * escalate from non-root to root in your own namespace, so | |
537 | * check if namespaces equal before attempting to enter. */ | |
538 | _cleanup_free_ char *userns_fd_path = NULL; | |
539 | int r; | |
540 | if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0) | |
541 | return -ENOMEM; | |
542 | ||
e3f791a2 | 543 | r = files_same(userns_fd_path, "/proc/self/ns/user", 0); |
671c3419 RM |
544 | if (r < 0) |
545 | return r; | |
546 | if (r) | |
547 | userns_fd = -1; | |
548 | } | |
bc9fd78c | 549 | |
878cd7e9 LP |
550 | if (pidns_fd >= 0) |
551 | if (setns(pidns_fd, CLONE_NEWPID) < 0) | |
552 | return -errno; | |
a4475f57 | 553 | |
878cd7e9 LP |
554 | if (mntns_fd >= 0) |
555 | if (setns(mntns_fd, CLONE_NEWNS) < 0) | |
556 | return -errno; | |
bc9fd78c | 557 | |
878cd7e9 LP |
558 | if (netns_fd >= 0) |
559 | if (setns(netns_fd, CLONE_NEWNET) < 0) | |
560 | return -errno; | |
bc9fd78c | 561 | |
671c3419 RM |
562 | if (userns_fd >= 0) |
563 | if (setns(userns_fd, CLONE_NEWUSER) < 0) | |
564 | return -errno; | |
565 | ||
878cd7e9 LP |
566 | if (root_fd >= 0) { |
567 | if (fchdir(root_fd) < 0) | |
568 | return -errno; | |
569 | ||
570 | if (chroot(".") < 0) | |
571 | return -errno; | |
572 | } | |
bc9fd78c | 573 | |
b4da6d6b | 574 | return reset_uid_gid(); |
bc9fd78c | 575 | } |
bf108e55 | 576 | |
1c231f56 | 577 | uint64_t physical_memory(void) { |
d9ab2bcf LP |
578 | _cleanup_free_ char *root = NULL, *value = NULL; |
579 | uint64_t mem, lim; | |
580 | size_t ps; | |
581 | long sc; | |
1c231f56 | 582 | |
d9ab2bcf LP |
583 | /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of |
584 | * memory. | |
585 | * | |
586 | * In order to support containers nicely that have a configured memory limit we'll take the minimum of the | |
587 | * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ | |
588 | ||
589 | sc = sysconf(_SC_PHYS_PAGES); | |
590 | assert(sc > 0); | |
591 | ||
592 | ps = page_size(); | |
593 | mem = (uint64_t) sc * (uint64_t) ps; | |
594 | ||
595 | if (cg_get_root_path(&root) < 0) | |
596 | return mem; | |
597 | ||
598 | if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value)) | |
599 | return mem; | |
600 | ||
601 | if (safe_atou64(value, &lim) < 0) | |
602 | return mem; | |
1c231f56 | 603 | |
d9ab2bcf LP |
604 | /* Make sure the limit is a multiple of our own page size */ |
605 | lim /= ps; | |
606 | lim *= ps; | |
1c231f56 | 607 | |
d9ab2bcf | 608 | return MIN(mem, lim); |
1c231f56 | 609 | } |
6db615c1 | 610 | |
/* Returns the physical memory size multiplied by the fraction v/max. The computation is done in units
 * of pages, hence on success the result is a multiple of the page size (rounded down). Returns
 * UINT64_MAX if an intermediate product overflows. 'max' must be non-zero. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t page_sz, n_pages, scaled_pages, n_bytes;

        assert(max > 0);

        page_sz = page_size();
        assert(page_sz > 0);

        n_pages = physical_memory() / page_sz;
        assert(n_pages > 0);

        /* pages * v, with overflow detection by dividing back */
        scaled_pages = n_pages * v;
        if (scaled_pages / n_pages != v)
                return UINT64_MAX;

        scaled_pages /= max;

        /* Convert back to bytes, again checking for overflow */
        n_bytes = scaled_pages * page_sz;
        if (n_bytes / page_sz != scaled_pages)
                return UINT64_MAX;

        return n_bytes;
}
637 | ||
83f8e808 LP |
638 | uint64_t system_tasks_max(void) { |
639 | ||
640 | #if SIZEOF_PID_T == 4 | |
641 | #define TASKS_MAX ((uint64_t) (INT32_MAX-1)) | |
642 | #elif SIZEOF_PID_T == 2 | |
643 | #define TASKS_MAX ((uint64_t) (INT16_MAX-1)) | |
644 | #else | |
645 | #error "Unknown pid_t size" | |
646 | #endif | |
647 | ||
648 | _cleanup_free_ char *value = NULL, *root = NULL; | |
649 | uint64_t a = TASKS_MAX, b = TASKS_MAX; | |
650 | ||
651 | /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this | |
652 | * limit: | |
653 | * | |
654 | * a) the maximum value for the pid_t type | |
655 | * b) the cgroups pids_max attribute for the system | |
656 | * c) the kernel's configure maximum PID value | |
657 | * | |
658 | * And then pick the smallest of the three */ | |
659 | ||
660 | if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0) | |
661 | (void) safe_atou64(value, &a); | |
662 | ||
663 | if (cg_get_root_path(&root) >= 0) { | |
664 | value = mfree(value); | |
665 | ||
666 | if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) | |
667 | (void) safe_atou64(value, &b); | |
668 | } | |
669 | ||
670 | return MIN3(TASKS_MAX, | |
671 | a <= 0 ? TASKS_MAX : a, | |
672 | b <= 0 ? TASKS_MAX : b); | |
673 | } | |
674 | ||
/* Multiplies the value of system_tasks_max() by the fraction v/max. With max == 100 this calculates
 * percentages of the system's maximum number of tasks. Returns UINT64_MAX if the multiplication
 * overflows. 'max' must be non-zero. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit, product;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        product = limit * v;

        /* Detect overflow by dividing back */
        if (product / limit != v)
                return UINT64_MAX;

        return product / max;
}
692 | ||
27c06cb5 LP |
693 | int update_reboot_parameter_and_warn(const char *param) { |
694 | int r; | |
c5220a94 | 695 | |
27c06cb5 LP |
696 | if (isempty(param)) { |
697 | if (unlink("/run/systemd/reboot-param") < 0) { | |
698 | if (errno == ENOENT) | |
699 | return 0; | |
700 | ||
701 | return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m"); | |
702 | } | |
703 | ||
704 | return 0; | |
705 | } | |
706 | ||
78e334b5 | 707 | RUN_WITH_UMASK(0022) { |
27c06cb5 | 708 | r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE); |
78e334b5 ZJS |
709 | if (r < 0) |
710 | return log_warning_errno(r, "Failed to write reboot parameter file: %m"); | |
711 | } | |
c5220a94 | 712 | |
e53fc357 | 713 | return 0; |
c5220a94 | 714 | } |
6d313367 | 715 | |
3f6fd1ba LP |
716 | int version(void) { |
717 | puts(PACKAGE_STRING "\n" | |
718 | SYSTEMD_FEATURES); | |
719 | return 0; | |
720 | } | |
c43b2b9c FB |
721 | |
722 | int get_block_device(const char *path, dev_t *dev) { | |
723 | struct stat st; | |
724 | struct statfs sfs; | |
725 | ||
726 | assert(path); | |
727 | assert(dev); | |
728 | ||
729 | /* Get's the block device directly backing a file system. If | |
730 | * the block device is encrypted, returns the device mapper | |
731 | * block device. */ | |
732 | ||
733 | if (lstat(path, &st)) | |
734 | return -errno; | |
735 | ||
736 | if (major(st.st_dev) != 0) { | |
737 | *dev = st.st_dev; | |
738 | return 1; | |
739 | } | |
740 | ||
741 | if (statfs(path, &sfs) < 0) | |
742 | return -errno; | |
743 | ||
744 | if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) | |
745 | return btrfs_get_block_device(path, dev); | |
746 | ||
747 | return 0; | |
748 | } | |
749 | ||
750 | int get_block_device_harder(const char *path, dev_t *dev) { | |
751 | _cleanup_closedir_ DIR *d = NULL; | |
752 | _cleanup_free_ char *p = NULL, *t = NULL; | |
753 | struct dirent *de, *found = NULL; | |
754 | const char *q; | |
755 | unsigned maj, min; | |
756 | dev_t dt; | |
757 | int r; | |
758 | ||
759 | assert(path); | |
760 | assert(dev); | |
761 | ||
762 | /* Gets the backing block device for a file system, and | |
763 | * handles LUKS encrypted file systems, looking for its | |
764 | * immediate parent, if there is one. */ | |
765 | ||
766 | r = get_block_device(path, &dt); | |
767 | if (r <= 0) | |
768 | return r; | |
769 | ||
770 | if (asprintf(&p, "/sys/dev/block/%u:%u/slaves", major(dt), minor(dt)) < 0) | |
771 | return -ENOMEM; | |
772 | ||
773 | d = opendir(p); | |
774 | if (!d) { | |
775 | if (errno == ENOENT) | |
776 | goto fallback; | |
777 | ||
778 | return -errno; | |
779 | } | |
780 | ||
781 | FOREACH_DIRENT_ALL(de, d, return -errno) { | |
782 | ||
783 | if (dot_or_dot_dot(de->d_name)) | |
784 | continue; | |
785 | ||
786 | if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN)) | |
787 | continue; | |
788 | ||
789 | if (found) { | |
790 | _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL; | |
791 | ||
792 | /* We found a device backed by multiple other devices. We don't really support automatic | |
793 | * discovery on such setups, with the exception of dm-verity partitions. In this case there are | |
794 | * two backing devices: the data partition and the hash partition. We are fine with such | |
795 | * setups, however, only if both partitions are on the same physical device. Hence, let's | |
796 | * verify this. */ | |
797 | ||
798 | u = strjoin(p, "/", de->d_name, "/../dev"); | |
799 | if (!u) | |
800 | return -ENOMEM; | |
801 | ||
802 | v = strjoin(p, "/", found->d_name, "/../dev"); | |
803 | if (!v) | |
804 | return -ENOMEM; | |
805 | ||
806 | r = read_one_line_file(u, &a); | |
807 | if (r < 0) { | |
808 | log_debug_errno(r, "Failed to read %s: %m", u); | |
809 | goto fallback; | |
810 | } | |
811 | ||
812 | r = read_one_line_file(v, &b); | |
813 | if (r < 0) { | |
814 | log_debug_errno(r, "Failed to read %s: %m", v); | |
815 | goto fallback; | |
816 | } | |
817 | ||
818 | /* Check if the parent device is the same. If not, then the two backing devices are on | |
819 | * different physical devices, and we don't support that. */ | |
820 | if (!streq(a, b)) | |
821 | goto fallback; | |
822 | } | |
823 | ||
824 | found = de; | |
825 | } | |
826 | ||
827 | if (!found) | |
828 | goto fallback; | |
829 | ||
830 | q = strjoina(p, "/", found->d_name, "/dev"); | |
831 | ||
832 | r = read_one_line_file(q, &t); | |
833 | if (r == -ENOENT) | |
834 | goto fallback; | |
835 | if (r < 0) | |
836 | return r; | |
837 | ||
838 | if (sscanf(t, "%u:%u", &maj, &min) != 2) | |
839 | return -EINVAL; | |
840 | ||
841 | if (maj == 0) | |
842 | goto fallback; | |
843 | ||
844 | *dev = makedev(maj, min); | |
845 | return 1; | |
846 | ||
847 | fallback: | |
848 | *dev = dt; | |
849 | return 1; | |
850 | } |