]>
Commit | Line | Data |
---|---|---|
a7334b09 LP |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2010 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
7 | under the terms of the GNU Lesser General Public License as published by |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
a7334b09 LP |
9 | (at your option) any later version. |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 14 | Lesser General Public License for more details. |
a7334b09 | 15 | |
5430f7f2 | 16 | You should have received a copy of the GNU Lesser General Public License |
a7334b09 LP |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
18 | ***/ | |
19 | ||
11c3a366 | 20 | #include <alloca.h> |
60918275 | 21 | #include <errno.h> |
f6c2284a | 22 | #include <fcntl.h> |
f6c2284a LP |
23 | #include <sched.h> |
24 | #include <signal.h> | |
25 | #include <stdarg.h> | |
26 | #include <stdio.h> | |
27 | #include <stdlib.h> | |
28 | #include <string.h> | |
87d2c1ff | 29 | #include <sys/mman.h> |
f6c2284a | 30 | #include <sys/prctl.h> |
11c3a366 TA |
31 | #include <sys/statfs.h> |
32 | #include <sys/sysmacros.h> | |
f6c2284a | 33 | #include <sys/types.h> |
f6c2284a | 34 | #include <unistd.h> |
eef46c37 | 35 | |
b5efdb8a | 36 | #include "alloc-util.h" |
c43b2b9c | 37 | #include "btrfs-util.h" |
3f6fd1ba | 38 | #include "build.h" |
d9ab2bcf | 39 | #include "cgroup-util.h" |
f6c2284a | 40 | #include "def.h" |
cf0fbc49 | 41 | #include "dirent-util.h" |
3ffd4af2 | 42 | #include "fd-util.h" |
f6c2284a | 43 | #include "fileio.h" |
f97b34a6 | 44 | #include "format-util.h" |
f6c2284a LP |
45 | #include "hashmap.h" |
46 | #include "hostname-util.h" | |
a9f5d454 | 47 | #include "log.h" |
f6c2284a LP |
48 | #include "macro.h" |
49 | #include "missing.h" | |
6bedfcbb | 50 | #include "parse-util.h" |
9eb977db | 51 | #include "path-util.h" |
0b452006 | 52 | #include "process-util.h" |
11c3a366 | 53 | #include "set.h" |
93cc7779 | 54 | #include "signal-util.h" |
cf0fbc49 | 55 | #include "stat-util.h" |
07630cea | 56 | #include "string-util.h" |
f6c2284a | 57 | #include "strv.h" |
93cc7779 | 58 | #include "time-util.h" |
8612da97 | 59 | #include "umask-util.h" |
b1d4f8e1 | 60 | #include "user-util.h" |
4f5dd394 | 61 | #include "util.h" |
56cf987f | 62 | |
9a0e6896 LP |
/* Saved copy of the process argument vector. Nothing in this part of the file assigns these; presumably
 * they are filled in by the program's main() -- confirm at the call sites. */
int saved_argc = 0;
char **saved_argv = NULL;

/* Cached verdict of in_initrd(): negative means "not determined yet", otherwise the boolean result. */
static int saved_in_initrd = -1;
9086e840 | 66 | |
37f85e66 | 67 | size_t page_size(void) { |
ec202eae | 68 | static thread_local size_t pgsz = 0; |
37f85e66 | 69 | long r; |
70 | ||
87d2c1ff | 71 | if (_likely_(pgsz > 0)) |
37f85e66 | 72 | return pgsz; |
73 | ||
e67f47e5 LP |
74 | r = sysconf(_SC_PAGESIZE); |
75 | assert(r > 0); | |
37f85e66 | 76 | |
77 | pgsz = (size_t) r; | |
37f85e66 | 78 | return pgsz; |
79 | } | |
80 | ||
a88c8750 TG |
/* Returns true if the file /run/plymouth/pid exists (checked with access(F_OK)), false otherwise. */
bool plymouth_running(void) {
        if (access("/run/plymouth/pid", F_OK) < 0)
                return false;

        return true;
}
84 | ||
4d6d6518 LP |
/* Returns true if the display string starts with a colon immediately followed by a decimal digit (e.g.
 * ":0"), false otherwise. The string must not be NULL. */
bool display_is_local(const char *display) {
        assert(display);

        if (display[0] != ':')
                return false;

        /* display[1] is safe to read: display[0] was ':' and hence not the terminating NUL. */
        return display[1] >= '0' && display[1] <= '9';
}
93 | ||
94 | int socket_from_display(const char *display, char **path) { | |
95 | size_t k; | |
96 | char *f, *c; | |
97 | ||
98 | assert(display); | |
99 | assert(path); | |
100 | ||
101 | if (!display_is_local(display)) | |
102 | return -EINVAL; | |
103 | ||
104 | k = strspn(display+1, "0123456789"); | |
105 | ||
f8294e41 | 106 | f = new(char, strlen("/tmp/.X11-unix/X") + k + 1); |
4d6d6518 LP |
107 | if (!f) |
108 | return -ENOMEM; | |
109 | ||
110 | c = stpcpy(f, "/tmp/.X11-unix/X"); | |
111 | memcpy(c, display+1, k); | |
112 | c[k] = 0; | |
113 | ||
114 | *path = f; | |
115 | ||
116 | return 0; | |
117 | } | |
118 | ||
94959f0f LP |
/* Determines the "whole disk" device for block device d by probing /sys/dev/block/.
 *
 * If d itself has a "queue" entry it is returned as is. Otherwise, if d has a "partition" entry, the device
 * number stored in the parent's "dev" file is returned, provided that parent has a "queue" entry.
 *
 * Returns 0 and fills in *ret on success, -ENOENT if no suitable device is found, -EINVAL if the parent's
 * "dev" file cannot be parsed, -ENOMEM on allocation failure, or the error from read_one_line_file(). */
int block_get_whole_disk(dev_t d, dev_t *ret) {
        unsigned parent_maj, parent_min;
        char *sysfs, *line;
        int k;

        assert(ret);

        /* A device that has a queue is good enough for us. */
        if (asprintf(&sysfs, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = access(sysfs, F_OK);
        free(sysfs);
        if (k >= 0) {
                *ret = d;
                return 0;
        }

        /* No queue: only continue if this is a partition. */
        if (asprintf(&sysfs, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = access(sysfs, F_OK);
        free(sysfs);
        if (k < 0)
                return -ENOENT;

        /* Read the device number of the parent. */
        if (asprintf(&sysfs, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
                return -ENOMEM;

        k = read_one_line_file(sysfs, &line);
        free(sysfs);
        if (k < 0)
                return k;

        k = sscanf(line, "%u:%u", &parent_maj, &parent_min);
        free(line);
        if (k != 2)
                return -EINVAL;

        /* Accept the parent only if it, in turn, has a queue. */
        if (asprintf(&sysfs, "/sys/dev/block/%u:%u/queue", parent_maj, parent_min) < 0)
                return -ENOMEM;

        k = access(sysfs, F_OK);
        free(sysfs);
        if (k < 0)
                return -ENOENT;

        *ret = makedev(parent_maj, parent_min);
        return 0;
}
178 | ||
65457142 FC |
179 | bool kexec_loaded(void) { |
180 | bool loaded = false; | |
181 | char *s; | |
182 | ||
183 | if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) { | |
184 | if (s[0] == '1') | |
185 | loaded = true; | |
186 | free(s); | |
187 | } | |
188 | return loaded; | |
189 | } | |
fb9de93d | 190 | |
87d2c1ff LP |
/* Translates the access mode bits of open(2)-style flags into the matching mmap(2) protection bits:
 * O_RDONLY -> PROT_READ, O_WRONLY -> PROT_WRITE, O_RDWR -> PROT_READ|PROT_WRITE. All bits outside of
 * O_ACCMODE are ignored. Returns -EINVAL for any other access mode. */
int prot_from_flags(int flags) {
        int mode = flags & O_ACCMODE;

        if (mode == O_RDONLY)
                return PROT_READ;

        if (mode == O_WRONLY)
                return PROT_WRITE;

        if (mode == O_RDWR)
                return PROT_READ|PROT_WRITE;

        return -EINVAL;
}
689b9a22 | 208 | |
9bdc770c | 209 | int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) { |
6bb92a16 | 210 | bool stdout_is_tty, stderr_is_tty; |
8a7c93d8 LP |
211 | pid_t parent_pid, agent_pid; |
212 | sigset_t ss, saved_ss; | |
6bb92a16 LP |
213 | unsigned n, i; |
214 | va_list ap; | |
215 | char **l; | |
216 | ||
217 | assert(pid); | |
218 | assert(path); | |
219 | ||
6bb92a16 LP |
220 | /* Spawns a temporary TTY agent, making sure it goes away when |
221 | * we go away */ | |
222 | ||
df0ff127 | 223 | parent_pid = getpid_cached(); |
8a7c93d8 LP |
224 | |
225 | /* First we temporarily block all signals, so that the new | |
226 | * child has them blocked initially. This way, we can be sure | |
227 | * that SIGTERMs are not lost we might send to the agent. */ | |
228 | assert_se(sigfillset(&ss) >= 0); | |
229 | assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0); | |
230 | ||
6bb92a16 | 231 | agent_pid = fork(); |
8a7c93d8 LP |
232 | if (agent_pid < 0) { |
233 | assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); | |
6bb92a16 | 234 | return -errno; |
8a7c93d8 | 235 | } |
6bb92a16 LP |
236 | |
237 | if (agent_pid != 0) { | |
8a7c93d8 | 238 | assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); |
6bb92a16 LP |
239 | *pid = agent_pid; |
240 | return 0; | |
241 | } | |
242 | ||
243 | /* In the child: | |
244 | * | |
245 | * Make sure the agent goes away when the parent dies */ | |
246 | if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) | |
247 | _exit(EXIT_FAILURE); | |
248 | ||
8a7c93d8 LP |
249 | /* Make sure we actually can kill the agent, if we need to, in |
250 | * case somebody invoked us from a shell script that trapped | |
251 | * SIGTERM or so... */ | |
ce30c8dc LP |
252 | (void) reset_all_signal_handlers(); |
253 | (void) reset_signal_mask(); | |
8a7c93d8 | 254 | |
6bb92a16 | 255 | /* Check whether our parent died before we were able |
8a7c93d8 | 256 | * to set the death signal and unblock the signals */ |
6bb92a16 LP |
257 | if (getppid() != parent_pid) |
258 | _exit(EXIT_SUCCESS); | |
259 | ||
260 | /* Don't leak fds to the agent */ | |
9bdc770c | 261 | close_all_fds(except, n_except); |
6bb92a16 LP |
262 | |
263 | stdout_is_tty = isatty(STDOUT_FILENO); | |
264 | stderr_is_tty = isatty(STDERR_FILENO); | |
265 | ||
266 | if (!stdout_is_tty || !stderr_is_tty) { | |
8a7c93d8 LP |
267 | int fd; |
268 | ||
6bb92a16 LP |
269 | /* Detach from stdout/stderr. and reopen |
270 | * /dev/tty for them. This is important to | |
271 | * ensure that when systemctl is started via | |
272 | * popen() or a similar call that expects to | |
273 | * read EOF we actually do generate EOF and | |
274 | * not delay this indefinitely by because we | |
275 | * keep an unused copy of stdin around. */ | |
276 | fd = open("/dev/tty", O_WRONLY); | |
277 | if (fd < 0) { | |
56f64d95 | 278 | log_error_errno(errno, "Failed to open /dev/tty: %m"); |
6bb92a16 LP |
279 | _exit(EXIT_FAILURE); |
280 | } | |
281 | ||
94edd38e ZJS |
282 | if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) { |
283 | log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); | |
284 | _exit(EXIT_FAILURE); | |
285 | } | |
6bb92a16 | 286 | |
94edd38e ZJS |
287 | if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) { |
288 | log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); | |
289 | _exit(EXIT_FAILURE); | |
290 | } | |
6bb92a16 | 291 | |
94edd38e | 292 | if (fd > STDERR_FILENO) |
6bb92a16 LP |
293 | close(fd); |
294 | } | |
295 | ||
296 | /* Count arguments */ | |
297 | va_start(ap, path); | |
298 | for (n = 0; va_arg(ap, char*); n++) | |
299 | ; | |
300 | va_end(ap); | |
301 | ||
302 | /* Allocate strv */ | |
303 | l = alloca(sizeof(char *) * (n + 1)); | |
304 | ||
305 | /* Fill in arguments */ | |
306 | va_start(ap, path); | |
307 | for (i = 0; i <= n; i++) | |
308 | l[i] = va_arg(ap, char*); | |
309 | va_end(ap); | |
310 | ||
311 | execv(path, l); | |
312 | _exit(EXIT_FAILURE); | |
313 | } | |
68faf98c | 314 | |
9be346c9 | 315 | bool in_initrd(void) { |
825c6fe5 | 316 | struct statfs s; |
8f33b5b8 | 317 | |
dcd61450 IS |
318 | if (saved_in_initrd >= 0) |
319 | return saved_in_initrd; | |
825c6fe5 LP |
320 | |
321 | /* We make two checks here: | |
322 | * | |
323 | * 1. the flag file /etc/initrd-release must exist | |
324 | * 2. the root file system must be a memory file system | |
325 | * | |
326 | * The second check is extra paranoia, since misdetecting an | |
629ff674 | 327 | * initrd can have bad consequences due the initrd |
825c6fe5 LP |
328 | * emptying when transititioning to the main systemd. |
329 | */ | |
330 | ||
dcd61450 IS |
331 | saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && |
332 | statfs("/", &s) >= 0 && | |
333 | is_temporary_fs(&s); | |
9be346c9 | 334 | |
dcd61450 IS |
335 | return saved_in_initrd; |
336 | } | |
337 | ||
338 | void in_initrd_force(bool value) { | |
339 | saved_in_initrd = value; | |
9be346c9 | 340 | } |
069cfc85 | 341 | |
a9e12476 KS |
/* hey glibc, APIs with callbacks without a user pointer are so useless */

/* Binary search like bsearch(3), but the comparison callback receives an additional user pointer.
 *
 * 'base' points to an array of 'nmemb' elements of 'size' bytes each, which must be sorted in the order
 * defined by 'compar'. 'compar' is invoked as compar(key, element, arg) and must return a negative, zero or
 * positive value if the key sorts before, equal to or after the element.
 *
 * Returns a pointer to a matching element, or NULL if there is none (in particular if nmemb is 0). */
void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                 int (*compar) (const void *, const void *, void *), void *arg) {
        size_t l, u, idx;
        const void *p;
        int comparison;

        l = 0;
        u = nmemb;
        while (l < u) {
                /* Compute the midpoint without adding l and u, so that it cannot wrap around. */
                idx = l + (u - l) / 2;
                p = (const char *) base + idx * size;
                comparison = compar(key, p, arg);
                if (comparison < 0)
                        u = idx;
                else if (comparison > 0)
                        l = idx + 1;
                else
                        return (void *)p;
        }
        return NULL;
}
09017585 | 364 | |
240dbaa4 LP |
365 | int on_ac_power(void) { |
366 | bool found_offline = false, found_online = false; | |
367 | _cleanup_closedir_ DIR *d = NULL; | |
8fb3f009 | 368 | struct dirent *de; |
240dbaa4 LP |
369 | |
370 | d = opendir("/sys/class/power_supply"); | |
371 | if (!d) | |
6d890034 | 372 | return errno == ENOENT ? true : -errno; |
240dbaa4 | 373 | |
8fb3f009 | 374 | FOREACH_DIRENT(de, d, return -errno) { |
240dbaa4 LP |
375 | _cleanup_close_ int fd = -1, device = -1; |
376 | char contents[6]; | |
377 | ssize_t n; | |
240dbaa4 | 378 | |
240dbaa4 LP |
379 | device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY); |
380 | if (device < 0) { | |
381 | if (errno == ENOENT || errno == ENOTDIR) | |
382 | continue; | |
383 | ||
384 | return -errno; | |
385 | } | |
386 | ||
387 | fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY); | |
388 | if (fd < 0) { | |
389 | if (errno == ENOENT) | |
390 | continue; | |
391 | ||
392 | return -errno; | |
393 | } | |
394 | ||
395 | n = read(fd, contents, sizeof(contents)); | |
396 | if (n < 0) | |
397 | return -errno; | |
398 | ||
399 | if (n != 6 || memcmp(contents, "Mains\n", 6)) | |
400 | continue; | |
401 | ||
03e334a1 | 402 | safe_close(fd); |
240dbaa4 LP |
403 | fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY); |
404 | if (fd < 0) { | |
405 | if (errno == ENOENT) | |
406 | continue; | |
407 | ||
408 | return -errno; | |
409 | } | |
410 | ||
411 | n = read(fd, contents, sizeof(contents)); | |
412 | if (n < 0) | |
413 | return -errno; | |
414 | ||
415 | if (n != 2 || contents[1] != '\n') | |
416 | return -EIO; | |
417 | ||
418 | if (contents[0] == '1') { | |
419 | found_online = true; | |
420 | break; | |
421 | } else if (contents[0] == '0') | |
422 | found_offline = true; | |
423 | else | |
424 | return -EIO; | |
425 | } | |
426 | ||
427 | return found_online || !found_offline; | |
428 | } | |
fabe5c0e | 429 | |
bc9fd78c LP |
430 | int container_get_leader(const char *machine, pid_t *pid) { |
431 | _cleanup_free_ char *s = NULL, *class = NULL; | |
432 | const char *p; | |
433 | pid_t leader; | |
434 | int r; | |
435 | ||
436 | assert(machine); | |
437 | assert(pid); | |
438 | ||
b9a8d250 LP |
439 | if (!machine_name_is_valid(machine)) |
440 | return -EINVAL; | |
441 | ||
63c372cb | 442 | p = strjoina("/run/systemd/machines/", machine); |
bc9fd78c LP |
443 | r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL); |
444 | if (r == -ENOENT) | |
445 | return -EHOSTDOWN; | |
446 | if (r < 0) | |
447 | return r; | |
448 | if (!s) | |
449 | return -EIO; | |
450 | ||
451 | if (!streq_ptr(class, "container")) | |
452 | return -EIO; | |
453 | ||
454 | r = parse_pid(s, &leader); | |
455 | if (r < 0) | |
456 | return r; | |
457 | if (leader <= 1) | |
458 | return -EIO; | |
459 | ||
460 | *pid = leader; | |
461 | return 0; | |
462 | } | |
463 | ||
671c3419 RM |
464 | int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { |
465 | _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; | |
359a06aa | 466 | int rfd = -1; |
bc9fd78c LP |
467 | |
468 | assert(pid >= 0); | |
bc9fd78c | 469 | |
878cd7e9 LP |
470 | if (mntns_fd) { |
471 | const char *mntns; | |
a4475f57 | 472 | |
878cd7e9 LP |
473 | mntns = procfs_file_alloca(pid, "ns/mnt"); |
474 | mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
475 | if (mntnsfd < 0) | |
476 | return -errno; | |
477 | } | |
bc9fd78c | 478 | |
878cd7e9 LP |
479 | if (pidns_fd) { |
480 | const char *pidns; | |
481 | ||
482 | pidns = procfs_file_alloca(pid, "ns/pid"); | |
483 | pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
484 | if (pidnsfd < 0) | |
485 | return -errno; | |
486 | } | |
487 | ||
488 | if (netns_fd) { | |
489 | const char *netns; | |
490 | ||
491 | netns = procfs_file_alloca(pid, "ns/net"); | |
492 | netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
493 | if (netnsfd < 0) | |
494 | return -errno; | |
495 | } | |
496 | ||
671c3419 RM |
497 | if (userns_fd) { |
498 | const char *userns; | |
499 | ||
500 | userns = procfs_file_alloca(pid, "ns/user"); | |
501 | usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
502 | if (usernsfd < 0 && errno != ENOENT) | |
503 | return -errno; | |
504 | } | |
505 | ||
878cd7e9 LP |
506 | if (root_fd) { |
507 | const char *root; | |
508 | ||
509 | root = procfs_file_alloca(pid, "root"); | |
510 | rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); | |
511 | if (rfd < 0) | |
512 | return -errno; | |
513 | } | |
514 | ||
515 | if (pidns_fd) | |
516 | *pidns_fd = pidnsfd; | |
bc9fd78c | 517 | |
878cd7e9 LP |
518 | if (mntns_fd) |
519 | *mntns_fd = mntnsfd; | |
520 | ||
521 | if (netns_fd) | |
522 | *netns_fd = netnsfd; | |
523 | ||
671c3419 RM |
524 | if (userns_fd) |
525 | *userns_fd = usernsfd; | |
526 | ||
878cd7e9 LP |
527 | if (root_fd) |
528 | *root_fd = rfd; | |
529 | ||
671c3419 | 530 | pidnsfd = mntnsfd = netnsfd = usernsfd = -1; |
bc9fd78c LP |
531 | |
532 | return 0; | |
533 | } | |
534 | ||
671c3419 RM |
535 | int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { |
536 | if (userns_fd >= 0) { | |
537 | /* Can't setns to your own userns, since then you could | |
538 | * escalate from non-root to root in your own namespace, so | |
539 | * check if namespaces equal before attempting to enter. */ | |
540 | _cleanup_free_ char *userns_fd_path = NULL; | |
541 | int r; | |
542 | if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0) | |
543 | return -ENOMEM; | |
544 | ||
e3f791a2 | 545 | r = files_same(userns_fd_path, "/proc/self/ns/user", 0); |
671c3419 RM |
546 | if (r < 0) |
547 | return r; | |
548 | if (r) | |
549 | userns_fd = -1; | |
550 | } | |
bc9fd78c | 551 | |
878cd7e9 LP |
552 | if (pidns_fd >= 0) |
553 | if (setns(pidns_fd, CLONE_NEWPID) < 0) | |
554 | return -errno; | |
a4475f57 | 555 | |
878cd7e9 LP |
556 | if (mntns_fd >= 0) |
557 | if (setns(mntns_fd, CLONE_NEWNS) < 0) | |
558 | return -errno; | |
bc9fd78c | 559 | |
878cd7e9 LP |
560 | if (netns_fd >= 0) |
561 | if (setns(netns_fd, CLONE_NEWNET) < 0) | |
562 | return -errno; | |
bc9fd78c | 563 | |
671c3419 RM |
564 | if (userns_fd >= 0) |
565 | if (setns(userns_fd, CLONE_NEWUSER) < 0) | |
566 | return -errno; | |
567 | ||
878cd7e9 LP |
568 | if (root_fd >= 0) { |
569 | if (fchdir(root_fd) < 0) | |
570 | return -errno; | |
571 | ||
572 | if (chroot(".") < 0) | |
573 | return -errno; | |
574 | } | |
bc9fd78c | 575 | |
b4da6d6b | 576 | return reset_uid_gid(); |
bc9fd78c | 577 | } |
bf108e55 | 578 | |
1c231f56 | 579 | uint64_t physical_memory(void) { |
d9ab2bcf LP |
580 | _cleanup_free_ char *root = NULL, *value = NULL; |
581 | uint64_t mem, lim; | |
582 | size_t ps; | |
583 | long sc; | |
1c231f56 | 584 | |
d9ab2bcf LP |
585 | /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of |
586 | * memory. | |
587 | * | |
588 | * In order to support containers nicely that have a configured memory limit we'll take the minimum of the | |
589 | * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ | |
590 | ||
591 | sc = sysconf(_SC_PHYS_PAGES); | |
592 | assert(sc > 0); | |
593 | ||
594 | ps = page_size(); | |
595 | mem = (uint64_t) sc * (uint64_t) ps; | |
596 | ||
597 | if (cg_get_root_path(&root) < 0) | |
598 | return mem; | |
599 | ||
600 | if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value)) | |
601 | return mem; | |
602 | ||
603 | if (safe_atou64(value, &lim) < 0) | |
604 | return mem; | |
1c231f56 | 605 | |
d9ab2bcf LP |
606 | /* Make sure the limit is a multiple of our own page size */ |
607 | lim /= ps; | |
608 | lim *= ps; | |
1c231f56 | 609 | |
d9ab2bcf | 610 | return MIN(mem, lim); |
1c231f56 | 611 | } |
6db615c1 | 612 | |
d8cf2ac7 LP |
/* Computes physical_memory() * v / max, working in units of pages so that the result is a multiple of the
 * page size (rounded down). 'max' must be non-zero. Returns UINT64_MAX if a multiplication overflows. */
uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
        uint64_t pages, scaled, ps, bytes;

        assert(max > 0);

        ps = page_size();
        assert(ps > 0);

        pages = physical_memory() / ps;
        assert(pages > 0);

        /* pages * v, with overflow detected by dividing back */
        scaled = pages * v;
        if (scaled / pages != v)
                return UINT64_MAX;

        scaled /= max;

        /* Convert back from pages to bytes, again guarding against overflow */
        bytes = scaled * ps;
        if (bytes / ps != scaled)
                return UINT64_MAX;

        return bytes;
}
639 | ||
83f8e808 LP |
640 | uint64_t system_tasks_max(void) { |
641 | ||
642 | #if SIZEOF_PID_T == 4 | |
643 | #define TASKS_MAX ((uint64_t) (INT32_MAX-1)) | |
644 | #elif SIZEOF_PID_T == 2 | |
645 | #define TASKS_MAX ((uint64_t) (INT16_MAX-1)) | |
646 | #else | |
647 | #error "Unknown pid_t size" | |
648 | #endif | |
649 | ||
650 | _cleanup_free_ char *value = NULL, *root = NULL; | |
651 | uint64_t a = TASKS_MAX, b = TASKS_MAX; | |
652 | ||
653 | /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this | |
654 | * limit: | |
655 | * | |
656 | * a) the maximum value for the pid_t type | |
657 | * b) the cgroups pids_max attribute for the system | |
658 | * c) the kernel's configure maximum PID value | |
659 | * | |
660 | * And then pick the smallest of the three */ | |
661 | ||
662 | if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0) | |
663 | (void) safe_atou64(value, &a); | |
664 | ||
665 | if (cg_get_root_path(&root) >= 0) { | |
666 | value = mfree(value); | |
667 | ||
668 | if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) | |
669 | (void) safe_atou64(value, &b); | |
670 | } | |
671 | ||
672 | return MIN3(TASKS_MAX, | |
673 | a <= 0 ? TASKS_MAX : a, | |
674 | b <= 0 ? TASKS_MAX : b); | |
675 | } | |
676 | ||
/* Scales the system's task limit by the fraction v/max, i.e. with max == 100 the argument v is a percentage
 * of system_tasks_max(). 'max' must be non-zero. Returns UINT64_MAX if the multiplication overflows. */
uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
        uint64_t limit, product;

        assert(max > 0);

        limit = system_tasks_max();
        assert(limit > 0);

        product = limit * v;

        /* Dividing back must yield v again, otherwise the multiplication wrapped around. */
        if (product / limit != v)
                return UINT64_MAX;

        return product / max;
}
694 | ||
27c06cb5 LP |
695 | int update_reboot_parameter_and_warn(const char *param) { |
696 | int r; | |
c5220a94 | 697 | |
27c06cb5 LP |
698 | if (isempty(param)) { |
699 | if (unlink("/run/systemd/reboot-param") < 0) { | |
700 | if (errno == ENOENT) | |
701 | return 0; | |
702 | ||
703 | return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m"); | |
704 | } | |
705 | ||
706 | return 0; | |
707 | } | |
708 | ||
78e334b5 | 709 | RUN_WITH_UMASK(0022) { |
27c06cb5 | 710 | r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE); |
78e334b5 ZJS |
711 | if (r < 0) |
712 | return log_warning_errno(r, "Failed to write reboot parameter file: %m"); | |
713 | } | |
c5220a94 | 714 | |
e53fc357 | 715 | return 0; |
c5220a94 | 716 | } |
6d313367 | 717 | |
3f6fd1ba LP |
718 | int version(void) { |
719 | puts(PACKAGE_STRING "\n" | |
720 | SYSTEMD_FEATURES); | |
721 | return 0; | |
722 | } | |
c43b2b9c FB |
723 | |
724 | int get_block_device(const char *path, dev_t *dev) { | |
725 | struct stat st; | |
726 | struct statfs sfs; | |
727 | ||
728 | assert(path); | |
729 | assert(dev); | |
730 | ||
731 | /* Get's the block device directly backing a file system. If | |
732 | * the block device is encrypted, returns the device mapper | |
733 | * block device. */ | |
734 | ||
735 | if (lstat(path, &st)) | |
736 | return -errno; | |
737 | ||
738 | if (major(st.st_dev) != 0) { | |
739 | *dev = st.st_dev; | |
740 | return 1; | |
741 | } | |
742 | ||
743 | if (statfs(path, &sfs) < 0) | |
744 | return -errno; | |
745 | ||
746 | if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) | |
747 | return btrfs_get_block_device(path, dev); | |
748 | ||
749 | return 0; | |
750 | } | |
751 | ||
752 | int get_block_device_harder(const char *path, dev_t *dev) { | |
753 | _cleanup_closedir_ DIR *d = NULL; | |
754 | _cleanup_free_ char *p = NULL, *t = NULL; | |
755 | struct dirent *de, *found = NULL; | |
756 | const char *q; | |
757 | unsigned maj, min; | |
758 | dev_t dt; | |
759 | int r; | |
760 | ||
761 | assert(path); | |
762 | assert(dev); | |
763 | ||
764 | /* Gets the backing block device for a file system, and | |
765 | * handles LUKS encrypted file systems, looking for its | |
766 | * immediate parent, if there is one. */ | |
767 | ||
768 | r = get_block_device(path, &dt); | |
769 | if (r <= 0) | |
770 | return r; | |
771 | ||
772 | if (asprintf(&p, "/sys/dev/block/%u:%u/slaves", major(dt), minor(dt)) < 0) | |
773 | return -ENOMEM; | |
774 | ||
775 | d = opendir(p); | |
776 | if (!d) { | |
777 | if (errno == ENOENT) | |
778 | goto fallback; | |
779 | ||
780 | return -errno; | |
781 | } | |
782 | ||
783 | FOREACH_DIRENT_ALL(de, d, return -errno) { | |
784 | ||
785 | if (dot_or_dot_dot(de->d_name)) | |
786 | continue; | |
787 | ||
788 | if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN)) | |
789 | continue; | |
790 | ||
791 | if (found) { | |
792 | _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL; | |
793 | ||
794 | /* We found a device backed by multiple other devices. We don't really support automatic | |
795 | * discovery on such setups, with the exception of dm-verity partitions. In this case there are | |
796 | * two backing devices: the data partition and the hash partition. We are fine with such | |
797 | * setups, however, only if both partitions are on the same physical device. Hence, let's | |
798 | * verify this. */ | |
799 | ||
800 | u = strjoin(p, "/", de->d_name, "/../dev"); | |
801 | if (!u) | |
802 | return -ENOMEM; | |
803 | ||
804 | v = strjoin(p, "/", found->d_name, "/../dev"); | |
805 | if (!v) | |
806 | return -ENOMEM; | |
807 | ||
808 | r = read_one_line_file(u, &a); | |
809 | if (r < 0) { | |
810 | log_debug_errno(r, "Failed to read %s: %m", u); | |
811 | goto fallback; | |
812 | } | |
813 | ||
814 | r = read_one_line_file(v, &b); | |
815 | if (r < 0) { | |
816 | log_debug_errno(r, "Failed to read %s: %m", v); | |
817 | goto fallback; | |
818 | } | |
819 | ||
820 | /* Check if the parent device is the same. If not, then the two backing devices are on | |
821 | * different physical devices, and we don't support that. */ | |
822 | if (!streq(a, b)) | |
823 | goto fallback; | |
824 | } | |
825 | ||
826 | found = de; | |
827 | } | |
828 | ||
829 | if (!found) | |
830 | goto fallback; | |
831 | ||
832 | q = strjoina(p, "/", found->d_name, "/dev"); | |
833 | ||
834 | r = read_one_line_file(q, &t); | |
835 | if (r == -ENOENT) | |
836 | goto fallback; | |
837 | if (r < 0) | |
838 | return r; | |
839 | ||
840 | if (sscanf(t, "%u:%u", &maj, &min) != 2) | |
841 | return -EINVAL; | |
842 | ||
843 | if (maj == 0) | |
844 | goto fallback; | |
845 | ||
846 | *dev = makedev(maj, min); | |
847 | return 1; | |
848 | ||
849 | fallback: | |
850 | *dev = dt; | |
851 | return 1; | |
852 | } |