2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 #include <sys/prctl.h>
31 #include <sys/statfs.h>
32 #include <sys/sysmacros.h>
33 #include <sys/types.h>
36 #include "alloc-util.h"
38 #include "cgroup-util.h"
40 #include "dirent-util.h"
43 #include "format-util.h"
45 #include "hostname-util.h"
49 #include "parse-util.h"
50 #include "path-util.h"
51 #include "process-util.h"
53 #include "signal-util.h"
54 #include "stat-util.h"
55 #include "string-util.h"
57 #include "time-util.h"
58 #include "umask-util.h"
59 #include "user-util.h"
62 /* Put this test here for lack of a better place */
63 assert_cc(EAGAIN
== EWOULDBLOCK
);
66 char **saved_argv
= NULL
;
67 static int saved_in_initrd
= -1;
69 size_t page_size(void) {
70 static thread_local
size_t pgsz
= 0;
73 if (_likely_(pgsz
> 0))
76 r
= sysconf(_SC_PAGESIZE
);
83 static int do_execute(char **directories
, usec_t timeout
, char *argv
[]) {
84 _cleanup_hashmap_free_free_ Hashmap
*pids
= NULL
;
85 _cleanup_set_free_free_ Set
*seen
= NULL
;
88 /* We fork this all off from a child process so that we can
89 * somewhat cleanly make use of SIGALRM to set a time limit */
91 (void) reset_all_signal_handlers();
92 (void) reset_signal_mask();
94 assert_se(prctl(PR_SET_PDEATHSIG
, SIGTERM
) == 0);
96 pids
= hashmap_new(NULL
);
100 seen
= set_new(&string_hash_ops
);
104 STRV_FOREACH(directory
, directories
) {
105 _cleanup_closedir_
DIR *d
;
108 d
= opendir(*directory
);
113 return log_error_errno(errno
, "Failed to open directory %s: %m", *directory
);
116 FOREACH_DIRENT(de
, d
, break) {
117 _cleanup_free_
char *path
= NULL
;
121 if (!dirent_is_file(de
))
124 if (set_contains(seen
, de
->d_name
)) {
125 log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory
, de
->d_name
);
129 r
= set_put_strdup(seen
, de
->d_name
);
133 path
= strjoin(*directory
, "/", de
->d_name
);
137 if (null_or_empty_path(path
)) {
138 log_debug("%s is empty (a mask).", path
);
144 log_error_errno(errno
, "Failed to fork: %m");
146 } else if (pid
== 0) {
149 assert_se(prctl(PR_SET_PDEATHSIG
, SIGTERM
) == 0);
159 return log_error_errno(errno
, "Failed to execute %s: %m", path
);
162 log_debug("Spawned %s as " PID_FMT
".", path
, pid
);
164 r
= hashmap_put(pids
, PID_TO_PTR(pid
), path
);
171 /* Abort execution of this process after the timeout. We simply
172 * rely on SIGALRM as default action terminating the process,
173 * and turn on alarm(). */
175 if (timeout
!= USEC_INFINITY
)
176 alarm((timeout
+ USEC_PER_SEC
- 1) / USEC_PER_SEC
);
178 while (!hashmap_isempty(pids
)) {
179 _cleanup_free_
char *path
= NULL
;
182 pid
= PTR_TO_PID(hashmap_first_key(pids
));
185 path
= hashmap_remove(pids
, PID_TO_PTR(pid
));
188 wait_for_terminate_and_warn(path
, pid
, true);
194 void execute_directories(const char* const* directories
, usec_t timeout
, char *argv
[]) {
198 char **dirs
= (char**) directories
;
200 assert(!strv_isempty(dirs
));
202 name
= basename(dirs
[0]);
203 assert(!isempty(name
));
205 /* Executes all binaries in the directories in parallel and waits
206 * for them to finish. Optionally a timeout is applied. If a file
207 * with the same name exists in more than one directory, the
208 * earliest one wins. */
210 executor_pid
= fork();
211 if (executor_pid
< 0) {
212 log_error_errno(errno
, "Failed to fork: %m");
215 } else if (executor_pid
== 0) {
216 r
= do_execute(dirs
, timeout
, argv
);
217 _exit(r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
);
220 wait_for_terminate_and_warn(name
, executor_pid
, true);
223 bool plymouth_running(void) {
224 return access("/run/plymouth/pid", F_OK
) >= 0;
227 bool display_is_local(const char *display
) {
236 int socket_from_display(const char *display
, char **path
) {
243 if (!display_is_local(display
))
246 k
= strspn(display
+1, "0123456789");
248 f
= new(char, strlen("/tmp/.X11-unix/X") + k
+ 1);
252 c
= stpcpy(f
, "/tmp/.X11-unix/X");
253 memcpy(c
, display
+1, k
);
261 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
268 /* If it has a queue this is good enough for us */
269 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", major(d
), minor(d
)) < 0)
280 /* If it is a partition find the originating device */
281 if (asprintf(&p
, "/sys/dev/block/%u:%u/partition", major(d
), minor(d
)) < 0)
290 /* Get parent dev_t */
291 if (asprintf(&p
, "/sys/dev/block/%u:%u/../dev", major(d
), minor(d
)) < 0)
294 r
= read_one_line_file(p
, &s
);
300 r
= sscanf(s
, "%u:%u", &m
, &n
);
306 /* Only return this if it is really good enough for us. */
307 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", m
, n
) < 0)
314 *ret
= makedev(m
, n
);
321 bool kexec_loaded(void) {
325 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) >= 0) {
333 int prot_from_flags(int flags
) {
335 switch (flags
& O_ACCMODE
) {
344 return PROT_READ
|PROT_WRITE
;
351 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
352 bool stdout_is_tty
, stderr_is_tty
;
353 pid_t parent_pid
, agent_pid
;
354 sigset_t ss
, saved_ss
;
362 /* Spawns a temporary TTY agent, making sure it goes away when
365 parent_pid
= getpid();
367 /* First we temporarily block all signals, so that the new
368 * child has them blocked initially. This way, we can be sure
369 * that SIGTERMs we might send to the agent are not lost. */
370 assert_se(sigfillset(&ss
) >= 0);
371 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
375 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
379 if (agent_pid
!= 0) {
380 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
387 * Make sure the agent goes away when the parent dies */
388 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
391 /* Make sure we actually can kill the agent, if we need to, in
392 * case somebody invoked us from a shell script that trapped
393 * SIGTERM or so... */
394 (void) reset_all_signal_handlers();
395 (void) reset_signal_mask();
397 /* Check whether our parent died before we were able
398 * to set the death signal and unblock the signals */
399 if (getppid() != parent_pid
)
402 /* Don't leak fds to the agent */
403 close_all_fds(except
, n_except
);
405 stdout_is_tty
= isatty(STDOUT_FILENO
);
406 stderr_is_tty
= isatty(STDERR_FILENO
);
408 if (!stdout_is_tty
|| !stderr_is_tty
) {
411 /* Detach from stdout/stderr and reopen
412 * /dev/tty for them. This is important to
413 * ensure that when systemctl is started via
414 * popen() or a similar call that expects to
415 * read EOF we actually do generate EOF and
416 * not delay this indefinitely because we
417 * keep an unused copy of stdin around. */
418 fd
= open("/dev/tty", O_WRONLY
);
420 log_error_errno(errno
, "Failed to open /dev/tty: %m");
424 if (!stdout_is_tty
&& dup2(fd
, STDOUT_FILENO
) < 0) {
425 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
429 if (!stderr_is_tty
&& dup2(fd
, STDERR_FILENO
) < 0) {
430 log_error_errno(errno
, "Failed to dup2 /dev/tty: %m");
434 if (fd
> STDERR_FILENO
)
438 /* Count arguments */
440 for (n
= 0; va_arg(ap
, char*); n
++)
445 l
= alloca(sizeof(char *) * (n
+ 1));
447 /* Fill in arguments */
449 for (i
= 0; i
<= n
; i
++)
450 l
[i
] = va_arg(ap
, char*);
457 bool in_initrd(void) {
460 if (saved_in_initrd
>= 0)
461 return saved_in_initrd
;
463 /* We make two checks here:
465 * 1. the flag file /etc/initrd-release must exist
466 * 2. the root file system must be a memory file system
468 * The second check is extra paranoia, since misdetecting an
469 * initrd can have bad consequences due to the initrd
470 * emptying when transitioning to the main systemd.
473 saved_in_initrd
= access("/etc/initrd-release", F_OK
) >= 0 &&
474 statfs("/", &s
) >= 0 &&
477 return saved_in_initrd
;
480 void in_initrd_force(bool value
) {
481 saved_in_initrd
= value
;
484 /* hey glibc, APIs with callbacks without a user pointer are so useless */
485 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
486 int (*compar
) (const void *, const void *, void *), void *arg
) {
495 p
= (void *)(((const char *) base
) + (idx
* size
));
496 comparison
= compar(key
, p
, arg
);
499 else if (comparison
> 0)
507 int on_ac_power(void) {
508 bool found_offline
= false, found_online
= false;
509 _cleanup_closedir_
DIR *d
= NULL
;
512 d
= opendir("/sys/class/power_supply");
514 return errno
== ENOENT
? true : -errno
;
516 FOREACH_DIRENT(de
, d
, return -errno
) {
517 _cleanup_close_
int fd
= -1, device
= -1;
521 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
523 if (errno
== ENOENT
|| errno
== ENOTDIR
)
529 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
537 n
= read(fd
, contents
, sizeof(contents
));
541 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
545 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
553 n
= read(fd
, contents
, sizeof(contents
));
557 if (n
!= 2 || contents
[1] != '\n')
560 if (contents
[0] == '1') {
563 } else if (contents
[0] == '0')
564 found_offline
= true;
569 return found_online
|| !found_offline
;
572 int container_get_leader(const char *machine
, pid_t
*pid
) {
573 _cleanup_free_
char *s
= NULL
, *class = NULL
;
581 if (!machine_name_is_valid(machine
))
584 p
= strjoina("/run/systemd/machines/", machine
);
585 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
593 if (!streq_ptr(class, "container"))
596 r
= parse_pid(s
, &leader
);
606 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
607 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
615 mntns
= procfs_file_alloca(pid
, "ns/mnt");
616 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
624 pidns
= procfs_file_alloca(pid
, "ns/pid");
625 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
633 netns
= procfs_file_alloca(pid
, "ns/net");
634 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
642 userns
= procfs_file_alloca(pid
, "ns/user");
643 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
644 if (usernsfd
< 0 && errno
!= ENOENT
)
651 root
= procfs_file_alloca(pid
, "root");
652 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
667 *userns_fd
= usernsfd
;
672 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
677 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
678 if (userns_fd
>= 0) {
679 /* Can't setns to your own userns, since then you could
680 * escalate from non-root to root in your own namespace, so
681 * check whether the namespaces are equal before attempting to enter. */
682 _cleanup_free_
char *userns_fd_path
= NULL
;
684 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
687 r
= files_same(userns_fd_path
, "/proc/self/ns/user");
695 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
699 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
703 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
707 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
711 if (fchdir(root_fd
) < 0)
718 return reset_uid_gid();
721 uint64_t physical_memory(void) {
722 _cleanup_free_
char *root
= NULL
, *value
= NULL
;
727 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
730 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
731 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
733 sc
= sysconf(_SC_PHYS_PAGES
);
737 mem
= (uint64_t) sc
* (uint64_t) ps
;
739 if (cg_get_root_path(&root
) < 0)
742 if (cg_get_attribute("memory", root
, "memory.limit_in_bytes", &value
))
745 if (safe_atou64(value
, &lim
) < 0)
748 /* Make sure the limit is a multiple of our own page size */
752 return MIN(mem
, lim
);
755 uint64_t physical_memory_scale(uint64_t v
, uint64_t max
) {
756 uint64_t p
, m
, ps
, r
;
760 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
761 * the result is a multiple of the page size (rounds down). */
766 p
= physical_memory() / ps
;
782 uint64_t system_tasks_max(void) {
784 #if SIZEOF_PID_T == 4
785 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
786 #elif SIZEOF_PID_T == 2
787 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
789 #error "Unknown pid_t size"
792 _cleanup_free_
char *value
= NULL
, *root
= NULL
;
793 uint64_t a
= TASKS_MAX
, b
= TASKS_MAX
;
795 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
798 * a) the maximum value for the pid_t type
799 * b) the cgroups pids_max attribute for the system
800 * c) the kernel's configured maximum PID value
802 * And then pick the smallest of the three */
804 if (read_one_line_file("/proc/sys/kernel/pid_max", &value
) >= 0)
805 (void) safe_atou64(value
, &a
);
807 if (cg_get_root_path(&root
) >= 0) {
808 value
= mfree(value
);
810 if (cg_get_attribute("pids", root
, "pids.max", &value
) >= 0)
811 (void) safe_atou64(value
, &b
);
814 return MIN3(TASKS_MAX
,
815 a
<= 0 ? TASKS_MAX
: a
,
816 b
<= 0 ? TASKS_MAX
: b
);
819 uint64_t system_tasks_max_scale(uint64_t v
, uint64_t max
) {
824 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
825 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
827 t
= system_tasks_max();
831 if (m
/ t
!= v
) /* overflow? */
837 int update_reboot_parameter_and_warn(const char *param
) {
840 if (isempty(param
)) {
841 if (unlink("/run/systemd/reboot-param") < 0) {
845 return log_warning_errno(errno
, "Failed to unlink reboot parameter file: %m");
851 RUN_WITH_UMASK(0022) {
852 r
= write_string_file("/run/systemd/reboot-param", param
, WRITE_STRING_FILE_CREATE
);
854 return log_warning_errno(r
, "Failed to write reboot parameter file: %m");
861 puts(PACKAGE_STRING
"\n"