1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
31 #include <linux/magic.h>
32 #include <linux/oom.h>
33 #include <linux/sched.h>
44 #include <sys/ioctl.h>
46 #include <sys/mount.h>
47 #include <sys/personality.h>
48 #include <sys/prctl.h>
50 #include <sys/statvfs.h>
52 #include <sys/types.h>
53 #include <sys/utsname.h>
59 /* When we include libgen.h because we need dirname() we immediately
60 * undefine basename() since libgen.h defines it as a macro to the
61 * POSIX version which is really broken. We prefer GNU basename(). */
65 #ifdef HAVE_SYS_AUXV_H
69 /* We include linux/fs.h as the last of the system headers, as it
70 * otherwise conflicts with sys/mount.h. Yay, Linux is great! */
75 #include "device-nodes.h"
78 #include "exit-status.h"
81 #include "formats-util.h"
84 #include "hostname-util.h"
90 #include "hexdecoct.h"
91 #include "parse-util.h"
92 #include "path-util.h"
93 #include "process-util.h"
94 #include "random-util.h"
95 #include "signal-util.h"
96 #include "sparse-endian.h"
97 #include "string-table.h"
98 #include "string-util.h"
100 #include "terminal-util.h"
101 #include "user-util.h"
105 #include "dirent-util.h"
106 #include "stat-util.h"
108 /* Put this test here for lack of a better place */
109 assert_cc(EAGAIN
== EWOULDBLOCK
);
112 char **saved_argv
= NULL
;
114 size_t page_size(void) {
115 static thread_local
size_t pgsz
= 0;
118 if (_likely_(pgsz
> 0))
121 r
= sysconf(_SC_PAGESIZE
);
128 bool fstype_is_network(const char *fstype
) {
129 static const char table
[] =
144 x
= startswith(fstype
, "fuse.");
148 return nulstr_contains(table
, fstype
);
151 void rename_process(const char name
[8]) {
154 /* This is like a poor man's setproctitle(). It changes the
155 * comm field, argv[0], and also the glibc's internally used
156 * name of the process. For the first one a limit of 16 chars
157 * applies, to the second one usually one of 10 (i.e. length
158 * of "/sbin/init"), to the third one one of 7 (i.e. length of
159 * "systemd"). If you pass a longer string it will be
162 prctl(PR_SET_NAME
, name
);
164 if (program_invocation_name
)
165 strncpy(program_invocation_name
, name
, strlen(program_invocation_name
));
167 if (saved_argc
> 0) {
171 strncpy(saved_argv
[0], name
, strlen(saved_argv
[0]));
173 for (i
= 1; i
< saved_argc
; i
++) {
177 memzero(saved_argv
[i
], strlen(saved_argv
[i
]));
182 noreturn
void freeze(void) {
184 /* Make sure nobody waits for us on a socket anymore */
185 close_all_fds(NULL
, 0);
193 static int do_execute(char **directories
, usec_t timeout
, char *argv
[]) {
194 _cleanup_hashmap_free_free_ Hashmap
*pids
= NULL
;
195 _cleanup_set_free_free_ Set
*seen
= NULL
;
198 /* We fork this all off from a child process so that we can
199 * somewhat cleanly make use of SIGALRM to set a time limit */
201 (void) reset_all_signal_handlers();
202 (void) reset_signal_mask();
204 assert_se(prctl(PR_SET_PDEATHSIG
, SIGTERM
) == 0);
206 pids
= hashmap_new(NULL
);
210 seen
= set_new(&string_hash_ops
);
214 STRV_FOREACH(directory
, directories
) {
215 _cleanup_closedir_
DIR *d
;
218 d
= opendir(*directory
);
223 return log_error_errno(errno
, "Failed to open directory %s: %m", *directory
);
226 FOREACH_DIRENT(de
, d
, break) {
227 _cleanup_free_
char *path
= NULL
;
231 if (!dirent_is_file(de
))
234 if (set_contains(seen
, de
->d_name
)) {
235 log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory
, de
->d_name
);
239 r
= set_put_strdup(seen
, de
->d_name
);
243 path
= strjoin(*directory
, "/", de
->d_name
, NULL
);
247 if (null_or_empty_path(path
)) {
248 log_debug("%s is empty (a mask).", path
);
254 log_error_errno(errno
, "Failed to fork: %m");
256 } else if (pid
== 0) {
259 assert_se(prctl(PR_SET_PDEATHSIG
, SIGTERM
) == 0);
269 return log_error_errno(errno
, "Failed to execute %s: %m", path
);
272 log_debug("Spawned %s as " PID_FMT
".", path
, pid
);
274 r
= hashmap_put(pids
, UINT_TO_PTR(pid
), path
);
281 /* Abort execution of this process after the timeout. We simply
282 * rely on SIGALRM as default action terminating the process,
283 * and turn on alarm(). */
285 if (timeout
!= USEC_INFINITY
)
286 alarm((timeout
+ USEC_PER_SEC
- 1) / USEC_PER_SEC
);
288 while (!hashmap_isempty(pids
)) {
289 _cleanup_free_
char *path
= NULL
;
292 pid
= PTR_TO_UINT(hashmap_first_key(pids
));
295 path
= hashmap_remove(pids
, UINT_TO_PTR(pid
));
298 wait_for_terminate_and_warn(path
, pid
, true);
304 void execute_directories(const char* const* directories
, usec_t timeout
, char *argv
[]) {
308 char **dirs
= (char**) directories
;
310 assert(!strv_isempty(dirs
));
312 name
= basename(dirs
[0]);
313 assert(!isempty(name
));
315 /* Executes all binaries in the directories in parallel and waits
316 * for them to finish. Optionally a timeout is applied. If a file
317 * with the same name exists in more than one directory, the
318 * earliest one wins. */
320 executor_pid
= fork();
321 if (executor_pid
< 0) {
322 log_error_errno(errno
, "Failed to fork: %m");
325 } else if (executor_pid
== 0) {
326 r
= do_execute(dirs
, timeout
, argv
);
327 _exit(r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
);
330 wait_for_terminate_and_warn(name
, executor_pid
, true);
333 bool plymouth_running(void) {
334 return access("/run/plymouth/pid", F_OK
) >= 0;
337 bool display_is_local(const char *display
) {
346 int socket_from_display(const char *display
, char **path
) {
353 if (!display_is_local(display
))
356 k
= strspn(display
+1, "0123456789");
358 f
= new(char, strlen("/tmp/.X11-unix/X") + k
+ 1);
362 c
= stpcpy(f
, "/tmp/.X11-unix/X");
363 memcpy(c
, display
+1, k
);
371 int glob_exists(const char *path
) {
372 _cleanup_globfree_ glob_t g
= {};
378 k
= glob(path
, GLOB_NOSORT
|GLOB_BRACE
, NULL
, &g
);
380 if (k
== GLOB_NOMATCH
)
382 else if (k
== GLOB_NOSPACE
)
385 return !strv_isempty(g
.gl_pathv
);
387 return errno
? -errno
: -EIO
;
390 int glob_extend(char ***strv
, const char *path
) {
391 _cleanup_globfree_ glob_t g
= {};
396 k
= glob(path
, GLOB_NOSORT
|GLOB_BRACE
, NULL
, &g
);
398 if (k
== GLOB_NOMATCH
)
400 else if (k
== GLOB_NOSPACE
)
402 else if (k
!= 0 || strv_isempty(g
.gl_pathv
))
403 return errno
? -errno
: -EIO
;
405 STRV_FOREACH(p
, g
.gl_pathv
) {
406 k
= strv_extend(strv
, *p
);
414 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
421 /* If it has a queue this is good enough for us */
422 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", major(d
), minor(d
)) < 0)
433 /* If it is a partition find the originating device */
434 if (asprintf(&p
, "/sys/dev/block/%u:%u/partition", major(d
), minor(d
)) < 0)
443 /* Get parent dev_t */
444 if (asprintf(&p
, "/sys/dev/block/%u:%u/../dev", major(d
), minor(d
)) < 0)
447 r
= read_one_line_file(p
, &s
);
453 r
= sscanf(s
, "%u:%u", &m
, &n
);
459 /* Only return this if it is really good enough for us. */
460 if (asprintf(&p
, "/sys/dev/block/%u:%u/queue", m
, n
) < 0)
467 *ret
= makedev(m
, n
);
474 static const char *const ioprio_class_table
[] = {
475 [IOPRIO_CLASS_NONE
] = "none",
476 [IOPRIO_CLASS_RT
] = "realtime",
477 [IOPRIO_CLASS_BE
] = "best-effort",
478 [IOPRIO_CLASS_IDLE
] = "idle"
481 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class
, int, INT_MAX
);
483 static const char *const sigchld_code_table
[] = {
484 [CLD_EXITED
] = "exited",
485 [CLD_KILLED
] = "killed",
486 [CLD_DUMPED
] = "dumped",
487 [CLD_TRAPPED
] = "trapped",
488 [CLD_STOPPED
] = "stopped",
489 [CLD_CONTINUED
] = "continued",
492 DEFINE_STRING_TABLE_LOOKUP(sigchld_code
, int);
494 static const char* const sched_policy_table
[] = {
495 [SCHED_OTHER
] = "other",
496 [SCHED_BATCH
] = "batch",
497 [SCHED_IDLE
] = "idle",
498 [SCHED_FIFO
] = "fifo",
502 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy
, int, INT_MAX
);
504 bool kexec_loaded(void) {
508 if (read_one_line_file("/sys/kernel/kexec_loaded", &s
) >= 0) {
516 int prot_from_flags(int flags
) {
518 switch (flags
& O_ACCMODE
) {
527 return PROT_READ
|PROT_WRITE
;
534 void* memdup(const void *p
, size_t l
) {
547 int fork_agent(pid_t
*pid
, const int except
[], unsigned n_except
, const char *path
, ...) {
548 bool stdout_is_tty
, stderr_is_tty
;
549 pid_t parent_pid
, agent_pid
;
550 sigset_t ss
, saved_ss
;
558 /* Spawns a temporary TTY agent, making sure it goes away when
561 parent_pid
= getpid();
563 /* First we temporarily block all signals, so that the new
564 * child has them blocked initially. This way, we can be sure
565 * that SIGTERMs we might send to the agent are not lost. */
566 assert_se(sigfillset(&ss
) >= 0);
567 assert_se(sigprocmask(SIG_SETMASK
, &ss
, &saved_ss
) >= 0);
571 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
575 if (agent_pid
!= 0) {
576 assert_se(sigprocmask(SIG_SETMASK
, &saved_ss
, NULL
) >= 0);
583 * Make sure the agent goes away when the parent dies */
584 if (prctl(PR_SET_PDEATHSIG
, SIGTERM
) < 0)
587 /* Make sure we actually can kill the agent, if we need to, in
588 * case somebody invoked us from a shell script that trapped
589 * SIGTERM or so... */
590 (void) reset_all_signal_handlers();
591 (void) reset_signal_mask();
593 /* Check whether our parent died before we were able
594 * to set the death signal and unblock the signals */
595 if (getppid() != parent_pid
)
598 /* Don't leak fds to the agent */
599 close_all_fds(except
, n_except
);
601 stdout_is_tty
= isatty(STDOUT_FILENO
);
602 stderr_is_tty
= isatty(STDERR_FILENO
);
604 if (!stdout_is_tty
|| !stderr_is_tty
) {
607 /* Detach from stdout/stderr and reopen
608 * /dev/tty for them. This is important to
609 * ensure that when systemctl is started via
610 * popen() or a similar call that expects to
611 * read EOF we actually do generate EOF and
612 * not delay this indefinitely because we
613 * keep an unused copy of stdin around. */
614 fd
= open("/dev/tty", O_WRONLY
);
616 log_error_errno(errno
, "Failed to open /dev/tty: %m");
621 dup2(fd
, STDOUT_FILENO
);
624 dup2(fd
, STDERR_FILENO
);
630 /* Count arguments */
632 for (n
= 0; va_arg(ap
, char*); n
++)
637 l
= alloca(sizeof(char *) * (n
+ 1));
639 /* Fill in arguments */
641 for (i
= 0; i
<= n
; i
++)
642 l
[i
] = va_arg(ap
, char*);
649 bool http_etag_is_valid(const char *etag
) {
653 if (!endswith(etag
, "\""))
656 if (!startswith(etag
, "\"") && !startswith(etag
, "W/\""))
662 bool http_url_is_valid(const char *url
) {
668 p
= startswith(url
, "http://");
670 p
= startswith(url
, "https://");
677 return ascii_is_valid(p
);
680 bool documentation_url_is_valid(const char *url
) {
686 if (http_url_is_valid(url
))
689 p
= startswith(url
, "file:/");
691 p
= startswith(url
, "info:");
693 p
= startswith(url
, "man:");
698 return ascii_is_valid(p
);
701 bool in_initrd(void) {
702 static int saved
= -1;
708 /* We make two checks here:
710 * 1. the flag file /etc/initrd-release must exist
711 * 2. the root file system must be a memory file system
713 * The second check is extra paranoia, since misdetecting an
714 * initrd can have bad consequences due to the initrd being
715 * emptied when transitioning to the main systemd.
718 saved
= access("/etc/initrd-release", F_OK
) >= 0 &&
719 statfs("/", &s
) >= 0 &&
725 /* hey glibc, APIs with callbacks without a user pointer are so useless */
726 void *xbsearch_r(const void *key
, const void *base
, size_t nmemb
, size_t size
,
727 int (*compar
) (const void *, const void *, void *), void *arg
) {
736 p
= (void *)(((const char *) base
) + (idx
* size
));
737 comparison
= compar(key
, p
, arg
);
740 else if (comparison
> 0)
748 int on_ac_power(void) {
749 bool found_offline
= false, found_online
= false;
750 _cleanup_closedir_
DIR *d
= NULL
;
752 d
= opendir("/sys/class/power_supply");
754 return errno
== ENOENT
? true : -errno
;
758 _cleanup_close_
int fd
= -1, device
= -1;
764 if (!de
&& errno
!= 0)
770 if (hidden_file(de
->d_name
))
773 device
= openat(dirfd(d
), de
->d_name
, O_DIRECTORY
|O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
775 if (errno
== ENOENT
|| errno
== ENOTDIR
)
781 fd
= openat(device
, "type", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
789 n
= read(fd
, contents
, sizeof(contents
));
793 if (n
!= 6 || memcmp(contents
, "Mains\n", 6))
797 fd
= openat(device
, "online", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
805 n
= read(fd
, contents
, sizeof(contents
));
809 if (n
!= 2 || contents
[1] != '\n')
812 if (contents
[0] == '1') {
815 } else if (contents
[0] == '0')
816 found_offline
= true;
821 return found_online
|| !found_offline
;
824 void* greedy_realloc(void **p
, size_t *allocated
, size_t need
, size_t size
) {
831 if (*allocated
>= need
)
834 newalloc
= MAX(need
* 2, 64u / size
);
837 /* check for overflows */
846 *allocated
= newalloc
;
850 void* greedy_realloc0(void **p
, size_t *allocated
, size_t need
, size_t size
) {
859 q
= greedy_realloc(p
, allocated
, need
, size
);
863 if (*allocated
> prev
)
864 memzero(q
+ prev
* size
, (*allocated
- prev
) * size
);
869 bool id128_is_valid(const char *s
) {
875 /* Simple formatted 128bit hex string */
877 for (i
= 0; i
< l
; i
++) {
880 if (!(c
>= '0' && c
<= '9') &&
881 !(c
>= 'a' && c
<= 'z') &&
882 !(c
>= 'A' && c
<= 'Z'))
886 } else if (l
== 36) {
890 for (i
= 0; i
< l
; i
++) {
893 if ((i
== 8 || i
== 13 || i
== 18 || i
== 23)) {
897 if (!(c
>= '0' && c
<= '9') &&
898 !(c
>= 'a' && c
<= 'z') &&
899 !(c
>= 'A' && c
<= 'Z'))
910 int container_get_leader(const char *machine
, pid_t
*pid
) {
911 _cleanup_free_
char *s
= NULL
, *class = NULL
;
919 if (!machine_name_is_valid(machine
))
922 p
= strjoina("/run/systemd/machines/", machine
);
923 r
= parse_env_file(p
, NEWLINE
, "LEADER", &s
, "CLASS", &class, NULL
);
931 if (!streq_ptr(class, "container"))
934 r
= parse_pid(s
, &leader
);
944 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
945 _cleanup_close_
int pidnsfd
= -1, mntnsfd
= -1, netnsfd
= -1, usernsfd
= -1;
953 mntns
= procfs_file_alloca(pid
, "ns/mnt");
954 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
962 pidns
= procfs_file_alloca(pid
, "ns/pid");
963 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
971 netns
= procfs_file_alloca(pid
, "ns/net");
972 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
980 userns
= procfs_file_alloca(pid
, "ns/user");
981 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
982 if (usernsfd
< 0 && errno
!= ENOENT
)
989 root
= procfs_file_alloca(pid
, "root");
990 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
1002 *netns_fd
= netnsfd
;
1005 *userns_fd
= usernsfd
;
1010 pidnsfd
= mntnsfd
= netnsfd
= usernsfd
= -1;
1015 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
1016 if (userns_fd
>= 0) {
1017 /* Can't setns to your own userns, since then you could
1018 * escalate from non-root to root in your own namespace, so
1019 * check if namespaces equal before attempting to enter. */
1020 _cleanup_free_
char *userns_fd_path
= NULL
;
1022 if (asprintf(&userns_fd_path
, "/proc/self/fd/%d", userns_fd
) < 0)
1025 r
= files_same(userns_fd_path
, "/proc/self/ns/user");
1033 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
1037 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
1041 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
1045 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
1049 if (fchdir(root_fd
) < 0)
1052 if (chroot(".") < 0)
1056 return reset_uid_gid();
1059 unsigned long personality_from_string(const char *p
) {
1061 /* Parse a personality specifier. We introduce our own
1062 * identifiers that indicate specific ABIs, rather than just
1063 * hints regarding the register size, since we want to keep
1064 * things open for multiple locally supported ABIs for the
1065 * same register size. We try to reuse the ABI identifiers
1066 * used by libseccomp. */
1068 #if defined(__x86_64__)
1070 if (streq(p
, "x86"))
1073 if (streq(p
, "x86-64"))
1076 #elif defined(__i386__)
1078 if (streq(p
, "x86"))
1081 #elif defined(__s390x__)
1083 if (streq(p
, "s390"))
1086 if (streq(p
, "s390x"))
1089 #elif defined(__s390__)
1091 if (streq(p
, "s390"))
1095 return PERSONALITY_INVALID
;
1098 const char* personality_to_string(unsigned long p
) {
1100 #if defined(__x86_64__)
1102 if (p
== PER_LINUX32
)
1108 #elif defined(__i386__)
1113 #elif defined(__s390x__)
1118 if (p
== PER_LINUX32
)
1121 #elif defined(__s390__)
1131 uint64_t physical_memory(void) {
1134 /* We return this as uint64_t in case we are running as 32bit
1135 * process on a 64bit kernel with huge amounts of memory */
1137 mem
= sysconf(_SC_PHYS_PAGES
);
1140 return (uint64_t) mem
* (uint64_t) page_size();
1143 int update_reboot_param_file(const char *param
) {
1147 r
= write_string_file(REBOOT_PARAM_FILE
, param
, WRITE_STRING_FILE_CREATE
);
1149 return log_error_errno(r
, "Failed to write reboot param to "REBOOT_PARAM_FILE
": %m");
1151 (void) unlink(REBOOT_PARAM_FILE
);
1157 puts(PACKAGE_STRING
"\n"
1162 bool fdname_is_valid(const char *s
) {
1165 /* Validates a name for $LISTEN_FDNAMES. We basically allow
1166 * everything ASCII that's not a control character. Also, as a
1167 * special exception, the ":" character is not allowed, as we
1168 * use that as field separator in $LISTEN_FDNAMES.
1170 * Note that the empty string is explicitly allowed
1171 * here. However, we limit the length of the names to 255
1177 for (p
= s
; *p
; p
++) {
1189 bool oom_score_adjust_is_valid(int oa
) {
1190 return oa
>= OOM_SCORE_ADJ_MIN
&& oa
<= OOM_SCORE_ADJ_MAX
;