1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
4 #include <netinet/in.h>
11 #include "sd-daemon.h"
13 #include "alloc-util.h"
14 #include "errno-util.h"
15 #include "extract-word.h"
19 #include "iovec-util.h"
21 #include "missing_magic.h"
22 #include "parse-util.h"
23 #include "path-util.h"
24 #include "pidfd-util.h"
25 #include "process-util.h"
26 #include "socket-util.h"
27 #include "stat-util.h"
29 #include "time-util.h"
/* Send buffer size (8 MiB) that pid_notify_with_fds_internal() passes to fd_inc_sndbuf() for the
 * notification socket. */
31 #define SNDBUF_SIZE (8*1024*1024)
/* Removes the variables LISTEN_PID, LISTEN_FDS and LISTEN_FDNAMES from the environment. The
 * function starts with a check on unset_environment; the statement controlled by that check is
 * not visible in this view. Every unsetenv() result is compared against 0 inside assert_se(). */
33 static void unsetenv_listen(bool unset_environment
) {
34 if (!unset_environment
)
37 assert_se(unsetenv("LISTEN_PID") == 0);
38 assert_se(unsetenv("LISTEN_FDS") == 0);
39 assert_se(unsetenv("LISTEN_FDNAMES") == 0);
/* Public socket activation entry point. The visible lines read LISTEN_PID and LISTEN_FDS from the
 * environment, parse the PID with parse_pid() and compare it with getpid_cached(), range-check
 * the descriptor count n, call fd_cloexec(fd, true) on every descriptor in the range
 * [SD_LISTEN_FDS_START, SD_LISTEN_FDS_START + n), and finally call unsetenv_listen().
 * NOTE(review): declarations, error branches, the parsing of LISTEN_FDS into n and all return
 * statements are not visible in this view; confirm the return contract against the full file. */
42 _public_
int sd_listen_fds(int unset_environment
) {
47 e
= getenv("LISTEN_PID");
53 r
= parse_pid(e
, &pid
);
/* Compare the PID parsed from LISTEN_PID with the PID of this process. */
58 if (getpid_cached() != pid
) {
63 e
= getenv("LISTEN_FDS");
/* assert_cc() check that keeps INT_MAX - SD_LISTEN_FDS_START positive for the test below. */
73 assert_cc(SD_LISTEN_FDS_START
< INT_MAX
);
/* Branch taken for non-positive counts and for counts where SD_LISTEN_FDS_START + n would
 * exceed INT_MAX. */
74 if (n
<= 0 || n
> INT_MAX
- SD_LISTEN_FDS_START
) {
/* Walk all n passed descriptors, starting at SD_LISTEN_FDS_START. */
79 for (int fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
80 r
= fd_cloexec(fd
, true);
88 unsetenv_listen(unset_environment
);
/* Variant of sd_listen_fds() that additionally hands a string vector to the caller through
 * ret_names. The visible lines split the value of LISTEN_FDNAMES on colons into l (using the
 * EXTRACT_DONT_COALESCE_SEPARATORS flag), obtain the descriptor count from sd_listen_fds(),
 * compare the number of names with the number of descriptors, call strv_extend_n() with the
 * literal name unknown and n_fds, and store l in *ret_names.
 * NOTE(review): the conditions guarding the early return through sd_listen_fds(), the error
 * handling and the strv_extend_n() call are not visible in this view. */
92 _public_
int sd_listen_fds_with_names(int unset_environment
, char ***ret_names
) {
93 _cleanup_strv_free_
char **l
= NULL
;
95 int n_names
= 0, n_fds
;
/* Plain delegation to sd_listen_fds(); the guarding condition is not visible here. */
100 return sd_listen_fds(unset_environment
);
102 e
= getenv("LISTEN_FDNAMES");
104 n_names
= strv_split_full(&l
, e
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
);
106 unsetenv_listen(unset_environment
);
114 n_fds
= sd_listen_fds(unset_environment
);
/* Number of names and number of descriptors disagree. */
119 if (n_names
!= n_fds
)
122 r
= strv_extend_n(&l
, "unknown", n_fds
);
/* Hand the vector to the caller; presumably TAKE_PTR() also keeps the _cleanup_strv_free_
 * handler from freeing it - TODO confirm. */
127 *ret_names
= TAKE_PTR(l
);
/* Checks whether fd refers to a FIFO and compares it with path. A negative fd is handled by
 * assert_return() with -EBADF. The visible lines fstat() the descriptor, test S_ISFIFO on its
 * mode, stat() path, single out ENOENT and ENOTDIR among the stat() errors, and return the result
 * of stat_inode_same() on the two stat buffers.
 * NOTE(review): whether the path comparison is skipped for a NULL path, and the values returned
 * from the individual branches, are not visible in this view. */
131 _public_
int sd_is_fifo(int fd
, const char *path
) {
134 assert_return(fd
>= 0, -EBADF
);
136 if (fstat(fd
, &st_fd
) < 0)
139 if (!S_ISFIFO(st_fd
.st_mode
))
145 if (stat(path
, &st_path
) < 0) {
/* A missing path (ENOENT) or a non-directory path component (ENOTDIR) gets its own branch. */
147 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
153 return stat_inode_same(&st_path
, &st_fd
);
/* Checks whether fd refers to a regular file or a character device and compares it with path.
 * A negative fd is handled by assert_return() with -EBADF. When both fd and path are regular
 * files the result of stat_inode_same() is returned; when both are character devices their
 * st_rdev values are compared instead.
 * NOTE(review): the NULL path handling, the result for mixed file types and the error returns
 * are not visible in this view. */
159 _public_
int sd_is_special(int fd
, const char *path
) {
162 assert_return(fd
>= 0, -EBADF
);
164 if (fstat(fd
, &st_fd
) < 0)
/* Neither a regular file nor a character device. */
167 if (!S_ISREG(st_fd
.st_mode
) && !S_ISCHR(st_fd
.st_mode
))
173 if (stat(path
, &st_path
) < 0) {
/* A missing path (ENOENT) or a non-directory path component (ENOTDIR) gets its own branch. */
175 if (IN_SET(errno
, ENOENT
, ENOTDIR
))
/* Regular files: compared through stat_inode_same(). */
181 if (S_ISREG(st_fd
.st_mode
) && S_ISREG(st_path
.st_mode
))
182 return stat_inode_same(&st_path
, &st_fd
);
/* Character devices: compared by device number rather than by inode. */
183 if (S_ISCHR(st_fd
.st_mode
) && S_ISCHR(st_path
.st_mode
))
184 return st_path
.st_rdev
== st_fd
.st_rdev
;
/* Shared helper of sd_is_socket(), sd_is_socket_inet(), sd_is_socket_sockaddr() and
 * sd_is_socket_unix(). It verifies through fstat() and S_ISSOCK that fd is a socket, queries
 * SO_TYPE and compares the result with type, and, when listening >= 0, queries SO_ACCEPTCONN and
 * compares its truth value with that of listening. Negative fd and negative type are handled by
 * assert_return() with -EBADF and -EINVAL respectively.
 * NOTE(review): the guard around the SO_TYPE query and all return values are not visible in this
 * view. */
192 static int is_socket_internal(int fd
, int type
, int listening
) {
195 assert_return(fd
>= 0, -EBADF
);
196 assert_return(type
>= 0, -EINVAL
);
198 if (fstat(fd
, &st_fd
) < 0)
201 if (!S_ISSOCK(st_fd
.st_mode
))
206 socklen_t l
= sizeof(other_type
);
208 if (getsockopt(fd
, SOL_SOCKET
, SO_TYPE
, &other_type
, &l
) < 0)
/* getsockopt() reported a length different from sizeof(other_type). */
211 if (l
!= sizeof(other_type
))
214 if (other_type
!= type
)
/* A negative listening value skips the SO_ACCEPTCONN comparison. */
218 if (listening
>= 0) {
220 socklen_t l
= sizeof(accepting
);
222 if (getsockopt(fd
, SOL_SOCKET
, SO_ACCEPTCONN
, &accepting
, &l
) < 0)
225 if (l
!= sizeof(accepting
))
/* Compare as booleans: any non-zero listening matches any non-zero accepting. */
228 if (!accepting
!= !listening
)
/* Checks whether fd is a socket matching type and listening (delegated to
 * is_socket_internal()) and compares the address family reported by getsockname() with family.
 * Negative fd and negative family are handled by assert_return() with -EBADF and -EINVAL.
 * NOTE(review): the condition under which the family comparison runs and the handling of the
 * is_socket_internal() result are not visible in this view. */
235 _public_
int sd_is_socket(int fd
, int family
, int type
, int listening
) {
238 assert_return(fd
>= 0, -EBADF
);
239 assert_return(family
>= 0, -EINVAL
);
241 r
= is_socket_internal(fd
, type
, listening
);
246 union sockaddr_union sockaddr
= {};
247 socklen_t l
= sizeof(sockaddr
);
249 if (getsockname(fd
, &sockaddr
.sa
, &l
) < 0)
/* getsockname() returned fewer bytes than an sa_family_t; sa_family is read just below. */
252 if (l
< sizeof(sa_family_t
))
255 return sockaddr
.sa
.sa_family
== family
;
/* Checks whether fd is an AF_INET or AF_INET6 socket matching type and listening, and compares
 * its family and port with the requested ones. family must be 0, AF_INET or AF_INET6, otherwise
 * assert_return() yields -EINVAL; a negative fd yields -EBADF. The bound address is obtained
 * with getsockname(), the port is extracted with sockaddr_port() and the final result is
 * port == sa_port.
 * NOTE(review): the guards in front of the family and port comparisons (for example whether 0
 * acts as a wildcard) and the error returns are not visible in this view. */
261 _public_
int sd_is_socket_inet(int fd
, int family
, int type
, int listening
, uint16_t port
) {
262 union sockaddr_union sockaddr
= {};
263 socklen_t l
= sizeof(sockaddr
);
266 assert_return(fd
>= 0, -EBADF
);
267 assert_return(IN_SET(family
, 0, AF_INET
, AF_INET6
), -EINVAL
);
269 r
= is_socket_internal(fd
, type
, listening
);
273 if (getsockname(fd
, &sockaddr
.sa
, &l
) < 0)
/* getsockname() returned fewer bytes than an sa_family_t; sa_family is read just below. */
276 if (l
< sizeof(sa_family_t
))
/* The socket is bound to something other than an IPv4 or IPv6 address. */
279 if (!IN_SET(sockaddr
.sa
.sa_family
, AF_INET
, AF_INET6
))
283 if (sockaddr
.sa
.sa_family
!= family
)
289 r
= sockaddr_port(&sockaddr
.sa
, &sa_port
);
293 return port
== sa_port
;
/* Checks whether fd is a socket matching type and listening whose bound address equals addr.
 * Argument checks through assert_return(): negative fd gives -EBADF, NULL addr gives -EINVAL,
 * addr_len smaller than sa_family_t gives -ENOBUFS, and a family other than AF_INET or AF_INET6
 * gives -EPFNOSUPPORT. For AF_INET the port is compared only when addr carries a non-zero port,
 * and the result is the equality of the IPv4 addresses. In the second variant the same applies
 * to sin6_port, sin6_flowinfo and sin6_scope_id, and the result is a memcmp() of the IPv6
 * addresses.
 * NOTE(review): the branch keyword separating the two variants and the values returned from the
 * intermediate checks are not visible in this view. */
299 _public_
int sd_is_socket_sockaddr(int fd
, int type
, const struct sockaddr
* addr
, unsigned addr_len
, int listening
) {
300 union sockaddr_union sockaddr
= {};
301 socklen_t l
= sizeof(sockaddr
);
304 assert_return(fd
>= 0, -EBADF
);
305 assert_return(addr
, -EINVAL
);
306 assert_return(addr_len
>= sizeof(sa_family_t
), -ENOBUFS
);
307 assert_return(IN_SET(addr
->sa_family
, AF_INET
, AF_INET6
), -EPFNOSUPPORT
);
309 r
= is_socket_internal(fd
, type
, listening
);
313 if (getsockname(fd
, &sockaddr
.sa
, &l
) < 0)
/* getsockname() returned fewer bytes than an sa_family_t; sa_family is read just below. */
316 if (l
< sizeof(sa_family_t
))
319 if (sockaddr
.sa
.sa_family
!= addr
->sa_family
)
/* IPv4 comparison. */
322 if (sockaddr
.sa
.sa_family
== AF_INET
) {
323 const struct sockaddr_in
*in
= (const struct sockaddr_in
*) addr
;
/* Either the address of the socket or the one supplied by the caller is too short for a
 * struct sockaddr_in. */
325 if (l
< sizeof(struct sockaddr_in
) || addr_len
< sizeof(struct sockaddr_in
))
/* A zero port in addr is not compared. */
328 if (in
->sin_port
!= 0 &&
329 sockaddr
.in
.sin_port
!= in
->sin_port
)
332 return sockaddr
.in
.sin_addr
.s_addr
== in
->sin_addr
.s_addr
;
/* IPv6 comparison. */
335 const struct sockaddr_in6
*in
= (const struct sockaddr_in6
*) addr
;
337 if (l
< sizeof(struct sockaddr_in6
) || addr_len
< sizeof(struct sockaddr_in6
))
/* Zero port, flow info and scope id in addr are not compared. */
340 if (in
->sin6_port
!= 0 &&
341 sockaddr
.in6
.sin6_port
!= in
->sin6_port
)
344 if (in
->sin6_flowinfo
!= 0 &&
345 sockaddr
.in6
.sin6_flowinfo
!= in
->sin6_flowinfo
)
348 if (in
->sin6_scope_id
!= 0 &&
349 sockaddr
.in6
.sin6_scope_id
!= in
->sin6_scope_id
)
352 return memcmp(sockaddr
.in6
.sin6_addr
.s6_addr
, in
->sin6_addr
.s6_addr
,
353 sizeof(in
->sin6_addr
.s6_addr
)) == 0;
/* Checks whether fd is an AF_UNIX socket matching type and listening, and compares its bound
 * address with path. A negative fd is handled by assert_return() with -EBADF. Three comparisons
 * are visible: the returned length equals offsetof(struct sockaddr_un, sun_path); a normal path
 * socket, where the returned length must cover length + 1 bytes and the comparison includes the
 * byte following the path; and an abstract namespace socket, where the returned length must
 * match exactly and only length bytes are compared.
 * NOTE(review): the conditions selecting between these cases, and the guard in front of the
 * strlen() call (presumably a zero length argument - TODO confirm), are not visible in this
 * view. */
357 _public_
int sd_is_socket_unix(int fd
, int type
, int listening
, const char *path
, size_t length
) {
358 union sockaddr_union sockaddr
= {};
359 socklen_t l
= sizeof(sockaddr
);
362 assert_return(fd
>= 0, -EBADF
);
364 r
= is_socket_internal(fd
, type
, listening
);
368 if (getsockname(fd
, &sockaddr
.sa
, &l
) < 0)
/* getsockname() returned fewer bytes than an sa_family_t; sa_family is read just below. */
371 if (l
< sizeof(sa_family_t
))
374 if (sockaddr
.sa
.sa_family
!= AF_UNIX
)
379 length
= strlen(path
);
/* The returned address consists of the header only, with no sun_path bytes. */
383 return l
== offsetof(struct sockaddr_un
, sun_path
);
386 /* Normal path socket */
388 (l
>= offsetof(struct sockaddr_un
, sun_path
) + length
+ 1) &&
389 memcmp(path
, sockaddr
.un
.sun_path
, length
+1) == 0;
391 /* Abstract namespace socket */
393 (l
== offsetof(struct sockaddr_un
, sun_path
) + length
) &&
394 memcmp(path
, sockaddr
.un
.sun_path
, length
) == 0;
/* Checks whether fd is a POSIX message queue and compares it with path. mq_getattr() is used to
 * probe the descriptor. For the path comparison, path must be absolute (otherwise
 * assert_return() yields -EINVAL); it is joined onto the /dev/mqueue directory with path_join()
 * and the resulting file is compared with fd through fstat(), stat() and stat_inode_same().
 * NOTE(review): the initial validity check announced by the first comment, the NULL path
 * handling and all return values are not visible in this view. */
400 _public_
int sd_is_mq(int fd
, const char *path
) {
404 /* Check that the fd is valid */
409 if (mq_getattr(fd
, &attr
) < 0) {
411 /* A non-mq fd (or an invalid one, but we ruled that out above) */
417 _cleanup_free_
char *fpath
= NULL
;
420 assert_return(path_is_absolute(path
), -EINVAL
);
422 if (fstat(fd
, &a
) < 0)
425 fpath
= path_join("/dev/mqueue", path
);
429 if (stat(fpath
, &b
) < 0)
432 if (!stat_inode_same(&a
, &b
))
/* Binds fd to an AF_VSOCK address with CID VMADDR_CID_ANY. bind() is wrapped in RET_NERRNO() and
 * repeated while the result is -EADDRINUSE, decrementing svm_port before each retry and stopping
 * once the port reaches 0.
 * NOTE(review): the initial svm_port value, the opening of the loop and the return statement are
 * not visible in this view. */
439 static int vsock_bind_privileged_port(int fd
) {
440 union sockaddr_union sa
= {
441 .vm
.svm_family
= AF_VSOCK
,
442 .vm
.svm_cid
= VMADDR_CID_ANY
,
450 r
= RET_NERRNO(bind(fd
, &sa
.sa
, sizeof(sa
.vm
)));
/* Try the next lower port as long as the current one is already in use. */
451 while (r
== -EADDRINUSE
&& --sa
.vm
.svm_port
> 0);
/* Common implementation behind the notification functions of this file; the callers pass
 * (pid, state, fds, n_fds). Visible steps, in order:
 *   - validate state, fds and n_fds (at most SCM_MAX_FD descriptors);
 *   - read NOTIFY_SOCKET and parse it as an AF_UNIX or AF_VSOCK address;
 *   - open the socket, with a fallback to SOCK_SEQPACKET for AF_VSOCK;
 *   - for AF_VSOCK, try to bind to a privileged port;
 *   - connect() when the type is SOCK_STREAM or SOCK_SEQPACKET;
 *   - raise the send buffer to SNDBUF_SIZE (result ignored);
 *   - build the control data: SCM_RIGHTS for fds and/or SCM_CREDENTIALS for a ucred;
 *   - send with sendmsg(), and retry with the ucred portion removed from the control data;
 *   - for connection-oriented AF_VSOCK, shut down the write side and wait for EOF with recv().
 * NOTE(review): the parameter list, most declarations, several guards and all plain return
 * statements are not visible in this view. */
456 static int pid_notify_with_fds_internal(
462 SocketAddress address
;
464 struct msghdr msghdr
= {
467 .msg_name
= &address
.sockaddr
,
469 _cleanup_close_
int fd
= -EBADF
;
472 assert_return(state
, -EINVAL
);
/* A NULL fds array is only acceptable together with n_fds == 0. */
473 assert_return(fds
|| n_fds
== 0, -EINVAL
);
475 /* Let's make sure the multiplications below don't overflow, and also return a recognizable error in
476 * case the caller tries to send more fds than the kernel limit. The kernel would return EINVAL which
477 * is not too useful I'd say. */
478 if (n_fds
> SCM_MAX_FD
)
481 const char *e
= getenv("NOTIFY_SOCKET");
485 /* Allow AF_UNIX and AF_VSOCK, reject the rest. */
486 r
= socket_address_parse_unix(&address
, e
);
488 r
= socket_address_parse_vsock(&address
, e
);
491 msghdr
.msg_namelen
= address
.size
;
493 /* If we didn't get an address (which is a normal pattern when specifying VSOCK tuples) error out,
494 * we always require a specific CID. */
495 if (address
.sockaddr
.vm
.svm_family
== AF_VSOCK
&& address
.sockaddr
.vm
.svm_cid
== VMADDR_CID_ANY
)
/* An address without an explicit socket type defaults to SOCK_DGRAM. */
498 type
= address
.type
== 0 ? SOCK_DGRAM
: address
.type
;
500 /* At the time of writing QEMU does not yet support AF_VSOCK + SOCK_DGRAM and returns
501 * ENODEV. Fallback to SOCK_SEQPACKET in that case. */
502 fd
= socket(address
.sockaddr
.sa
.sa_family
, type
|SOCK_CLOEXEC
, 0);
/* Give up unless the failure is a not-supported or ENODEV error on an AF_VSOCK address whose
 * type was left unspecified; only that combination continues to the fallback below. */
504 if (!(ERRNO_IS_NOT_SUPPORTED(errno
) || errno
== ENODEV
) || address
.sockaddr
.sa
.sa_family
!= AF_VSOCK
|| address
.type
> 0)
505 return log_debug_errno(errno
, "Failed to open %s notify socket to '%s': %m", socket_address_type_to_string(type
), e
);
507 type
= SOCK_SEQPACKET
;
508 fd
= socket(address
.sockaddr
.sa
.sa_family
, type
|SOCK_CLOEXEC
, 0);
/* Second fallback level: socket() is called once more when SOCK_SEQPACKET is unsupported as
 * well; the type assignment in between is not visible in this view. */
509 if (fd
< 0 && ERRNO_IS_NOT_SUPPORTED(errno
)) {
511 fd
= socket(address
.sockaddr
.sa
.sa_family
, type
|SOCK_CLOEXEC
, 0);
514 return log_debug_errno(errno
, "Failed to open %s socket to '%s': %m", socket_address_type_to_string(type
), e
);
517 if (address
.sockaddr
.sa
.sa_family
== AF_VSOCK
) {
518 r
= vsock_bind_privileged_port(fd
);
/* Errors matched by ERRNO_IS_PRIVILEGE() are not returned here; any other bind failure is. */
519 if (r
< 0 && !ERRNO_IS_PRIVILEGE(r
))
520 return log_debug_errno(r
, "Failed to bind socket to privileged port: %m");
/* Connection-oriented socket types need an explicit connect(). */
523 if (IN_SET(type
, SOCK_STREAM
, SOCK_SEQPACKET
)) {
524 if (connect(fd
, &address
.sockaddr
.sa
, address
.size
) < 0)
525 return log_debug_errno(errno
, "Failed to connect socket to '%s': %m", e
);
/* After connect() the message header carries no destination address anymore. */
527 msghdr
.msg_name
= NULL
;
528 msghdr
.msg_namelen
= 0;
/* Best effort: the result of fd_inc_sndbuf() is explicitly discarded. */
531 (void) fd_inc_sndbuf(fd
, SNDBUF_SIZE
);
533 iovec
= IOVEC_MAKE_STRING(state
);
/* True when a PID other than our own was requested, or when real and effective UID or GID
 * differ; presumably this is the value stored in send_ucred - TODO confirm, the assignment
 * target is not visible in this view. */
536 (pid
!= 0 && pid
!= getpid_cached()) ||
537 getuid() != geteuid() ||
538 getgid() != getegid();
540 if (n_fds
> 0 || send_ucred
) {
541 struct cmsghdr
*cmsg
;
543 /* CMSG_SPACE(0) may return value different than zero, which results in miscalculated controllen. */
544 msghdr
.msg_controllen
=
545 (n_fds
> 0 ? CMSG_SPACE(sizeof(int) * n_fds
) : 0) +
546 (send_ucred
? CMSG_SPACE(sizeof(struct ucred
)) : 0);
/* Zeroed control buffer obtained from alloca0(). */
548 msghdr
.msg_control
= alloca0(msghdr
.msg_controllen
);
550 cmsg
= CMSG_FIRSTHDR(&msghdr
);
/* First control message: the file descriptors, passed as SCM_RIGHTS. */
552 cmsg
->cmsg_level
= SOL_SOCKET
;
553 cmsg
->cmsg_type
= SCM_RIGHTS
;
554 cmsg
->cmsg_len
= CMSG_LEN(sizeof(int) * n_fds
);
556 memcpy(CMSG_DATA(cmsg
), fds
, sizeof(int) * n_fds
);
/* Advance to the next control message header; assert_se() requires it to be non-NULL. */
559 assert_se(cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
));
/* Credentials control message: pid as requested (or our own when 0), real UID and GID. */
565 cmsg
->cmsg_level
= SOL_SOCKET
;
566 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
567 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
569 ucred
= CMSG_TYPED_DATA(cmsg
, struct ucred
);
570 ucred
->pid
= pid
!= 0 ? pid
: getpid_cached();
571 ucred
->uid
= getuid();
572 ucred
->gid
= getgid();
579 /* First try with fake ucred data, as requested */
580 n
= sendmsg(fd
, &msghdr
, MSG_NOSIGNAL
);
583 return log_debug_errno(errno
, "Failed to send notify message to '%s': %m", e
);
585 /* If that failed, try with our own ucred instead */
586 msghdr
.msg_controllen
-= CMSG_SPACE(sizeof(struct ucred
));
/* Nothing is left in the control buffer once the ucred part is removed. */
587 if (msghdr
.msg_controllen
== 0)
588 msghdr
.msg_control
= NULL
;
593 /* Unless we're using SOCK_STREAM, we expect to write all the contents immediately. */
594 if (type
!= SOCK_STREAM
&& (size_t) n
< iovec_total_size(msghdr
.msg_iov
, msghdr
.msg_iovlen
))
597 /* Make sure we only send fds and ucred once, even if we're using SOCK_STREAM. */
598 msghdr
.msg_control
= NULL
;
599 msghdr
.msg_controllen
= 0;
/* Loop tail: n and the iovec array are handed to iovec_increment(), and the loop repeats while
 * that call returns false. */
601 } while (!iovec_increment(msghdr
.msg_iov
, msghdr
.msg_iovlen
, n
));
603 if (address
.sockaddr
.sa
.sa_family
== AF_VSOCK
&& IN_SET(type
, SOCK_STREAM
, SOCK_SEQPACKET
)) {
604 /* For AF_VSOCK, we need to close the socket to signal the end of the message. */
605 if (shutdown(fd
, SHUT_WR
) < 0)
606 return log_debug_errno(errno
, "Failed to shutdown notify socket: %m");
/* Read from the socket; per the messages below, a recv() failure and actually received data
 * are both reported as errors. */
609 n
= recv(fd
, &c
, sizeof(c
), MSG_NOSIGNAL
);
611 return log_debug_errno(errno
, "Failed to wait for EOF on notify socket: %m");
613 return log_debug_errno(SYNTHETIC_ERRNO(EPROTO
), "Unexpectedly received data on notify socket.");
/* Removes NOTIFY_SOCKET from the environment. The function starts with a check on
 * unset_environment; the statement controlled by that check is not visible in this view. The
 * unsetenv() result is compared against 0 inside assert_se(). */
619 static void unsetenv_notify(bool unset_environment
) {
620 if (!unset_environment
)
623 assert_se(unsetenv("NOTIFY_SOCKET") == 0);
/* Public wrapper that forwards pid, state, fds and n_fds to pid_notify_with_fds_internal() and
 * afterwards calls unsetenv_notify() with unset_environment.
 * NOTE(review): only part of the parameter list is visible in this view, and the return statement
 * is not; confirm the return contract against the full file. */
626 _public_
int sd_pid_notify_with_fds(
628 int unset_environment
,
635 r
= pid_notify_with_fds_internal(pid
, state
, fds
, n_fds
);
636 unsetenv_notify(unset_environment
);
/* Sends the message BARRIER=1 together with the write end of a freshly created pipe, closes the
 * local copy of that write end and then waits on the read end with fd_wait_for_event() for at
 * most timeout. unsetenv_notify() is called at the end.
 * NOTE(review): the error handling between the steps, the interpretation of the
 * fd_wait_for_event() result and the return statement are not visible in this view. */
640 _public_
int sd_pid_notify_barrier(pid_t pid
, int unset_environment
, uint64_t timeout
) {
641 _cleanup_close_pair_
int pipe_fd
[2] = EBADF_PAIR
;
644 r
= RET_NERRNO(pipe2(pipe_fd
, O_CLOEXEC
));
/* Pass the write end of the pipe along with the barrier message. */
648 r
= pid_notify_with_fds_internal(pid
, "BARRIER=1", &pipe_fd
[1], 1);
/* Close the local write end and store the value returned by safe_close() back into the slot. */
652 pipe_fd
[1] = safe_close(pipe_fd
[1]);
654 r
= fd_wait_for_event(pipe_fd
[0], 0 /* POLLHUP is implicit */, timeout
);
664 unsetenv_notify(unset_environment
);
/* Convenience wrapper: same as sd_pid_notify_barrier() with a pid argument of 0. */
668 _public_
int sd_notify_barrier(int unset_environment
, uint64_t timeout
) {
669 return sd_pid_notify_barrier(0, unset_environment
, timeout
);
/* Convenience wrapper: same as sd_pid_notify_with_fds() without any file descriptors (NULL
 * array, count 0). */
672 _public_
int sd_pid_notify(pid_t pid
, int unset_environment
, const char *state
) {
673 return sd_pid_notify_with_fds(pid
, unset_environment
, state
, NULL
, 0);
/* Convenience wrapper: same as sd_pid_notify_with_fds() with a pid argument of 0 and without any
 * file descriptors (NULL array, count 0). */
676 _public_
int sd_notify(int unset_environment
, const char *state
) {
677 return sd_pid_notify_with_fds(0, unset_environment
, state
, NULL
, 0);
/* printf-style variant: formats the message into the heap string p with vasprintf(), sends it
 * through pid_notify_with_fds_internal() for the given pid without file descriptors, and then
 * calls unsetenv_notify(). p is declared with _cleanup_free_.
 * NOTE(review): the guard around the formatting, the handling of a vasprintf() failure, the
 * matching va_end() and the return statement are not visible in this view. */
680 _public_
int sd_pid_notifyf(pid_t pid
, int unset_environment
, const char *format
, ...) {
681 _cleanup_free_
char *p
= NULL
;
687 va_start(ap
, format
);
688 r
= vasprintf(&p
, format
, ap
);
697 r
= pid_notify_with_fds_internal(pid
, p
, /* fds= */ NULL
, /* n_fds= */ 0);
700 unsetenv_notify(unset_environment
);
/* printf-style variant for the calling process: formats the message into the heap string p with
 * vasprintf(), sends it through pid_notify_with_fds_internal() with a pid argument of 0 and
 * without file descriptors, and then calls unsetenv_notify(). p is declared with _cleanup_free_.
 * NOTE(review): the guard around the formatting, the handling of a vasprintf() failure, the
 * matching va_end() and the return statement are not visible in this view. */
704 _public_
int sd_notifyf(int unset_environment
, const char *format
, ...) {
705 _cleanup_free_
char *p
= NULL
;
711 va_start(ap
, format
);
712 r
= vasprintf(&p
, format
, ap
);
721 r
= pid_notify_with_fds_internal(/* pid= */ 0, p
, /* fds= */ NULL
, /* n_fds= */ 0);
724 unsetenv_notify(unset_environment
);
/* printf-style variant that also passes file descriptors: formats the message into the heap
 * string p with vasprintf(), sends it through pid_notify_with_fds_internal() together with fds
 * and n_fds, and then calls unsetenv_notify(). p is declared with _cleanup_free_.
 * NOTE(review): the first parameter line, the guard around the formatting, the handling of a
 * vasprintf() failure, the matching va_end() and the return statement are not visible in this
 * view. */
728 _public_
int sd_pid_notifyf_with_fds(
730 int unset_environment
,
731 const int *fds
, size_t n_fds
,
732 const char *format
, ...) {
734 _cleanup_free_
char *p
= NULL
;
740 va_start(ap
, format
);
741 r
= vasprintf(&p
, format
, ap
);
750 r
= pid_notify_with_fds_internal(pid
, p
, fds
, n_fds
);
753 unsetenv_notify(unset_environment
);
/* Returns the inode id of pidfd through ret by delegating to pidfd_get_inode_id_impl(). Before
 * that, pidfd_check_pidfs() is called without a descriptor and fd_is_fs_type() tests pidfd
 * against PID_FS_MAGIC; one visible branch returns -EBADF for a descriptor of the wrong type. A
 * negative pidfd is handled by assert_return() with -EBADF.
 * NOTE(review): the evaluation of the two intermediate results is not visible in this view. */
757 _public_
int sd_pidfd_get_inode_id(int pidfd
, uint64_t *ret
) {
760 assert_return(pidfd
>= 0, -EBADF
);
762 /* Are pidfds backed by pidfs where the unique inode id is relevant? Note that the pidfd
763 * passed to us is extrinsic and hence cannot be trusted to initialize our "have_pidfs" cache,
764 * instead pidfd_check_pidfs() will allocate one internally. */
765 r
= pidfd_check_pidfs(/* pid_fd = */ -EBADF
);
769 r
= fd_is_fs_type(pidfd
, PID_FS_MAGIC
);
773 return -EBADF
; /* pidfs is definitely around, so it's the fd that's of invalid type */
775 return pidfd_get_inode_id_impl(pidfd
, ret
);
/* Probes for the directory /run/systemd/system/ with access_nofollow() and F_OK.
 * NOTE(review): the mapping of the probe result to the return value is not visible in this
 * view. */
778 _public_
int sd_booted(void) {
781 /* We test whether the runtime unit file directory has been created. This takes place in mount-setup.c,
782 * so is guaranteed to happen very early during boot. */
784 r
= access_nofollow("/run/systemd/system/", F_OK
);
/* Removes WATCHDOG_USEC and WATCHDOG_PID from the environment. The function starts with a check
 * on unset_environment; the statement controlled by that check is not visible in this view.
 * Every unsetenv() result is compared against 0 inside assert_se(). */
793 static void unsetenv_watchdog(bool unset_environment
) {
794 if (!unset_environment
)
797 assert_se(unsetenv("WATCHDOG_USEC") == 0);
798 assert_se(unsetenv("WATCHDOG_PID") == 0);
/* Reads WATCHDOG_USEC and parses it with safe_atou64(), tests the value with timestamp_is_set(),
 * reads WATCHDOG_PID, parses it with parse_pid() and compares the result with getpid_cached();
 * unsetenv_watchdog() is called afterwards.
 * NOTE(review): the use of the usec output parameter, all error branches and the return
 * statements are not visible in this view, and the definition may continue past the last
 * visible line. */
801 _public_
int sd_watchdog_enabled(int unset_environment
, uint64_t *usec
) {
802 const char *s
, *p
= ""; /* p is set to dummy value to do unsetting */
806 s
= getenv("WATCHDOG_USEC");
812 r
= safe_atou64(s
, &u
);
/* Branch for a parsed value that timestamp_is_set() does not accept. */
815 if (!timestamp_is_set(u
)) {
820 p
= getenv("WATCHDOG_PID");
824 r
= parse_pid(p
, &pid
);
828 /* Is this for us? */
829 if (getpid_cached() != pid
) {
841 unsetenv_watchdog(unset_environment
);