1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2014 Lennart Poettering
8 #include <netinet/tcp.h>
10 #include "alloc-util.h"
14 #include "resolved-dns-stream.h"
/* Relative expiry for a stream: dns_stream_new() arms the timeout event
 * source at now() plus this value. */
16 #define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
/* Limit that dns_stream_new() compares the manager's n_dns_streams against. */
17 #define DNS_STREAMS_MAX 128
/* Flag bit for dns_stream_writev(): passed by dns_stream_tls_writev() and
 * tested there together with tls_session to decide whether the data is
 * handed to gnutls_record_send(). */
19 #define WRITE_TLS_DATA 1
/* Releases the stream's event-loop and socket resources: both event sources
 * (I/O and timeout) are unreferenced and the fd is closed. Each field is
 * overwritten with the value its release helper returns. */
21 static void dns_stream_stop(DnsStream
*s
) {
24 s
->io_event_source
= sd_event_source_unref(s
->io_event_source
);
25 s
->timeout_event_source
= sd_event_source_unref(s
->timeout_event_source
);
/* The fd field takes whatever safe_close() hands back. */
26 s
->fd
= safe_close(s
->fd
);
/* Re-evaluates the stream's read/write progress and applies the event mask
 * held in `f` to the I/O event source.
 *
 * A write counts as outstanding while n_written is below the size of the
 * length prefix (write_size) plus the size of the current write_packet.
 * Returns the result of sd_event_source_set_io_events(). */
29 static int dns_stream_update_io(DnsStream
*s
) {
34 if (s
->write_packet
&& s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
)
/* No write in flight but the queue is non-empty: drop the current packet
 * reference and promote the first queued packet. */
36 else if (!ordered_set_isempty(s
->write_queue
)) {
37 dns_packet_unref(s
->write_packet
);
38 s
->write_packet
= ordered_set_steal_first(s
->write_queue
);
/* The length prefix is stored big-endian, as it is sent on the wire. */
39 s
->write_size
= htobe16(s
->write_packet
->size
);
/* Reading is still pending when no packet has been allocated yet or fewer
 * bytes than prefix + packet size have arrived. */
43 if (!s
->read_packet
|| s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
)
46 return sd_event_source_set_io_events(s
->io_event_source
, f
);
/* Finishes the stream with the given error code (0 is used by callers in
 * this file for the success case).
 *
 * When a TLS session exists and the error is 0 or ETIMEDOUT, a TLS shutdown
 * is attempted via gnutls_bye() first. The stream's `complete` callback is
 * then invoked with the stream and the error; per the comment on the else
 * branch, a stream without a callback is closed by default. */
49 static int dns_stream_complete(DnsStream
*s
, int error
) {
53 if (s
->tls_session
&& IN_SET(error
, ETIMEDOUT
, 0)) {
56 r
= gnutls_bye(s
->tls_session
, GNUTLS_SHUT_RDWR
);
/* The shutdown could not finish yet and no bye is marked as pending
 * (tls_bye unset): take an extra reference on the stream. */
57 if (r
== GNUTLS_E_AGAIN
&& !s
->tls_bye
) {
58 dns_stream_ref(s
); /* keep reference for closing TLS session */
67 s
->complete(s
, error
);
68 else /* the default action if no completion function is set is to close the stream */
/* Collects addressing metadata for the stream's socket and marks the stream
 * as identified.
 *
 * Filled in from the socket: local and peer addresses (getsockname() /
 * getpeername()), the interface index and the TTL (parsed from the control
 * data returned by IP_PKTOPTIONS or IPV6_2292PKTOPTIONS). For LLMNR streams
 * with a positive ifindex the socket is additionally bound to that interface
 * with IP_UNICAST_IF / IPV6_UNICAST_IF; a failure there is only logged at
 * debug level. A -EAFNOSUPPORT return follows the AF_INET/AF_INET6 branches. */
74 static int dns_stream_identify(DnsStream
*s
) {
76 struct cmsghdr header
; /* For alignment */
77 uint8_t buffer
[CMSG_SPACE(MAXSIZE(struct in_pktinfo
, struct in6_pktinfo
))
78 + EXTRA_CMSG_SPACE
/* kernel appears to require extra space */];
80 struct msghdr mh
= {};
90 /* Query the local side */
91 s
->local_salen
= sizeof(s
->local
);
92 r
= getsockname(s
->fd
, &s
->local
.sa
, &s
->local_salen
);
/* For IPv6, the scope id of the local address serves as interface index
 * when none is known yet. */
95 if (s
->local
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
96 s
->ifindex
= s
->local
.in6
.sin6_scope_id
;
98 /* Query the remote side */
99 s
->peer_salen
= sizeof(s
->peer
);
100 r
= getpeername(s
->fd
, &s
->peer
.sa
, &s
->peer_salen
);
/* Same fallback as above, this time from the peer address. */
103 if (s
->peer
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
104 s
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
106 /* Check consistency */
107 assert(s
->peer
.sa
.sa_family
== s
->local
.sa
.sa_family
);
108 assert(IN_SET(s
->peer
.sa
.sa_family
, AF_INET
, AF_INET6
));
110 /* Query connection meta information */
111 sl
= sizeof(control
);
112 if (s
->peer
.sa
.sa_family
== AF_INET
) {
113 r
= getsockopt(s
->fd
, IPPROTO_IP
, IP_PKTOPTIONS
, &control
, &sl
);
116 } else if (s
->peer
.sa
.sa_family
== AF_INET6
) {
118 r
= getsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_2292PKTOPTIONS
, &control
, &sl
);
122 return -EAFNOSUPPORT
;
/* Wrap the option buffer in a msghdr so it can be walked with CMSG_FOREACH. */
124 mh
.msg_control
= &control
;
125 mh
.msg_controllen
= sl
;
127 CMSG_FOREACH(cmsg
, &mh
) {
129 if (cmsg
->cmsg_level
== IPPROTO_IPV6
) {
130 assert(s
->peer
.sa
.sa_family
== AF_INET6
);
132 switch (cmsg
->cmsg_type
) {
135 struct in6_pktinfo
*i
= (struct in6_pktinfo
*) CMSG_DATA(cmsg
);
138 s
->ifindex
= i
->ipi6_ifindex
;
/* The TTL is read from the control message payload as a plain int. */
143 s
->ttl
= *(int *) CMSG_DATA(cmsg
);
147 } else if (cmsg
->cmsg_level
== IPPROTO_IP
) {
148 assert(s
->peer
.sa
.sa_family
== AF_INET
);
150 switch (cmsg
->cmsg_type
) {
153 struct in_pktinfo
*i
= (struct in_pktinfo
*) CMSG_DATA(cmsg
);
156 s
->ifindex
= i
->ipi_ifindex
;
161 s
->ttl
= *(int *) CMSG_DATA(cmsg
);
167 /* The Linux kernel sets the interface index to the loopback
168 * device if the connection came from the local host since it
169 * avoids the routing table in such a case. Let's unset the
170 * interface index in such a case. */
171 if (s
->ifindex
== LOOPBACK_IFINDEX
)
174 /* If we don't know the interface index still, we look for the
175 * first local interface with a matching address. Yuck! */
177 s
->ifindex
= manager_find_ifindex(s
->manager
, s
->local
.sa
.sa_family
, s
->local
.sa
.sa_family
== AF_INET
? (union in_addr_union
*) &s
->local
.in
.sin_addr
: (union in_addr_union
*) &s
->local
.in6
.sin6_addr
);
179 if (s
->protocol
== DNS_PROTOCOL_LLMNR
&& s
->ifindex
> 0) {
/* The interface index is converted with htobe32() before being passed to
 * the *_UNICAST_IF socket options below. */
180 uint32_t ifindex
= htobe32(s
->ifindex
);
182 /* Make sure all packets for this connection are sent on the same interface */
183 if (s
->local
.sa
.sa_family
== AF_INET
) {
184 r
= setsockopt(s
->fd
, IPPROTO_IP
, IP_UNICAST_IF
, &ifindex
, sizeof(ifindex
));
186 log_debug_errno(errno
, "Failed to invoke IP_UNICAST_IF: %m");
187 } else if (s
->local
.sa
.sa_family
== AF_INET6
) {
188 r
= setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_UNICAST_IF
, &ifindex
, sizeof(ifindex
));
190 log_debug_errno(errno
, "Failed to invoke IPV6_UNICAST_IF: %m");
194 s
->identified
= true;
/* Writes an iovec array to the stream.
 *
 * s:      the stream to write to
 * iov:    array of buffers to send
 * iovcnt: number of entries in iov
 * flags:  0, or WRITE_TLS_DATA when the data has already been produced by the
 *         TLS layer (see dns_stream_tls_writev())
 *
 * With a TLS session and without WRITE_TLS_DATA, every buffer is passed to
 * gnutls_record_send() in turn. The non-TLS code uses sendmsg() with
 * MSG_FASTOPEN towards tfo_address while tfo_salen is positive, and
 * writev() on the fd otherwise. */
199 static ssize_t
dns_stream_writev(DnsStream
*s
, const struct iovec
*iov
, size_t iovcnt
, int flags
) {
206 if (s
->tls_session
&& !(flags
& WRITE_TLS_DATA
)) {
211 for (i
= 0; i
< iovcnt
; i
++) {
212 ss
= gnutls_record_send(s
->tls_session
, iov
[i
].iov_base
, iov
[i
].iov_len
);
216 case GNUTLS_E_INTERRUPTED
:
221 log_debug("Failed to invoke gnutls_record_send: %s", gnutls_strerror(ss
));
/* Detect a short send: fewer bytes accepted than this buffer holds. */
227 if (ss
!= (ssize_t
) iov
[i
].iov_len
)
/* A positive tfo_salen means a fast-open destination address is still
 * stored on the stream. */
232 if (s
->tfo_salen
> 0) {
233 struct msghdr hdr
= {
234 .msg_iov
= (struct iovec
*) iov
,
235 .msg_iovlen
= iovcnt
,
236 .msg_name
= &s
->tfo_address
.sa
,
237 .msg_namelen
= s
->tfo_salen
240 r
= sendmsg(s
->fd
, &hdr
, MSG_FASTOPEN
);
/* EOPNOTSUPP from the fast-open sendmsg(): use an ordinary connect() to
 * the same address instead. */
242 if (errno
== EOPNOTSUPP
) {
244 r
= connect(s
->fd
, &s
->tfo_address
.sa
, s
->tfo_salen
);
249 } else if (errno
== EINPROGRESS
)
252 s
->tfo_salen
= 0; /* connection is made */
254 r
= writev(s
->fd
, iov
, iovcnt
);
/* Reads up to `count` bytes from the stream into `buf`.
 *
 * s:     the stream to read from
 * buf:   destination buffer
 * count: maximum number of bytes to read
 *
 * With a TLS session the data comes from gnutls_record_recv(); failures of
 * that call are reported at debug level. On the TLS path a pending
 * on_connection callback is invoked once and then cleared. Without TLS the
 * data is read straight from the fd with read(). */
259 static ssize_t
dns_stream_read(DnsStream
*s
, void *buf
, size_t count
) {
263 if (s
->tls_session
) {
264 ss
= gnutls_record_recv(s
->tls_session
, buf
, count
);
268 case GNUTLS_E_INTERRUPTED
:
/* Name the call that actually failed: this path uses gnutls_record_recv(),
 * not gnutls_record_send(). */
273 log_debug("Failed to invoke gnutls_record_recv: %s", gnutls_strerror(ss
));
276 } else if (s
->on_connection
) {
279 r
= s
->on_connection(s
);
280 s
->on_connection
= NULL
; /* only call once */
286 ss
= read(s
->fd
, buf
, count
);
/* Vector push callback installed on the GnuTLS session by
 * dns_stream_connect_tls(). `p` is the transport pointer and is cast back to
 * the DnsStream. The buffers are forwarded to dns_stream_writev() with
 * WRITE_TLS_DATA set, which makes that function skip its
 * gnutls_record_send() branch. */
292 static ssize_t
dns_stream_tls_writev(gnutls_transport_ptr_t p
, const giovec_t
* iov
, int iovcnt
) {
297 r
= dns_stream_writev((DnsStream
*) p
, (struct iovec
*) iov
, iovcnt
, WRITE_TLS_DATA
);
/* Timer callback registered by dns_stream_new(). `userdata` is the DnsStream
 * the timer belongs to; the stream is completed with ETIMEDOUT and the
 * result of dns_stream_complete() is returned. */
307 static int on_stream_timeout(sd_event_source
*es
, usec_t usec
, void *userdata
) {
308 DnsStream
*s
= userdata
;
312 return dns_stream_complete(s
, ETIMEDOUT
);
/* I/O callback registered by dns_stream_new(); `userdata` is the DnsStream
 * and `revents` the epoll event mask that fired.
 *
 * The visible stages are, in order: retrying a TLS shutdown (gnutls_bye()),
 * driving an unfinished TLS handshake, identifying the connection once
 * tfo_salen is 0, writing the pending packet on EPOLLOUT, reading the length
 * prefix and then the packet body on EPOLLIN/EPOLLHUP/EPOLLRDHUP, and finally
 * completing the stream with 0 once both directions are done. Failures are
 * reported through dns_stream_complete() with a positive errno-style code. */
315 static int on_stream_io(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
316 DnsStream
*s
= userdata
;
/* TLS shutdown retry: the result is compared against GNUTLS_E_AGAIN. */
323 assert(s
->tls_session
);
325 r
= gnutls_bye(s
->tls_session
, GNUTLS_SHUT_RDWR
);
326 if (r
!= GNUTLS_E_AGAIN
) {
/* A negative tls_handshake means the handshake has not completed yet, so
 * gnutls_handshake() is called again. */
334 if (s
->tls_handshake
< 0) {
335 assert(s
->tls_session
);
337 s
->tls_handshake
= gnutls_handshake(s
->tls_session
);
338 if (s
->tls_handshake
>= 0) {
/* Handshake done: fire on_connection here unless the session reports
 * GNUTLS_SFLAGS_FALSE_START. */
339 if (s
->on_connection
&& !(gnutls_session_get_flags(s
->tls_session
) & GNUTLS_SFLAGS_FALSE_START
)) {
340 r
= s
->on_connection(s
);
341 s
->on_connection
= NULL
; /* only call once */
/* A fatal handshake error is reported to the completion path as ECONNREFUSED. */
346 if (gnutls_error_is_fatal(s
->tls_handshake
))
347 return dns_stream_complete(s
, ECONNREFUSED
);
355 /* only identify after connecting */
356 if (s
->tfo_salen
== 0) {
357 r
= dns_stream_identify(s
);
/* r is negated before being passed on as the completion error code. */
359 return dns_stream_complete(s
, -r
);
/* Write stage: runs on EPOLLOUT while the current packet is not fully sent. */
362 if ((revents
& EPOLLOUT
) &&
364 s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
) {
/* iov[0] is the length prefix, iov[1] the packet data. */
369 iov
[0].iov_base
= &s
->write_size
;
370 iov
[0].iov_len
= sizeof(s
->write_size
);
371 iov
[1].iov_base
= DNS_PACKET_DATA(s
->write_packet
);
372 iov
[1].iov_len
= s
->write_packet
->size
;
/* IOVEC_INCREMENT is applied with n_written, the count of bytes already
 * sent, before the write. */
374 IOVEC_INCREMENT(iov
, 2, s
->n_written
);
376 ss
= dns_stream_writev(s
, iov
, 2, 0);
/* EINTR and EAGAIN do not complete the stream; any other errno does. */
378 if (!IN_SET(errno
, EINTR
, EAGAIN
))
379 return dns_stream_complete(s
, errno
);
383 /* Are we done? If so, disable the event source for EPOLLOUT */
384 if (s
->n_written
>= sizeof(s
->write_size
) + s
->write_packet
->size
) {
385 r
= dns_stream_update_io(s
);
387 return dns_stream_complete(s
, -r
);
/* Read stage: runs on EPOLLIN/EPOLLHUP/EPOLLRDHUP. */
391 if ((revents
& (EPOLLIN
|EPOLLHUP
|EPOLLRDHUP
)) &&
393 s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
)) {
/* First the length prefix: read the bytes of read_size still missing. */
395 if (s
->n_read
< sizeof(s
->read_size
)) {
398 ss
= dns_stream_read(s
, (uint8_t*) &s
->read_size
+ s
->n_read
, sizeof(s
->read_size
) - s
->n_read
);
400 if (!IN_SET(errno
, EINTR
, EAGAIN
))
401 return dns_stream_complete(s
, errno
);
403 return dns_stream_complete(s
, ECONNRESET
);
/* Length prefix complete: validate it and read the packet body. */
408 if (s
->n_read
>= sizeof(s
->read_size
)) {
/* An announced size smaller than a DNS header is rejected with EBADMSG. */
410 if (be16toh(s
->read_size
) < DNS_PACKET_HEADER_SIZE
)
411 return dns_stream_complete(s
, EBADMSG
);
413 if (s
->n_read
< sizeof(s
->read_size
) + be16toh(s
->read_size
)) {
/* Allocate the receive packet on first use and stamp it with the
 * connection metadata held on the stream. */
416 if (!s
->read_packet
) {
417 r
= dns_packet_new(&s
->read_packet
, s
->protocol
, be16toh(s
->read_size
), DNS_PACKET_SIZE_MAX
);
419 return dns_stream_complete(s
, -r
);
421 s
->read_packet
->size
= be16toh(s
->read_size
);
422 s
->read_packet
->ipproto
= IPPROTO_TCP
;
423 s
->read_packet
->family
= s
->peer
.sa
.sa_family
;
424 s
->read_packet
->ttl
= s
->ttl
;
425 s
->read_packet
->ifindex
= s
->ifindex
;
/* Sender is the peer address, destination the local one; ports are
 * converted to host byte order. */
427 if (s
->read_packet
->family
== AF_INET
) {
428 s
->read_packet
->sender
.in
= s
->peer
.in
.sin_addr
;
429 s
->read_packet
->sender_port
= be16toh(s
->peer
.in
.sin_port
);
430 s
->read_packet
->destination
.in
= s
->local
.in
.sin_addr
;
431 s
->read_packet
->destination_port
= be16toh(s
->local
.in
.sin_port
);
433 assert(s
->read_packet
->family
== AF_INET6
);
434 s
->read_packet
->sender
.in6
= s
->peer
.in6
.sin6_addr
;
435 s
->read_packet
->sender_port
= be16toh(s
->peer
.in6
.sin6_port
);
436 s
->read_packet
->destination
.in6
= s
->local
.in6
.sin6_addr
;
437 s
->read_packet
->destination_port
= be16toh(s
->local
.in6
.sin6_port
);
/* IPv6 only: with no ifindex recorded, try the peer's scope id, then the
 * local one. */
439 if (s
->read_packet
->ifindex
== 0)
440 s
->read_packet
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
441 if (s
->read_packet
->ifindex
== 0)
442 s
->read_packet
->ifindex
= s
->local
.in6
.sin6_scope_id
;
/* n_read counts the length prefix too, hence the sizeof(read_size)
 * adjustments in both the destination offset and the remaining length. */
446 ss
= dns_stream_read(s
,
447 (uint8_t*) DNS_PACKET_DATA(s
->read_packet
) + s
->n_read
- sizeof(s
->read_size
),
448 sizeof(s
->read_size
) + be16toh(s
->read_size
) - s
->n_read
);
450 if (!IN_SET(errno
, EINTR
, EAGAIN
))
451 return dns_stream_complete(s
, errno
);
453 return dns_stream_complete(s
, ECONNRESET
);
458 /* Are we done? If so, disable the event source for EPOLLIN */
459 if (s
->n_read
>= sizeof(s
->read_size
) + be16toh(s
->read_size
)) {
460 /* If there's a packet handler
461 * installed, call that. Note that
462 * this is optional... */
469 r
= dns_stream_update_io(s
);
471 return dns_stream_complete(s
, -r
);
/* Both a fully written request and a fully read reply: complete with 0. */
476 if ((s
->write_packet
&& s
->n_written
>= sizeof(s
->write_size
) + s
->write_packet
->size
) &&
477 (s
->read_packet
&& s
->n_read
>= sizeof(s
->read_size
) + s
->read_packet
->size
))
478 return dns_stream_complete(s
, 0);
/* Reference-count release for a DnsStream (n_ref must be positive on entry).
 *
 * Teardown steps visible here: detach from the server if it still points at
 * this stream, unlink from the manager's stream list and decrement its
 * counter, deinitialise the TLS session, drop every queued packet as well as
 * the current write/read packets and the server reference, and free the
 * write queue. */
483 DnsStream
*dns_stream_unref(DnsStream
*s
) {
490 assert(s
->n_ref
> 0);
/* Clear the server's back-pointer only when it refers to this very stream. */
498 if (s
->server
&& s
->server
->stream
== s
)
499 s
->server
->stream
= NULL
;
502 LIST_REMOVE(streams
, s
->manager
->dns_streams
, s
);
503 s
->manager
->n_dns_streams
--;
508 gnutls_deinit(s
->tls_session
);
/* Each queued packet is removed from the set and its reference dropped. */
511 ORDERED_SET_FOREACH(p
, s
->write_queue
, i
)
512 dns_packet_unref(ordered_set_remove(s
->write_queue
, p
));
514 dns_packet_unref(s
->write_packet
);
515 dns_packet_unref(s
->read_packet
);
516 dns_server_unref(s
->server
);
518 ordered_set_free(s
->write_queue
);
/* Counterpart of dns_stream_unref(). Asserts that the stream is still
 * referenced (n_ref > 0).
 * NOTE(review): presumably increments n_ref and returns s, but only the
 * assertion is visible here, so confirm against the full source. */
523 DnsStream
*dns_stream_ref(DnsStream
*s
) {
527 assert(s
->n_ref
> 0);
/* Allocates a DnsStream for an existing socket and hooks it into the
 * manager's event loop.
 *
 * m:           manager owning the event loop and the list of streams
 * ret:         output location for the new stream
 * protocol:    stored on the stream as its DNS protocol
 * fd:          socket to watch; registered for EPOLLIN with on_stream_io()
 * tfo_address: address copied into the stream together with its length
 *
 * The stream is held in a _cleanup_ variable, so early exits release it
 * automatically. */
533 int dns_stream_new(Manager
*m
, DnsStream
**ret
, DnsProtocol protocol
, int fd
, const union sockaddr_union
*tfo_address
) {
534 _cleanup_(dns_stream_unrefp
) DnsStream
*s
= NULL
;
/* NOTE(review): the comparison is '>' rather than '>=', so the check only
 * trips once the counter already exceeds DNS_STREAMS_MAX. Confirm whether
 * that off-by-one is intended. */
540 if (m
->n_dns_streams
> DNS_STREAMS_MAX
)
543 s
= new0(DnsStream
, 1);
547 r
= ordered_set_ensure_allocated(&s
->write_queue
, &dns_packet_hash_ops
);
553 s
->protocol
= protocol
;
555 r
= sd_event_add_io(m
->event
, &s
->io_event_source
, fd
, EPOLLIN
, on_stream_io
, s
);
559 (void) sd_event_source_set_description(s
->io_event_source
, "dns-stream-io");
/* Arm the timeout timer at now() + DNS_STREAM_TIMEOUT_USEC; it fires
 * on_stream_timeout(). */
561 r
= sd_event_add_time(
563 &s
->timeout_event_source
,
564 clock_boottime_or_monotonic(),
565 now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC
, 0,
566 on_stream_timeout
, s
);
570 (void) sd_event_source_set_description(s
->timeout_event_source
, "dns-stream-timeout");
572 LIST_PREPEND(streams
, m
->dns_streams
, s
);
/* Keep a copy of the fast-open address; its length depends on the address
 * family (sockaddr_in6 for AF_INET6, sockaddr_in otherwise). */
576 s
->tfo_address
= *tfo_address
;
577 s
->tfo_salen
= tfo_address
->sa
.sa_family
== AF_INET6
? sizeof(tfo_address
->in6
) : sizeof(tfo_address
->in
);
/* Attaches a GnuTLS session to the stream and starts the handshake.
 *
 * s:           the stream to run TLS on
 * tls_session: the GnuTLS session to attach
 *
 * Two transport pointers are installed on the session: the first is the raw
 * fd, the second the stream itself. The stream pointer is what
 * dns_stream_tls_writev(), registered as vec-push function, receives as `p`.
 * The handshake result is kept in tls_handshake; a negative, fatal result
 * yields -ECONNREFUSED. */
588 int dns_stream_connect_tls(DnsStream
*s
, gnutls_session_t tls_session
) {
589 gnutls_transport_set_ptr2(tls_session
, (gnutls_transport_ptr_t
) (long) s
->fd
, s
);
590 gnutls_transport_set_vec_push_function(tls_session
, &dns_stream_tls_writev
);
593 s
->tls_session
= tls_session
;
594 s
->tls_handshake
= gnutls_handshake(tls_session
);
/* Negative but non-fatal results are left in tls_handshake; on_stream_io()
 * calls gnutls_handshake() again while that field is negative. */
595 if (s
->tls_handshake
< 0 && gnutls_error_is_fatal(s
->tls_handshake
))
596 return -ECONNREFUSED
;
/* Queues packet `p` for transmission on stream `s`: the packet is put into
 * write_queue and the I/O event mask is refreshed through
 * dns_stream_update_io(), whose result is returned. */
602 int dns_stream_write_packet(DnsStream
*s
, DnsPacket
*p
) {
607 r
= ordered_set_put(s
->write_queue
, p
);
613 return dns_stream_update_io(s
);