1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 #include <netinet/tcp.h>
5 #include "alloc-util.h"
9 #include "resolved-dns-stream.h"
11 #define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
12 #define DNS_STREAMS_MAX 128
14 static void dns_stream_stop(DnsStream
*s
) {
17 s
->io_event_source
= sd_event_source_unref(s
->io_event_source
);
18 s
->timeout_event_source
= sd_event_source_unref(s
->timeout_event_source
);
19 s
->fd
= safe_close(s
->fd
);
21 /* Disconnect us from the server object if we are now not usable anymore */
25 static int dns_stream_update_io(DnsStream
*s
) {
30 if (s
->write_packet
&& s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
)
32 else if (!ordered_set_isempty(s
->write_queue
)) {
33 dns_packet_unref(s
->write_packet
);
34 s
->write_packet
= ordered_set_steal_first(s
->write_queue
);
35 s
->write_size
= htobe16(s
->write_packet
->size
);
39 if (!s
->read_packet
|| s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
)
42 #if ENABLE_DNS_OVER_TLS
43 /* For handshake and clean closing purposes, TLS can override requested events */
44 if (s
->dnstls_events
!= 0)
48 return sd_event_source_set_io_events(s
->io_event_source
, f
);
51 static int dns_stream_complete(DnsStream
*s
, int error
) {
52 _cleanup_(dns_stream_unrefp
) _unused_ DnsStream
*ref
= dns_stream_ref(s
); /* Protect stream while we process it */
57 /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
58 * and received exactly one packet each (in the LLMNR client case). */
60 #if ENABLE_DNS_OVER_TLS
64 r
= dnstls_stream_shutdown(s
, error
);
74 s
->complete(s
, error
);
75 else /* the default action if no completion function is set is to close the stream */
81 static int dns_stream_identify(DnsStream
*s
) {
83 struct cmsghdr header
; /* For alignment */
84 uint8_t buffer
[CMSG_SPACE(MAXSIZE(struct in_pktinfo
, struct in6_pktinfo
))
85 + EXTRA_CMSG_SPACE
/* kernel appears to require extra space */];
87 struct msghdr mh
= {};
97 /* Query the local side */
98 s
->local_salen
= sizeof(s
->local
);
99 r
= getsockname(s
->fd
, &s
->local
.sa
, &s
->local_salen
);
102 if (s
->local
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
103 s
->ifindex
= s
->local
.in6
.sin6_scope_id
;
105 /* Query the remote side */
106 s
->peer_salen
= sizeof(s
->peer
);
107 r
= getpeername(s
->fd
, &s
->peer
.sa
, &s
->peer_salen
);
110 if (s
->peer
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
111 s
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
113 /* Check consistency */
114 assert(s
->peer
.sa
.sa_family
== s
->local
.sa
.sa_family
);
115 assert(IN_SET(s
->peer
.sa
.sa_family
, AF_INET
, AF_INET6
));
117 /* Query connection meta information */
118 sl
= sizeof(control
);
119 if (s
->peer
.sa
.sa_family
== AF_INET
) {
120 r
= getsockopt(s
->fd
, IPPROTO_IP
, IP_PKTOPTIONS
, &control
, &sl
);
123 } else if (s
->peer
.sa
.sa_family
== AF_INET6
) {
125 r
= getsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_2292PKTOPTIONS
, &control
, &sl
);
129 return -EAFNOSUPPORT
;
131 mh
.msg_control
= &control
;
132 mh
.msg_controllen
= sl
;
134 CMSG_FOREACH(cmsg
, &mh
) {
136 if (cmsg
->cmsg_level
== IPPROTO_IPV6
) {
137 assert(s
->peer
.sa
.sa_family
== AF_INET6
);
139 switch (cmsg
->cmsg_type
) {
142 struct in6_pktinfo
*i
= (struct in6_pktinfo
*) CMSG_DATA(cmsg
);
145 s
->ifindex
= i
->ipi6_ifindex
;
150 s
->ttl
= *(int *) CMSG_DATA(cmsg
);
154 } else if (cmsg
->cmsg_level
== IPPROTO_IP
) {
155 assert(s
->peer
.sa
.sa_family
== AF_INET
);
157 switch (cmsg
->cmsg_type
) {
160 struct in_pktinfo
*i
= (struct in_pktinfo
*) CMSG_DATA(cmsg
);
163 s
->ifindex
= i
->ipi_ifindex
;
168 s
->ttl
= *(int *) CMSG_DATA(cmsg
);
174 /* The Linux kernel sets the interface index to the loopback
175 * device if the connection came from the local host since it
176 * avoids the routing table in such a case. Let's unset the
177 * interface index in such a case. */
178 if (s
->ifindex
== LOOPBACK_IFINDEX
)
181 /* If we don't know the interface index still, we look for the
182 * first local interface with a matching address. Yuck! */
184 s
->ifindex
= manager_find_ifindex(s
->manager
, s
->local
.sa
.sa_family
, s
->local
.sa
.sa_family
== AF_INET
? (union in_addr_union
*) &s
->local
.in
.sin_addr
: (union in_addr_union
*) &s
->local
.in6
.sin6_addr
);
186 if (s
->protocol
== DNS_PROTOCOL_LLMNR
&& s
->ifindex
> 0) {
187 uint32_t ifindex
= htobe32(s
->ifindex
);
189 /* Make sure all packets for this connection are sent on the same interface */
190 if (s
->local
.sa
.sa_family
== AF_INET
) {
191 r
= setsockopt(s
->fd
, IPPROTO_IP
, IP_UNICAST_IF
, &ifindex
, sizeof(ifindex
));
193 log_debug_errno(errno
, "Failed to invoke IP_UNICAST_IF: %m");
194 } else if (s
->local
.sa
.sa_family
== AF_INET6
) {
195 r
= setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_UNICAST_IF
, &ifindex
, sizeof(ifindex
));
197 log_debug_errno(errno
, "Failed to invoke IPV6_UNICAST_IF: %m");
201 s
->identified
= true;
206 ssize_t
dns_stream_writev(DnsStream
*s
, const struct iovec
*iov
, size_t iovcnt
, int flags
) {
212 #if ENABLE_DNS_OVER_TLS
213 if (s
->encrypted
&& !(flags
& DNS_STREAM_WRITE_TLS_DATA
)) {
218 for (i
= 0; i
< iovcnt
; i
++) {
219 ss
= dnstls_stream_write(s
, iov
[i
].iov_base
, iov
[i
].iov_len
);
224 if (ss
!= (ssize_t
) iov
[i
].iov_len
)
229 if (s
->tfo_salen
> 0) {
230 struct msghdr hdr
= {
231 .msg_iov
= (struct iovec
*) iov
,
232 .msg_iovlen
= iovcnt
,
233 .msg_name
= &s
->tfo_address
.sa
,
234 .msg_namelen
= s
->tfo_salen
237 m
= sendmsg(s
->fd
, &hdr
, MSG_FASTOPEN
);
239 if (errno
== EOPNOTSUPP
) {
241 if (connect(s
->fd
, &s
->tfo_address
.sa
, s
->tfo_salen
) < 0)
246 if (errno
== EINPROGRESS
)
251 s
->tfo_salen
= 0; /* connection is made */
253 m
= writev(s
->fd
, iov
, iovcnt
);
261 static ssize_t
dns_stream_read(DnsStream
*s
, void *buf
, size_t count
) {
264 #if ENABLE_DNS_OVER_TLS
266 ss
= dnstls_stream_read(s
, buf
, count
);
270 ss
= read(s
->fd
, buf
, count
);
278 static int on_stream_timeout(sd_event_source
*es
, usec_t usec
, void *userdata
) {
279 DnsStream
*s
= userdata
;
283 return dns_stream_complete(s
, ETIMEDOUT
);
286 static int on_stream_io(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
287 _cleanup_(dns_stream_unrefp
) DnsStream
*s
= dns_stream_ref(userdata
); /* Protect stream while we process it */
288 bool progressed
= false;
293 #if ENABLE_DNS_OVER_TLS
295 r
= dnstls_stream_on_io(s
, revents
);
296 if (r
== DNSTLS_STREAM_CLOSED
)
299 return dns_stream_update_io(s
);
301 return dns_stream_complete(s
, -r
);
303 r
= dns_stream_update_io(s
);
309 /* only identify after connecting */
310 if (s
->tfo_salen
== 0) {
311 r
= dns_stream_identify(s
);
313 return dns_stream_complete(s
, -r
);
316 if ((revents
& EPOLLOUT
) &&
318 s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
) {
323 iov
[0] = IOVEC_MAKE(&s
->write_size
, sizeof(s
->write_size
));
324 iov
[1] = IOVEC_MAKE(DNS_PACKET_DATA(s
->write_packet
), s
->write_packet
->size
);
326 IOVEC_INCREMENT(iov
, 2, s
->n_written
);
328 ss
= dns_stream_writev(s
, iov
, 2, 0);
330 if (!IN_SET(-ss
, EINTR
, EAGAIN
))
331 return dns_stream_complete(s
, -ss
);
337 /* Are we done? If so, disable the event source for EPOLLOUT */
338 if (s
->n_written
>= sizeof(s
->write_size
) + s
->write_packet
->size
) {
339 r
= dns_stream_update_io(s
);
341 return dns_stream_complete(s
, -r
);
345 if ((revents
& (EPOLLIN
|EPOLLHUP
|EPOLLRDHUP
)) &&
347 s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
)) {
349 if (s
->n_read
< sizeof(s
->read_size
)) {
352 ss
= dns_stream_read(s
, (uint8_t*) &s
->read_size
+ s
->n_read
, sizeof(s
->read_size
) - s
->n_read
);
354 if (!IN_SET(-ss
, EINTR
, EAGAIN
))
355 return dns_stream_complete(s
, -ss
);
357 return dns_stream_complete(s
, ECONNRESET
);
364 if (s
->n_read
>= sizeof(s
->read_size
)) {
366 if (be16toh(s
->read_size
) < DNS_PACKET_HEADER_SIZE
)
367 return dns_stream_complete(s
, EBADMSG
);
369 if (s
->n_read
< sizeof(s
->read_size
) + be16toh(s
->read_size
)) {
372 if (!s
->read_packet
) {
373 r
= dns_packet_new(&s
->read_packet
, s
->protocol
, be16toh(s
->read_size
), DNS_PACKET_SIZE_MAX
);
375 return dns_stream_complete(s
, -r
);
377 s
->read_packet
->size
= be16toh(s
->read_size
);
378 s
->read_packet
->ipproto
= IPPROTO_TCP
;
379 s
->read_packet
->family
= s
->peer
.sa
.sa_family
;
380 s
->read_packet
->ttl
= s
->ttl
;
381 s
->read_packet
->ifindex
= s
->ifindex
;
383 if (s
->read_packet
->family
== AF_INET
) {
384 s
->read_packet
->sender
.in
= s
->peer
.in
.sin_addr
;
385 s
->read_packet
->sender_port
= be16toh(s
->peer
.in
.sin_port
);
386 s
->read_packet
->destination
.in
= s
->local
.in
.sin_addr
;
387 s
->read_packet
->destination_port
= be16toh(s
->local
.in
.sin_port
);
389 assert(s
->read_packet
->family
== AF_INET6
);
390 s
->read_packet
->sender
.in6
= s
->peer
.in6
.sin6_addr
;
391 s
->read_packet
->sender_port
= be16toh(s
->peer
.in6
.sin6_port
);
392 s
->read_packet
->destination
.in6
= s
->local
.in6
.sin6_addr
;
393 s
->read_packet
->destination_port
= be16toh(s
->local
.in6
.sin6_port
);
395 if (s
->read_packet
->ifindex
== 0)
396 s
->read_packet
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
397 if (s
->read_packet
->ifindex
== 0)
398 s
->read_packet
->ifindex
= s
->local
.in6
.sin6_scope_id
;
402 ss
= dns_stream_read(s
,
403 (uint8_t*) DNS_PACKET_DATA(s
->read_packet
) + s
->n_read
- sizeof(s
->read_size
),
404 sizeof(s
->read_size
) + be16toh(s
->read_size
) - s
->n_read
);
406 if (!IN_SET(-ss
, EINTR
, EAGAIN
))
407 return dns_stream_complete(s
, -ss
);
409 return dns_stream_complete(s
, ECONNRESET
);
414 /* Are we done? If so, disable the event source for EPOLLIN */
415 if (s
->n_read
>= sizeof(s
->read_size
) + be16toh(s
->read_size
)) {
416 /* If there's a packet handler
417 * installed, call that. Note that
418 * this is optional... */
425 r
= dns_stream_update_io(s
);
427 return dns_stream_complete(s
, -r
);
432 /* Call "complete" callback if finished reading and writing one packet, and there's nothing else left to write. */
434 if (s
->type
== DNS_STREAM_LLMNR_SEND
&&
435 (s
->write_packet
&& s
->n_written
>= sizeof(s
->write_size
) + s
->write_packet
->size
) &&
436 ordered_set_isempty(s
->write_queue
) &&
437 (s
->read_packet
&& s
->n_read
>= sizeof(s
->read_size
) + s
->read_packet
->size
))
438 return dns_stream_complete(s
, 0);
440 /* If we did something, let's restart the timeout event source */
441 if (progressed
&& s
->timeout_event_source
) {
442 r
= sd_event_source_set_time(s
->timeout_event_source
, now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC
);
444 log_warning_errno(errno
, "Couldn't restart TCP connection timeout, ignoring: %m");
450 static DnsStream
*dns_stream_free(DnsStream
*s
) {
459 LIST_REMOVE(streams
, s
->manager
->dns_streams
, s
);
460 s
->manager
->n_dns_streams
[s
->type
]--;
463 #if ENABLE_DNS_OVER_TLS
465 dnstls_stream_free(s
);
468 ORDERED_SET_FOREACH(p
, s
->write_queue
, i
)
469 dns_packet_unref(ordered_set_remove(s
->write_queue
, p
));
471 dns_packet_unref(s
->write_packet
);
472 dns_packet_unref(s
->read_packet
);
473 dns_server_unref(s
->server
);
475 ordered_set_free(s
->write_queue
);
480 DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream
, dns_stream
, dns_stream_free
);
486 DnsProtocol protocol
,
488 const union sockaddr_union
*tfo_address
) {
490 _cleanup_(dns_stream_unrefp
) DnsStream
*s
= NULL
;
496 assert(type
< _DNS_STREAM_TYPE_MAX
);
497 assert(protocol
>= 0);
498 assert(protocol
< _DNS_PROTOCOL_MAX
);
501 if (m
->n_dns_streams
[type
] > DNS_STREAMS_MAX
)
504 s
= new(DnsStream
, 1);
511 .protocol
= protocol
,
514 r
= ordered_set_ensure_allocated(&s
->write_queue
, &dns_packet_hash_ops
);
518 r
= sd_event_add_io(m
->event
, &s
->io_event_source
, fd
, EPOLLIN
, on_stream_io
, s
);
522 (void) sd_event_source_set_description(s
->io_event_source
, "dns-stream-io");
524 r
= sd_event_add_time(
526 &s
->timeout_event_source
,
527 clock_boottime_or_monotonic(),
528 now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC
, 0,
529 on_stream_timeout
, s
);
533 (void) sd_event_source_set_description(s
->timeout_event_source
, "dns-stream-timeout");
535 LIST_PREPEND(streams
, m
->dns_streams
, s
);
536 m
->n_dns_streams
[type
]++;
542 s
->tfo_address
= *tfo_address
;
543 s
->tfo_salen
= tfo_address
->sa
.sa_family
== AF_INET6
? sizeof(tfo_address
->in6
) : sizeof(tfo_address
->in
);
551 int dns_stream_write_packet(DnsStream
*s
, DnsPacket
*p
) {
557 r
= ordered_set_put(s
->write_queue
, p
);
563 return dns_stream_update_io(s
);
566 DnsPacket
*dns_stream_take_read_packet(DnsStream
*s
) {
572 if (s
->n_read
< sizeof(s
->read_size
))
575 if (s
->n_read
< sizeof(s
->read_size
) + be16toh(s
->read_size
))
579 return TAKE_PTR(s
->read_packet
);
582 void dns_stream_detach(DnsStream
*s
) {
588 if (s
->server
->stream
!= s
)
591 dns_server_unref_stream(s
->server
);