1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "alloc-util.h"
8 #include "errno-util.h"
10 #include "iovec-util.h"
12 #include "missing_network.h"
13 #include "ordered-set.h"
14 #include "resolved-dns-packet.h"
15 #include "resolved-dns-server.h"
16 #include "resolved-dns-stream.h"
17 #include "resolved-manager.h"
19 #include "time-util.h"
/* Limit that the per-type stream counter m->n_dns_streams[type] is compared against when a new stream is set up. */
21 #define DNS_STREAMS_MAX 128
/* Limit on set_size(s->queries): dns_stream_update_io() only considers reading further packets while below it. */
23 #define DNS_QUERIES_PER_STREAM 32
/* Tears down the I/O side of a stream: both event sources are handed to
 * sd_event_source_disable_unref() and the file descriptor to safe_close(); each return value
 * is stored back into the field it came from. */
25 static void dns_stream_stop(DnsStream
*s
) {
28 s
->io_event_source
= sd_event_source_disable_unref(s
->io_event_source
);
29 s
->timeout_event_source
= sd_event_source_disable_unref(s
->timeout_event_source
);
30 s
->fd
= safe_close(s
->fd
);
32 /* Disconnect us from the server object if we are now not usable anymore */
/* Re-evaluates which I/O events the stream is interested in, based on its write state (packet in
 * flight, queued packets) and read state (packet in progress, number of pending queries). The
 * resulting mask 'f' is remembered in s->requested_events and applied to the IO event source.
 *
 * Returns the result of sd_event_source_set_io_events(). */
36 static int dns_stream_update_io(DnsStream
*s
) {
/* Is there a packet in flight whose length prefix plus payload has not been written completely? */
41 if (s
->write_packet
&& s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
)
/* Otherwise, if something is queued: release the previous packet, make the first queued packet the
 * current one and store its size, converted with htobe16(), as the length prefix to send. */
43 else if (!ordered_set_isempty(s
->write_queue
)) {
44 dns_packet_unref(s
->write_packet
);
45 s
->write_packet
= ordered_set_steal_first(s
->write_queue
);
46 s
->write_size
= htobe16(s
->write_packet
->size
);
51 /* Let's read a packet if we haven't queued any yet. Except if we already hit a limit of parallel
52 * queries for this connection. */
53 if ((!s
->read_packet
|| s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
) &&
54 set_size(s
->queries
) < DNS_QUERIES_PER_STREAM
)
/* Remember what we asked for; on_stream_io() later tests this mask for EPOLLIN and EPOLLOUT. */
57 s
->requested_events
= f
;
59 #if ENABLE_DNS_OVER_TLS
60 /* For handshake and clean closing purposes, TLS can override requested events */
61 if (s
->dnstls_events
!= 0)
65 return sd_event_source_set_io_events(s
->io_event_source
, f
);
/* Finishes a stream with the given error code. An extra reference is held for the duration of the
 * call so that the stream stays valid while it is processed. When built with
 * ENABLE_DNS_OVER_TLS, dnstls_stream_shutdown() is invoked with the same error. The s->complete
 * callback, if there is one, is then called with (s, error); the 'else' branch handles the case
 * where no completion callback is set. */
68 static int dns_stream_complete(DnsStream
*s
, int error
) {
69 _cleanup_(dns_stream_unrefp
) _unused_ DnsStream
*ref
= dns_stream_ref(s
); /* Protect stream while we process it */
74 /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
75 * and received exactly one packet each (in the LLMNR client case). */
77 #if ENABLE_DNS_OVER_TLS
81 r
= dnstls_stream_shutdown(s
, error
);
91 s
->complete(s
, error
);
92 else /* the default action if no completion function is set is to close the stream */
/* Collects metadata about the connected socket and stores it in the stream:
 *
 *  - local and peer address, via getsockname() and getpeername()
 *  - interface index: from sin6_scope_id for IPv6 addresses, from the packet info control messages
 *    returned by IP_PKTOPTIONS / IPV6_2292PKTOPTIONS, and finally from manager_find_ifindex()
 *  - TTL, from the same control messages
 *
 * For LLMNR streams with a known interface index the socket is additionally pinned to that
 * interface with socket_set_unicast_if(); a failure there is only logged at debug level.
 *
 * Sets s->identified once done. -EAFNOSUPPORT is returned after the two address family branches
 * of the socket option query. */
98 static int dns_stream_identify(DnsStream
*s
) {
/* Buffer for the control messages: room for either packet info structure plus one int. */
99 CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo
, struct in6_pktinfo
))
100 + CMSG_SPACE(int) + /* for the TTL */
101 + EXTRA_CMSG_SPACE
/* kernel appears to require extra space */) control
;
102 struct msghdr mh
= {};
103 struct cmsghdr
*cmsg
;
112 /* Query the local side */
113 s
->local_salen
= sizeof(s
->local
);
114 r
= getsockname(s
->fd
, &s
->local
.sa
, &s
->local_salen
);
/* For IPv6, the scope ID of the local address is used as interface index if none is set yet. */
117 if (s
->local
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
118 s
->ifindex
= s
->local
.in6
.sin6_scope_id
;
120 /* Query the remote side */
121 s
->peer_salen
= sizeof(s
->peer
);
122 r
= getpeername(s
->fd
, &s
->peer
.sa
, &s
->peer_salen
);
/* Same fallback as above, this time using the scope ID of the peer address. */
125 if (s
->peer
.sa
.sa_family
== AF_INET6
&& s
->ifindex
<= 0)
126 s
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
128 /* Check consistency */
129 assert(s
->peer
.sa
.sa_family
== s
->local
.sa
.sa_family
);
130 assert(IN_SET(s
->peer
.sa
.sa_family
, AF_INET
, AF_INET6
));
132 /* Query connection meta information */
133 sl
= sizeof(control
);
134 if (s
->peer
.sa
.sa_family
== AF_INET
) {
135 r
= getsockopt(s
->fd
, IPPROTO_IP
, IP_PKTOPTIONS
, &control
, &sl
);
138 } else if (s
->peer
.sa
.sa_family
== AF_INET6
) {
140 r
= getsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_2292PKTOPTIONS
, &control
, &sl
);
144 return -EAFNOSUPPORT
;
/* Wrap the option buffer in a msghdr so that it can be walked with CMSG_FOREACH(). */
146 mh
.msg_control
= &control
;
147 mh
.msg_controllen
= sl
;
149 CMSG_FOREACH(cmsg
, &mh
) {
151 if (cmsg
->cmsg_level
== IPPROTO_IPV6
) {
152 assert(s
->peer
.sa
.sa_family
== AF_INET6
);
154 switch (cmsg
->cmsg_type
) {
157 struct in6_pktinfo
*i
= CMSG_TYPED_DATA(cmsg
, struct in6_pktinfo
);
160 s
->ifindex
= i
->ipi6_ifindex
;
165 s
->ttl
= *CMSG_TYPED_DATA(cmsg
, int);
169 } else if (cmsg
->cmsg_level
== IPPROTO_IP
) {
170 assert(s
->peer
.sa
.sa_family
== AF_INET
);
172 switch (cmsg
->cmsg_type
) {
175 struct in_pktinfo
*i
= CMSG_TYPED_DATA(cmsg
, struct in_pktinfo
);
178 s
->ifindex
= i
->ipi_ifindex
;
183 s
->ttl
= *CMSG_TYPED_DATA(cmsg
, int);
189 /* The Linux kernel sets the interface index to the loopback
190 * device if the connection came from the local host since it
191 * avoids the routing table in such a case. Let's unset the
192 * interface index in such a case. */
193 if (s
->ifindex
== LOOPBACK_IFINDEX
)
196 /* If we don't know the interface index still, we look for the
197 * first local interface with a matching address. Yuck! */
199 s
->ifindex
= manager_find_ifindex(s
->manager
, s
->local
.sa
.sa_family
, sockaddr_in_addr(&s
->local
.sa
));
201 if (s
->protocol
== DNS_PROTOCOL_LLMNR
&& s
->ifindex
> 0) {
202 /* Make sure all packets for this connection are sent on the same interface */
203 r
= socket_set_unicast_if(s
->fd
, s
->local
.sa
.sa_family
, s
->ifindex
);
205 log_debug_errno(r
, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
/* From here on on_stream_io() is allowed to read packets from this stream. */
208 s
->identified
= true;
/* Writes the given I/O vector to the stream.
 *
 *  - With ENABLE_DNS_OVER_TLS, an encrypted stream hands the data to dnstls_stream_writev(), unless
 *    the caller passed DNS_STREAM_WRITE_TLS_DATA in 'flags'.
 *  - While a TCP Fast Open address is still pending (s->tfo_salen > 0), the data is sent with
 *    sendmsg(MSG_FASTOPEN) to that address. If that is not supported, a regular connect() is
 *    issued instead and -EAGAIN is returned so that the caller tries again.
 *  - Otherwise the data is written with plain writev() on the stream's file descriptor. */
213 ssize_t
dns_stream_writev(DnsStream
*s
, const struct iovec
*iov
, size_t iovcnt
, int flags
) {
220 #if ENABLE_DNS_OVER_TLS
221 if (s
->encrypted
&& !(flags
& DNS_STREAM_WRITE_TLS_DATA
))
222 return dnstls_stream_writev(s
, iov
, iovcnt
);
225 if (s
->tfo_salen
> 0) {
/* The cast drops the const qualifier because msg_iov is declared as a non-const pointer. */
226 struct msghdr hdr
= {
227 .msg_iov
= (struct iovec
*) iov
,
228 .msg_iovlen
= iovcnt
,
229 .msg_name
= &s
->tfo_address
.sa
,
230 .msg_namelen
= s
->tfo_salen
233 m
= sendmsg(s
->fd
, &hdr
, MSG_FASTOPEN
);
235 if (ERRNO_IS_NOT_SUPPORTED(errno
)) {
236 /* MSG_FASTOPEN not supported? Then try to connect() traditionally */
237 r
= RET_NERRNO(connect(s
->fd
, &s
->tfo_address
.sa
, s
->tfo_salen
));
238 s
->tfo_salen
= 0; /* connection is made */
239 if (r
< 0 && r
!= -EINPROGRESS
)
242 return -EAGAIN
; /* In case of EINPROGRESS, EAGAIN or success: return EAGAIN, so that caller calls us again */
244 if (errno
== EINPROGRESS
)
249 s
->tfo_salen
= 0; /* connection is made */
251 m
= writev(s
->fd
, iov
, iovcnt
);
/* Reads up to 'count' bytes from the stream into 'buf'. In the ENABLE_DNS_OVER_TLS section the
 * data is obtained through dnstls_stream_read(); the other read path is a plain read() on the
 * stream's file descriptor. */
259 static ssize_t
dns_stream_read(DnsStream
*s
, void *buf
, size_t count
) {
262 #if ENABLE_DNS_OVER_TLS
264 ss
= dnstls_stream_read(s
, buf
, count
);
268 ss
= read(s
->fd
, buf
, count
);
/* Timer callback installed for the stream's timeout event source. 'userdata' is the DnsStream
 * and must not be NULL; the stream is completed with ETIMEDOUT and the result of
 * dns_stream_complete() is returned. */
276 static int on_stream_timeout(sd_event_source
*es
, usec_t usec
, void *userdata
) {
277 DnsStream
*s
= ASSERT_PTR(userdata
);
279 return dns_stream_complete(s
, ETIMEDOUT
);
/* Hands the stream's read packet over to the caller: s->read_packet is returned via TAKE_PTR().
 * The two checks before that compare the number of bytes read so far against the size of the
 * length prefix and against length prefix plus announced packet size. */
282 static DnsPacket
*dns_stream_take_read_packet(DnsStream
*s
) {
285 /* Note, dns_stream_update_io() should be called after this is called. When this is called, the
286 * stream may be already full and the EPOLLIN flag is dropped from the stream IO event source.
287 * Even though this makes room to read in the stream, this does not call dns_stream_update_io(),
288 * hence the EPOLLIN flag is not set automatically. So, to read further packets from the stream,
289 * dns_stream_update_io() must be called explicitly. Currently, this is only called from
290 * on_stream_io(), and there dns_stream_update_io() is called. */
295 if (s
->n_read
< sizeof(s
->read_size
))
298 if (s
->n_read
< sizeof(s
->read_size
) + be16toh(s
->read_size
))
302 return TAKE_PTR(s
->read_packet
);
/* I/O callback of the stream's IO event source; 'userdata' is the DnsStream, which is referenced
 * for the duration of the call. In order, the function:
 *
 *  1. lets the TLS layer process the event first (ENABLE_DNS_OVER_TLS only),
 *  2. identifies the connection via dns_stream_identify() once no TCP Fast Open address is
 *     pending anymore (s->tfo_salen == 0),
 *  3. on EPOLLERR fetches the socket error with SO_ERROR and completes the stream with it,
 *  4. on EPOLLOUT writes the length prefix and the payload of the current write packet,
 *  5. reads length prefix and payload of incoming packets and passes each complete packet to
 *     s->on_packet(),
 *  6. completes DNS_STREAM_LLMNR_SEND streams with 0 once a packet was received and EPOLLOUT is no
 *     longer requested,
 *  7. restarts the timeout if any progress was made.
 *
 * Failures are reported through dns_stream_complete(), which is given a positive errno-style
 * value (-r, -ss, ECONNRESET, EBADMSG). */
305 static int on_stream_io(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
306 _cleanup_(dns_stream_unrefp
) DnsStream
*s
= dns_stream_ref(userdata
); /* Protect stream while we process it */
307 bool progressed
= false;
312 #if ENABLE_DNS_OVER_TLS
314 r
= dnstls_stream_on_io(s
, revents
);
315 if (r
== DNSTLS_STREAM_CLOSED
)
318 return dns_stream_update_io(s
);
320 return dns_stream_complete(s
, -r
);
322 r
= dns_stream_update_io(s
);
328 /* only identify after connecting */
329 if (s
->tfo_salen
== 0) {
330 r
= dns_stream_identify(s
);
332 return dns_stream_complete(s
, -r
);
/* The socket signalled an error: query it with SO_ERROR (the value is stored in 'r') and, if the
 * query itself succeeded, complete the stream with that value. */
335 if (revents
& EPOLLERR
) {
336 socklen_t errlen
= sizeof(r
);
337 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &r
, &errlen
) == 0)
338 return dns_stream_complete(s
, r
);
341 if ((revents
& EPOLLOUT
) &&
343 s
->n_written
< sizeof(s
->write_size
) + s
->write_packet
->size
) {
/* Two-element vector: the length prefix followed by the packet data. iovec_increment() is given
 * the number of bytes written so far, presumably to skip over them - confirm against
 * iovec-util.h. */
345 struct iovec iov
[] = {
346 IOVEC_MAKE(&s
->write_size
, sizeof(s
->write_size
)),
347 IOVEC_MAKE(DNS_PACKET_DATA(s
->write_packet
), s
->write_packet
->size
),
350 iovec_increment(iov
, ELEMENTSOF(iov
), s
->n_written
);
352 ssize_t ss
= dns_stream_writev(s
, iov
, ELEMENTSOF(iov
), 0);
/* Only non-transient errors terminate the stream. */
354 if (!ERRNO_IS_TRANSIENT(ss
))
355 return dns_stream_complete(s
, -ss
);
361 /* Are we done? If so, disable the event source for EPOLLOUT */
362 if (s
->n_written
>= sizeof(s
->write_size
) + s
->write_packet
->size
) {
363 r
= dns_stream_update_io(s
);
365 return dns_stream_complete(s
, -r
);
369 while (s
->identified
&& /* Only read data once we identified the peer, because we cannot fill in the DNS packet meta info otherwise */
370 (revents
& (EPOLLIN
|EPOLLHUP
|EPOLLRDHUP
)) &&
372 s
->n_read
< sizeof(s
->read_size
) + s
->read_packet
->size
)) {
/* Phase 1: the length prefix is not complete yet, read its remaining bytes directly into
 * s->read_size at the current offset. */
374 if (s
->n_read
< sizeof(s
->read_size
)) {
377 ss
= dns_stream_read(s
, (uint8_t*) &s
->read_size
+ s
->n_read
, sizeof(s
->read_size
) - s
->n_read
);
379 if (!ERRNO_IS_TRANSIENT(ss
))
380 return dns_stream_complete(s
, -ss
);
383 return dns_stream_complete(s
, ECONNRESET
);
/* Phase 2: the length prefix is known. */
390 if (s
->n_read
>= sizeof(s
->read_size
)) {
/* A packet announced as smaller than a DNS packet header is rejected with EBADMSG. */
392 if (be16toh(s
->read_size
) < DNS_PACKET_HEADER_SIZE
)
393 return dns_stream_complete(s
, EBADMSG
);
395 if (s
->n_read
< sizeof(s
->read_size
) + be16toh(s
->read_size
)) {
/* Allocate the packet on first use and fill in the metadata collected by
 * dns_stream_identify(). */
398 if (!s
->read_packet
) {
399 r
= dns_packet_new(&s
->read_packet
, s
->protocol
, be16toh(s
->read_size
), DNS_PACKET_SIZE_MAX
);
401 return dns_stream_complete(s
, -r
);
403 s
->read_packet
->size
= be16toh(s
->read_size
);
404 s
->read_packet
->ipproto
= IPPROTO_TCP
;
405 s
->read_packet
->family
= s
->peer
.sa
.sa_family
;
406 s
->read_packet
->ttl
= s
->ttl
;
407 s
->read_packet
->ifindex
= s
->ifindex
;
408 s
->read_packet
->timestamp
= now(CLOCK_BOOTTIME
);
/* Sender is the peer, destination is our local end; ports are converted with be16toh(). */
410 if (s
->read_packet
->family
== AF_INET
) {
411 s
->read_packet
->sender
.in
= s
->peer
.in
.sin_addr
;
412 s
->read_packet
->sender_port
= be16toh(s
->peer
.in
.sin_port
);
413 s
->read_packet
->destination
.in
= s
->local
.in
.sin_addr
;
414 s
->read_packet
->destination_port
= be16toh(s
->local
.in
.sin_port
);
416 assert(s
->read_packet
->family
== AF_INET6
);
417 s
->read_packet
->sender
.in6
= s
->peer
.in6
.sin6_addr
;
418 s
->read_packet
->sender_port
= be16toh(s
->peer
.in6
.sin6_port
);
419 s
->read_packet
->destination
.in6
= s
->local
.in6
.sin6_addr
;
420 s
->read_packet
->destination_port
= be16toh(s
->local
.in6
.sin6_port
);
/* Without an interface index, fall back to the scope ID of the peer address, then to that of the
 * local address. */
422 if (s
->read_packet
->ifindex
== 0)
423 s
->read_packet
->ifindex
= s
->peer
.in6
.sin6_scope_id
;
424 if (s
->read_packet
->ifindex
== 0)
425 s
->read_packet
->ifindex
= s
->local
.in6
.sin6_scope_id
;
/* Read the outstanding part of the payload: the target offset inside the packet data is the
 * number of bytes read so far minus the size of the length prefix. */
429 ss
= dns_stream_read(s
,
430 (uint8_t*) DNS_PACKET_DATA(s
->read_packet
) + s
->n_read
- sizeof(s
->read_size
),
431 sizeof(s
->read_size
) + be16toh(s
->read_size
) - s
->n_read
);
433 if (!ERRNO_IS_TRANSIENT(ss
))
434 return dns_stream_complete(s
, -ss
);
437 return dns_stream_complete(s
, ECONNRESET
);
442 /* Are we done? If so, call the packet handler and re-enable EPOLLIN for the
443 * event source if necessary. */
444 _cleanup_(dns_packet_unrefp
) DnsPacket
*p
= dns_stream_take_read_packet(s
);
446 assert(s
->on_packet
);
447 r
= s
->on_packet(s
, p
);
451 r
= dns_stream_update_io(s
);
453 return dns_stream_complete(s
, -r
);
455 s
->packet_received
= true;
457 /* If we just disabled the read event, stop reading */
458 if (!FLAGS_SET(s
->requested_events
, EPOLLIN
))
464 /* Complete the stream if finished reading and writing one packet, and there's nothing
465 * else left to write. */
466 if (s
->type
== DNS_STREAM_LLMNR_SEND
&& s
->packet_received
&&
467 !FLAGS_SET(s
->requested_events
, EPOLLOUT
))
468 return dns_stream_complete(s
, 0);
470 /* If we did something, let's restart the timeout event source */
471 if (progressed
&& s
->timeout_event_source
) {
472 r
= sd_event_source_set_time_relative(s
->timeout_event_source
, DNS_STREAM_ESTABLISHED_TIMEOUT_USEC
);
474 log_warning_errno(r
, "Couldn't restart TCP connection timeout, ignoring: %m");
/* Destructor used by the reference counting helpers. Unlinks the stream from the manager's list of
 * streams and decrements the per-type counter, releases TLS state when built with
 * ENABLE_DNS_OVER_TLS, drops the references to all packets still queued for writing, to the
 * current write and read packets and to the server, and finally frees the write queue itself. */
480 static DnsStream
*dns_stream_free(DnsStream
*s
) {
488 LIST_REMOVE(streams
, s
->manager
->dns_streams
, s
);
489 s
->manager
->n_dns_streams
[s
->type
]--;
492 #if ENABLE_DNS_OVER_TLS
494 dnstls_stream_free(s
);
/* Remove every packet that is still queued and drop the reference the queue held on it. */
497 ORDERED_SET_FOREACH(p
, s
->write_queue
)
498 dns_packet_unref(ordered_set_remove(s
->write_queue
, p
));
500 dns_packet_unref(s
->write_packet
);
501 dns_packet_unref(s
->read_packet
);
502 dns_server_unref(s
->server
);
504 ordered_set_free(s
->write_queue
);
/* Reference counting helpers for DnsStream, with dns_stream_free() as the function to run when a
 * stream is released. Presumably this is what provides the dns_stream_ref()/dns_stream_unref()
 * functions used in this file - confirm against the macro definition. */
509 DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream
, dns_stream
, dns_stream_free
);
/* NOTE(review): the beginning of this definition (return type, function name and the leading
 * parameters, among them the manager 'm', 'type' and 'fd' used below) is not part of this copy of
 * the file. Presumably this is the stream constructor - confirm against the header.
 *
 * What the visible part does: validates 'type' and 'protocol', compares the per-type stream
 * counter against DNS_STREAMS_MAX, allocates a DnsStream, sets up its write queue, registers an
 * IO event source (initially EPOLLIN, handled by on_stream_io()) and a relative timer
 * (connect_timeout_usec, handled by on_stream_timeout()), links the stream into the manager's list
 * and stores the callbacks and the TCP Fast Open address. */
515 DnsProtocol protocol
,
517 const union sockaddr_union
*tfo_address
,
518 int (on_packet
)(DnsStream
*, DnsPacket
*),
519 int (complete
)(DnsStream
*, int), /* optional */
520 usec_t connect_timeout_usec
) {
522 _cleanup_(dns_stream_unrefp
) DnsStream
*s
= NULL
;
528 assert(type
< _DNS_STREAM_TYPE_MAX
);
529 assert(protocol
>= 0);
530 assert(protocol
< _DNS_PROTOCOL_MAX
);
/* Streams are counted per type; this is the check against the limit. */
534 if (m
->n_dns_streams
[type
] > DNS_STREAMS_MAX
)
537 s
= new(DnsStream
, 1);
544 .protocol
= protocol
,
548 r
= ordered_set_ensure_allocated(&s
->write_queue
, &dns_packet_hash_ops
);
552 r
= sd_event_add_io(m
->event
, &s
->io_event_source
, fd
, EPOLLIN
, on_stream_io
, s
);
556 (void) sd_event_source_set_description(s
->io_event_source
, "dns-stream-io");
558 r
= sd_event_add_time_relative(
560 &s
->timeout_event_source
,
562 connect_timeout_usec
, 0,
563 on_stream_timeout
, s
);
567 (void) sd_event_source_set_description(s
->timeout_event_source
, "dns-stream-timeout");
/* Register the stream with the manager; dns_stream_free() undoes both steps. */
569 LIST_PREPEND(streams
, m
->dns_streams
, s
);
570 m
->n_dns_streams
[type
]++;
574 s
->on_packet
= on_packet
;
575 s
->complete
= complete
;
/* Remember the address for TCP Fast Open; dns_stream_writev() uses it while tfo_salen > 0. */
578 s
->tfo_address
= *tfo_address
;
579 s
->tfo_salen
= sockaddr_len(tfo_address
);
/* Queues packet 'p' for sending on stream 's' by adding it to the stream's write queue, then
 * refreshes the requested I/O events. Returns the result of dns_stream_update_io(). */
587 int dns_stream_write_packet(DnsStream
*s
, DnsPacket
*p
) {
593 r
= ordered_set_put(s
->write_queue
, p
);
599 return dns_stream_update_io(s
);
/* Detaches the stream from its server object: checks whether the server's stream pointer refers to
 * 's' and calls dns_server_unref_stream() on the server. */
602 void dns_stream_detach(DnsStream
*s
) {
608 if (s
->server
->stream
!= s
)
611 dns_server_unref_stream(s
->server
);
/* File-private hash operations with a key destructor, using trivial_compare_func for comparison.
 * NOTE(review): the remaining macro arguments are not visible here; presumably this defines the
 * dns_stream_hash_ops used by dns_stream_disconnect_all() - confirm. */
614 DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
618 trivial_compare_func
,
621 int dns_stream_disconnect_all(Manager
*m
) {
622 _cleanup_set_free_ Set
*closed
= NULL
;
627 /* Terminates all TCP connections (called after system suspend for example, to speed up recovery) */
629 log_info("Closing all remaining TCP connections.");
635 LIST_FOREACH(streams
, s
, m
->dns_streams
) {
636 r
= set_ensure_put(&closed
, &dns_stream_hash_ops
, s
);
640 /* Haven't seen this one before. Close it. */
642 (void) dns_stream_complete(s
, ECONNRESET
);
644 /* This might have a ripple effect, let's hence not look at the list further,
645 * but scan from the beginning again */