]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/resolve/resolved-dns-stream.c
ci: enable arm64 runner for build/unit jobs
[thirdparty/systemd.git] / src / resolve / resolved-dns-stream.c
... / ...
CommitLineData
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <unistd.h>
4
5#include "sd-event.h"
6
7#include "alloc-util.h"
8#include "errno-util.h"
9#include "fd-util.h"
10#include "iovec-util.h"
11#include "log.h"
12#include "missing_network.h"
13#include "ordered-set.h"
14#include "resolved-dns-packet.h"
15#include "resolved-dns-server.h"
16#include "resolved-dns-stream.h"
17#include "resolved-manager.h"
18#include "set.h"
19#include "time-util.h"
20
/* Threshold for the per-type stream counter: dns_stream_new() refuses to create another stream with
 * -EBUSY once the manager's counter for that stream type exceeds this value. */
#define DNS_STREAMS_MAX 128

/* Once this many queries are attached to a single stream, dns_stream_update_io() stops requesting
 * EPOLLIN for it, i.e. no further packets are read until the number drops again. */
#define DNS_QUERIES_PER_STREAM 32
24
25static void dns_stream_stop(DnsStream *s) {
26 assert(s);
27
28 s->io_event_source = sd_event_source_disable_unref(s->io_event_source);
29 s->timeout_event_source = sd_event_source_disable_unref(s->timeout_event_source);
30 s->fd = safe_close(s->fd);
31
32 /* Disconnect us from the server object if we are now not usable anymore */
33 dns_stream_detach(s);
34}
35
36static int dns_stream_update_io(DnsStream *s) {
37 uint32_t f = 0;
38
39 assert(s);
40
41 if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
42 f |= EPOLLOUT;
43 else if (!ordered_set_isempty(s->write_queue)) {
44 dns_packet_unref(s->write_packet);
45 s->write_packet = ordered_set_steal_first(s->write_queue);
46 s->write_size = htobe16(s->write_packet->size);
47 s->n_written = 0;
48 f |= EPOLLOUT;
49 }
50
51 /* Let's read a packet if we haven't queued any yet. Except if we already hit a limit of parallel
52 * queries for this connection. */
53 if ((!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size) &&
54 set_size(s->queries) < DNS_QUERIES_PER_STREAM)
55 f |= EPOLLIN;
56
57 s->requested_events = f;
58
59#if ENABLE_DNS_OVER_TLS
60 /* For handshake and clean closing purposes, TLS can override requested events */
61 if (s->dnstls_events != 0)
62 f = s->dnstls_events;
63#endif
64
65 return sd_event_source_set_io_events(s->io_event_source, f);
66}
67
68static int dns_stream_complete(DnsStream *s, int error) {
69 _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
70
71 assert(s);
72 assert(error >= 0);
73
74 /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
75 * and received exactly one packet each (in the LLMNR client case). */
76
77#if ENABLE_DNS_OVER_TLS
78 if (s->encrypted) {
79 int r;
80
81 r = dnstls_stream_shutdown(s, error);
82 if (r != -EAGAIN)
83 dns_stream_stop(s);
84 } else
85#endif
86 dns_stream_stop(s);
87
88 dns_stream_detach(s);
89
90 if (s->complete)
91 s->complete(s, error);
92 else /* the default action if no completion function is set is to close the stream */
93 dns_stream_unref(s);
94
95 return 0;
96}
97
98static int dns_stream_identify(DnsStream *s) {
99 CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
100 + CMSG_SPACE(int) + /* for the TTL */
101 + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
102 struct msghdr mh = {};
103 struct cmsghdr *cmsg;
104 socklen_t sl;
105 int r;
106
107 assert(s);
108
109 if (s->identified)
110 return 0;
111
112 /* Query the local side */
113 s->local_salen = sizeof(s->local);
114 r = getsockname(s->fd, &s->local.sa, &s->local_salen);
115 if (r < 0)
116 return -errno;
117 if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
118 s->ifindex = s->local.in6.sin6_scope_id;
119
120 /* Query the remote side */
121 s->peer_salen = sizeof(s->peer);
122 r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
123 if (r < 0)
124 return -errno;
125 if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
126 s->ifindex = s->peer.in6.sin6_scope_id;
127
128 /* Check consistency */
129 assert(s->peer.sa.sa_family == s->local.sa.sa_family);
130 assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
131
132 /* Query connection meta information */
133 sl = sizeof(control);
134 if (s->peer.sa.sa_family == AF_INET) {
135 r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
136 if (r < 0)
137 return -errno;
138 } else if (s->peer.sa.sa_family == AF_INET6) {
139
140 r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
141 if (r < 0)
142 return -errno;
143 } else
144 return -EAFNOSUPPORT;
145
146 mh.msg_control = &control;
147 mh.msg_controllen = sl;
148
149 CMSG_FOREACH(cmsg, &mh) {
150
151 if (cmsg->cmsg_level == IPPROTO_IPV6) {
152 assert(s->peer.sa.sa_family == AF_INET6);
153
154 switch (cmsg->cmsg_type) {
155
156 case IPV6_PKTINFO: {
157 struct in6_pktinfo *i = CMSG_TYPED_DATA(cmsg, struct in6_pktinfo);
158
159 if (s->ifindex <= 0)
160 s->ifindex = i->ipi6_ifindex;
161 break;
162 }
163
164 case IPV6_HOPLIMIT:
165 s->ttl = *CMSG_TYPED_DATA(cmsg, int);
166 break;
167 }
168
169 } else if (cmsg->cmsg_level == IPPROTO_IP) {
170 assert(s->peer.sa.sa_family == AF_INET);
171
172 switch (cmsg->cmsg_type) {
173
174 case IP_PKTINFO: {
175 struct in_pktinfo *i = CMSG_TYPED_DATA(cmsg, struct in_pktinfo);
176
177 if (s->ifindex <= 0)
178 s->ifindex = i->ipi_ifindex;
179 break;
180 }
181
182 case IP_TTL:
183 s->ttl = *CMSG_TYPED_DATA(cmsg, int);
184 break;
185 }
186 }
187 }
188
189 /* The Linux kernel sets the interface index to the loopback
190 * device if the connection came from the local host since it
191 * avoids the routing table in such a case. Let's unset the
192 * interface index in such a case. */
193 if (s->ifindex == LOOPBACK_IFINDEX)
194 s->ifindex = 0;
195
196 /* If we don't know the interface index still, we look for the
197 * first local interface with a matching address. Yuck! */
198 if (s->ifindex <= 0)
199 s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, sockaddr_in_addr(&s->local.sa));
200
201 if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
202 /* Make sure all packets for this connection are sent on the same interface */
203 r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
204 if (r < 0)
205 log_debug_errno(r, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
206 }
207
208 s->identified = true;
209
210 return 0;
211}
212
213ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
214 ssize_t m;
215 int r;
216
217 assert(s);
218 assert(iov);
219
220#if ENABLE_DNS_OVER_TLS
221 if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA))
222 return dnstls_stream_writev(s, iov, iovcnt);
223#endif
224
225 if (s->tfo_salen > 0) {
226 struct msghdr hdr = {
227 .msg_iov = (struct iovec*) iov,
228 .msg_iovlen = iovcnt,
229 .msg_name = &s->tfo_address.sa,
230 .msg_namelen = s->tfo_salen
231 };
232
233 m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
234 if (m < 0) {
235 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
236 /* MSG_FASTOPEN not supported? Then try to connect() traditionally */
237 r = RET_NERRNO(connect(s->fd, &s->tfo_address.sa, s->tfo_salen));
238 s->tfo_salen = 0; /* connection is made */
239 if (r < 0 && r != -EINPROGRESS)
240 return r;
241
242 return -EAGAIN; /* In case of EINPROGRESS, EAGAIN or success: return EAGAIN, so that caller calls us again */
243 }
244 if (errno == EINPROGRESS)
245 return -EAGAIN;
246
247 return -errno;
248 } else
249 s->tfo_salen = 0; /* connection is made */
250 } else {
251 m = writev(s->fd, iov, iovcnt);
252 if (m < 0)
253 return -errno;
254 }
255
256 return m;
257}
258
259static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
260 ssize_t ss;
261
262#if ENABLE_DNS_OVER_TLS
263 if (s->encrypted)
264 ss = dnstls_stream_read(s, buf, count);
265 else
266#endif
267 {
268 ss = read(s->fd, buf, count);
269 if (ss < 0)
270 return -errno;
271 }
272
273 return ss;
274}
275
276static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
277 DnsStream *s = ASSERT_PTR(userdata);
278
279 return dns_stream_complete(s, ETIMEDOUT);
280}
281
282static DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
283 assert(s);
284
285 /* Note, dns_stream_update() should be called after this is called. When this is called, the
286 * stream may be already full and the EPOLLIN flag is dropped from the stream IO event source.
287 * Even this makes a room to read in the stream, this does not call dns_stream_update(), hence
288 * EPOLLIN flag is not set automatically. So, to read further packets from the stream,
289 * dns_stream_update() must be called explicitly. Currently, this is only called from
290 * on_stream_io(), and there dns_stream_update() is called. */
291
292 if (!s->read_packet)
293 return NULL;
294
295 if (s->n_read < sizeof(s->read_size))
296 return NULL;
297
298 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
299 return NULL;
300
301 s->n_read = 0;
302 return TAKE_PTR(s->read_packet);
303}
304
305static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
306 _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
307 bool progressed = false;
308 int r;
309
310 assert(s);
311
312#if ENABLE_DNS_OVER_TLS
313 if (s->encrypted) {
314 r = dnstls_stream_on_io(s, revents);
315 if (r == DNSTLS_STREAM_CLOSED)
316 return 0;
317 if (r == -EAGAIN)
318 return dns_stream_update_io(s);
319 if (r < 0)
320 return dns_stream_complete(s, -r);
321
322 r = dns_stream_update_io(s);
323 if (r < 0)
324 return r;
325 }
326#endif
327
328 /* only identify after connecting */
329 if (s->tfo_salen == 0) {
330 r = dns_stream_identify(s);
331 if (r < 0)
332 return dns_stream_complete(s, -r);
333 }
334
335 if (revents & EPOLLERR) {
336 socklen_t errlen = sizeof(r);
337 if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &r, &errlen) == 0)
338 return dns_stream_complete(s, r);
339 }
340
341 if ((revents & EPOLLOUT) &&
342 s->write_packet &&
343 s->n_written < sizeof(s->write_size) + s->write_packet->size) {
344
345 struct iovec iov[] = {
346 IOVEC_MAKE(&s->write_size, sizeof(s->write_size)),
347 IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size),
348 };
349
350 iovec_increment(iov, ELEMENTSOF(iov), s->n_written);
351
352 ssize_t ss = dns_stream_writev(s, iov, ELEMENTSOF(iov), 0);
353 if (ss < 0) {
354 if (!ERRNO_IS_TRANSIENT(ss))
355 return dns_stream_complete(s, -ss);
356 } else {
357 progressed = true;
358 s->n_written += ss;
359 }
360
361 /* Are we done? If so, disable the event source for EPOLLOUT */
362 if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
363 r = dns_stream_update_io(s);
364 if (r < 0)
365 return dns_stream_complete(s, -r);
366 }
367 }
368
369 while (s->identified && /* Only read data once we identified the peer, because we cannot fill in the DNS packet meta info otherwise */
370 (revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
371 (!s->read_packet ||
372 s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
373
374 if (s->n_read < sizeof(s->read_size)) {
375 ssize_t ss;
376
377 ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
378 if (ss < 0) {
379 if (!ERRNO_IS_TRANSIENT(ss))
380 return dns_stream_complete(s, -ss);
381 break;
382 } else if (ss == 0)
383 return dns_stream_complete(s, ECONNRESET);
384 else {
385 progressed = true;
386 s->n_read += ss;
387 }
388 }
389
390 if (s->n_read >= sizeof(s->read_size)) {
391
392 if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
393 return dns_stream_complete(s, EBADMSG);
394
395 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
396 ssize_t ss;
397
398 if (!s->read_packet) {
399 r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
400 if (r < 0)
401 return dns_stream_complete(s, -r);
402
403 s->read_packet->size = be16toh(s->read_size);
404 s->read_packet->ipproto = IPPROTO_TCP;
405 s->read_packet->family = s->peer.sa.sa_family;
406 s->read_packet->ttl = s->ttl;
407 s->read_packet->ifindex = s->ifindex;
408 s->read_packet->timestamp = now(CLOCK_BOOTTIME);
409
410 if (s->read_packet->family == AF_INET) {
411 s->read_packet->sender.in = s->peer.in.sin_addr;
412 s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
413 s->read_packet->destination.in = s->local.in.sin_addr;
414 s->read_packet->destination_port = be16toh(s->local.in.sin_port);
415 } else {
416 assert(s->read_packet->family == AF_INET6);
417 s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
418 s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
419 s->read_packet->destination.in6 = s->local.in6.sin6_addr;
420 s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
421
422 if (s->read_packet->ifindex == 0)
423 s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
424 if (s->read_packet->ifindex == 0)
425 s->read_packet->ifindex = s->local.in6.sin6_scope_id;
426 }
427 }
428
429 ss = dns_stream_read(s,
430 (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
431 sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
432 if (ss < 0) {
433 if (!ERRNO_IS_TRANSIENT(ss))
434 return dns_stream_complete(s, -ss);
435 break;
436 } else if (ss == 0)
437 return dns_stream_complete(s, ECONNRESET);
438 else
439 s->n_read += ss;
440 }
441
442 /* Are we done? If so, call the packet handler and re-enable EPOLLIN for the
443 * event source if necessary. */
444 _cleanup_(dns_packet_unrefp) DnsPacket *p = dns_stream_take_read_packet(s);
445 if (p) {
446 assert(s->on_packet);
447 r = s->on_packet(s, p);
448 if (r < 0)
449 return r;
450
451 r = dns_stream_update_io(s);
452 if (r < 0)
453 return dns_stream_complete(s, -r);
454
455 s->packet_received = true;
456
457 /* If we just disabled the read event, stop reading */
458 if (!FLAGS_SET(s->requested_events, EPOLLIN))
459 break;
460 }
461 }
462 }
463
464 /* Complete the stream if finished reading and writing one packet, and there's nothing
465 * else left to write. */
466 if (s->type == DNS_STREAM_LLMNR_SEND && s->packet_received &&
467 !FLAGS_SET(s->requested_events, EPOLLOUT))
468 return dns_stream_complete(s, 0);
469
470 /* If we did something, let's restart the timeout event source */
471 if (progressed && s->timeout_event_source) {
472 r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_ESTABLISHED_TIMEOUT_USEC);
473 if (r < 0)
474 log_warning_errno(r, "Couldn't restart TCP connection timeout, ignoring: %m");
475 }
476
477 return 0;
478}
479
480static DnsStream *dns_stream_free(DnsStream *s) {
481 DnsPacket *p;
482
483 assert(s);
484
485 dns_stream_stop(s);
486
487 if (s->manager) {
488 LIST_REMOVE(streams, s->manager->dns_streams, s);
489 s->manager->n_dns_streams[s->type]--;
490 }
491
492#if ENABLE_DNS_OVER_TLS
493 if (s->encrypted)
494 dnstls_stream_free(s);
495#endif
496
497 ORDERED_SET_FOREACH(p, s->write_queue)
498 dns_packet_unref(ordered_set_remove(s->write_queue, p));
499
500 dns_packet_unref(s->write_packet);
501 dns_packet_unref(s->read_packet);
502 dns_server_unref(s->server);
503
504 ordered_set_free(s->write_queue);
505
506 return mfree(s);
507}
508
509DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
510
511int dns_stream_new(
512 Manager *m,
513 DnsStream **ret,
514 DnsStreamType type,
515 DnsProtocol protocol,
516 int fd,
517 const union sockaddr_union *tfo_address,
518 int (on_packet)(DnsStream*, DnsPacket*),
519 int (complete)(DnsStream*, int), /* optional */
520 usec_t connect_timeout_usec) {
521
522 _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
523 int r;
524
525 assert(m);
526 assert(ret);
527 assert(type >= 0);
528 assert(type < _DNS_STREAM_TYPE_MAX);
529 assert(protocol >= 0);
530 assert(protocol < _DNS_PROTOCOL_MAX);
531 assert(fd >= 0);
532 assert(on_packet);
533
534 if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
535 return -EBUSY;
536
537 s = new(DnsStream, 1);
538 if (!s)
539 return -ENOMEM;
540
541 *s = (DnsStream) {
542 .n_ref = 1,
543 .fd = -EBADF,
544 .protocol = protocol,
545 .type = type,
546 };
547
548 r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
549 if (r < 0)
550 return r;
551
552 r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
553 if (r < 0)
554 return r;
555
556 (void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");
557
558 r = sd_event_add_time_relative(
559 m->event,
560 &s->timeout_event_source,
561 CLOCK_BOOTTIME,
562 connect_timeout_usec, 0,
563 on_stream_timeout, s);
564 if (r < 0)
565 return r;
566
567 (void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");
568
569 LIST_PREPEND(streams, m->dns_streams, s);
570 m->n_dns_streams[type]++;
571 s->manager = m;
572
573 s->fd = fd;
574 s->on_packet = on_packet;
575 s->complete = complete;
576
577 if (tfo_address) {
578 s->tfo_address = *tfo_address;
579 s->tfo_salen = sockaddr_len(tfo_address);
580 }
581
582 *ret = TAKE_PTR(s);
583
584 return 0;
585}
586
587int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
588 int r;
589
590 assert(s);
591 assert(p);
592
593 r = ordered_set_put(s->write_queue, p);
594 if (r < 0)
595 return r;
596
597 dns_packet_ref(p);
598
599 return dns_stream_update_io(s);
600}
601
602void dns_stream_detach(DnsStream *s) {
603 assert(s);
604
605 if (!s->server)
606 return;
607
608 if (s->server->stream != s)
609 return;
610
611 dns_server_unref_stream(s->server);
612}
613
614DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
615 dns_stream_hash_ops,
616 void,
617 trivial_hash_func,
618 trivial_compare_func,
619 dns_stream_unref);
620
621int dns_stream_disconnect_all(Manager *m) {
622 _cleanup_set_free_ Set *closed = NULL;
623 int r;
624
625 assert(m);
626
627 /* Terminates all TCP connections (called after system suspend for example, to speed up recovery) */
628
629 log_info("Closing all remaining TCP connections.");
630
631 bool restart;
632 do {
633 restart = false;
634
635 LIST_FOREACH(streams, s, m->dns_streams) {
636 r = set_ensure_put(&closed, &dns_stream_hash_ops, s);
637 if (r < 0)
638 return log_oom();
639 if (r > 0) {
640 /* Haven't seen this one before. Close it. */
641 dns_stream_ref(s);
642 (void) dns_stream_complete(s, ECONNRESET);
643
644 /* This might have a ripple effect, let's hence no look at the list further,
645 * but scan from the beginning again */
646 restart = true;
647 break;
648 }
649 }
650 } while (restart);
651
652 return 0;
653}