]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/resolve/resolved-dns-stream.c
license: LGPL-2.1+ -> LGPL-2.1-or-later
[thirdparty/systemd.git] / src / resolve / resolved-dns-stream.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <netinet/tcp.h>
4 #include <unistd.h>
5
6 #include "alloc-util.h"
7 #include "fd-util.h"
8 #include "io-util.h"
9 #include "missing_network.h"
10 #include "resolved-dns-stream.h"
11 #include "resolved-manager.h"
12
13 #define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
14 #define DNS_STREAMS_MAX 128
15
16 #define DNS_QUERIES_PER_STREAM 32
17
18 static void dns_stream_stop(DnsStream *s) {
19 assert(s);
20
21 s->io_event_source = sd_event_source_unref(s->io_event_source);
22 s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
23 s->fd = safe_close(s->fd);
24
25 /* Disconnect us from the server object if we are now not usable anymore */
26 dns_stream_detach(s);
27 }
28
29 static int dns_stream_update_io(DnsStream *s) {
30 int f = 0;
31
32 assert(s);
33
34 if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
35 f |= EPOLLOUT;
36 else if (!ordered_set_isempty(s->write_queue)) {
37 dns_packet_unref(s->write_packet);
38 s->write_packet = ordered_set_steal_first(s->write_queue);
39 s->write_size = htobe16(s->write_packet->size);
40 s->n_written = 0;
41 f |= EPOLLOUT;
42 }
43
44 /* Let's read a packet if we haven't queued any yet. Except if we already hit a limit of parallel
45 * queries for this connection. */
46 if ((!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size) &&
47 set_size(s->queries) < DNS_QUERIES_PER_STREAM)
48 f |= EPOLLIN;
49
50 #if ENABLE_DNS_OVER_TLS
51 /* For handshake and clean closing purposes, TLS can override requested events */
52 if (s->dnstls_events != 0)
53 f = s->dnstls_events;
54 #endif
55
56 return sd_event_source_set_io_events(s->io_event_source, f);
57 }
58
59 static int dns_stream_complete(DnsStream *s, int error) {
60 _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
61
62 assert(s);
63 assert(error >= 0);
64
65 /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
66 * and received exactly one packet each (in the LLMNR client case). */
67
68 #if ENABLE_DNS_OVER_TLS
69 if (s->encrypted) {
70 int r;
71
72 r = dnstls_stream_shutdown(s, error);
73 if (r != -EAGAIN)
74 dns_stream_stop(s);
75 } else
76 #endif
77 dns_stream_stop(s);
78
79 dns_stream_detach(s);
80
81 if (s->complete)
82 s->complete(s, error);
83 else /* the default action if no completion function is set is to close the stream */
84 dns_stream_unref(s);
85
86 return 0;
87 }
88
89 static int dns_stream_identify(DnsStream *s) {
90 CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
91 + CMSG_SPACE(int) + /* for the TTL */
92 + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
93 struct msghdr mh = {};
94 struct cmsghdr *cmsg;
95 socklen_t sl;
96 int r;
97
98 assert(s);
99
100 if (s->identified)
101 return 0;
102
103 /* Query the local side */
104 s->local_salen = sizeof(s->local);
105 r = getsockname(s->fd, &s->local.sa, &s->local_salen);
106 if (r < 0)
107 return -errno;
108 if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
109 s->ifindex = s->local.in6.sin6_scope_id;
110
111 /* Query the remote side */
112 s->peer_salen = sizeof(s->peer);
113 r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
114 if (r < 0)
115 return -errno;
116 if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
117 s->ifindex = s->peer.in6.sin6_scope_id;
118
119 /* Check consistency */
120 assert(s->peer.sa.sa_family == s->local.sa.sa_family);
121 assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
122
123 /* Query connection meta information */
124 sl = sizeof(control);
125 if (s->peer.sa.sa_family == AF_INET) {
126 r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
127 if (r < 0)
128 return -errno;
129 } else if (s->peer.sa.sa_family == AF_INET6) {
130
131 r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
132 if (r < 0)
133 return -errno;
134 } else
135 return -EAFNOSUPPORT;
136
137 mh.msg_control = &control;
138 mh.msg_controllen = sl;
139
140 CMSG_FOREACH(cmsg, &mh) {
141
142 if (cmsg->cmsg_level == IPPROTO_IPV6) {
143 assert(s->peer.sa.sa_family == AF_INET6);
144
145 switch (cmsg->cmsg_type) {
146
147 case IPV6_PKTINFO: {
148 struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
149
150 if (s->ifindex <= 0)
151 s->ifindex = i->ipi6_ifindex;
152 break;
153 }
154
155 case IPV6_HOPLIMIT:
156 s->ttl = *(int *) CMSG_DATA(cmsg);
157 break;
158 }
159
160 } else if (cmsg->cmsg_level == IPPROTO_IP) {
161 assert(s->peer.sa.sa_family == AF_INET);
162
163 switch (cmsg->cmsg_type) {
164
165 case IP_PKTINFO: {
166 struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
167
168 if (s->ifindex <= 0)
169 s->ifindex = i->ipi_ifindex;
170 break;
171 }
172
173 case IP_TTL:
174 s->ttl = *(int *) CMSG_DATA(cmsg);
175 break;
176 }
177 }
178 }
179
180 /* The Linux kernel sets the interface index to the loopback
181 * device if the connection came from the local host since it
182 * avoids the routing table in such a case. Let's unset the
183 * interface index in such a case. */
184 if (s->ifindex == LOOPBACK_IFINDEX)
185 s->ifindex = 0;
186
187 /* If we don't know the interface index still, we look for the
188 * first local interface with a matching address. Yuck! */
189 if (s->ifindex <= 0)
190 s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr);
191
192 if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
193 /* Make sure all packets for this connection are sent on the same interface */
194 r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
195 if (r < 0)
196 log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
197 }
198
199 s->identified = true;
200
201 return 0;
202 }
203
204 ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
205 ssize_t m;
206
207 assert(s);
208 assert(iov);
209
210 #if ENABLE_DNS_OVER_TLS
211 if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA)) {
212 ssize_t ss;
213 size_t i;
214
215 m = 0;
216 for (i = 0; i < iovcnt; i++) {
217 ss = dnstls_stream_write(s, iov[i].iov_base, iov[i].iov_len);
218 if (ss < 0)
219 return ss;
220
221 m += ss;
222 if (ss != (ssize_t) iov[i].iov_len)
223 continue;
224 }
225 } else
226 #endif
227 if (s->tfo_salen > 0) {
228 struct msghdr hdr = {
229 .msg_iov = (struct iovec*) iov,
230 .msg_iovlen = iovcnt,
231 .msg_name = &s->tfo_address.sa,
232 .msg_namelen = s->tfo_salen
233 };
234
235 m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
236 if (m < 0) {
237 if (errno == EOPNOTSUPP) {
238 s->tfo_salen = 0;
239 if (connect(s->fd, &s->tfo_address.sa, s->tfo_salen) < 0)
240 return -errno;
241
242 return -EAGAIN;
243 }
244 if (errno == EINPROGRESS)
245 return -EAGAIN;
246
247 return -errno;
248 } else
249 s->tfo_salen = 0; /* connection is made */
250 } else {
251 m = writev(s->fd, iov, iovcnt);
252 if (m < 0)
253 return -errno;
254 }
255
256 return m;
257 }
258
259 static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
260 ssize_t ss;
261
262 #if ENABLE_DNS_OVER_TLS
263 if (s->encrypted)
264 ss = dnstls_stream_read(s, buf, count);
265 else
266 #endif
267 {
268 ss = read(s->fd, buf, count);
269 if (ss < 0)
270 return -errno;
271 }
272
273 return ss;
274 }
275
276 static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
277 DnsStream *s = userdata;
278
279 assert(s);
280
281 return dns_stream_complete(s, ETIMEDOUT);
282 }
283
284 static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
285 _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
286 bool progressed = false;
287 int r;
288
289 assert(s);
290
291 #if ENABLE_DNS_OVER_TLS
292 if (s->encrypted) {
293 r = dnstls_stream_on_io(s, revents);
294 if (r == DNSTLS_STREAM_CLOSED)
295 return 0;
296 if (r == -EAGAIN)
297 return dns_stream_update_io(s);
298 if (r < 0)
299 return dns_stream_complete(s, -r);
300
301 r = dns_stream_update_io(s);
302 if (r < 0)
303 return r;
304 }
305 #endif
306
307 /* only identify after connecting */
308 if (s->tfo_salen == 0) {
309 r = dns_stream_identify(s);
310 if (r < 0)
311 return dns_stream_complete(s, -r);
312 }
313
314 if ((revents & EPOLLOUT) &&
315 s->write_packet &&
316 s->n_written < sizeof(s->write_size) + s->write_packet->size) {
317
318 struct iovec iov[2];
319 ssize_t ss;
320
321 iov[0] = IOVEC_MAKE(&s->write_size, sizeof(s->write_size));
322 iov[1] = IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size);
323
324 IOVEC_INCREMENT(iov, 2, s->n_written);
325
326 ss = dns_stream_writev(s, iov, 2, 0);
327 if (ss < 0) {
328 if (!IN_SET(-ss, EINTR, EAGAIN))
329 return dns_stream_complete(s, -ss);
330 } else {
331 progressed = true;
332 s->n_written += ss;
333 }
334
335 /* Are we done? If so, disable the event source for EPOLLOUT */
336 if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
337 r = dns_stream_update_io(s);
338 if (r < 0)
339 return dns_stream_complete(s, -r);
340 }
341 }
342
343 if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
344 (!s->read_packet ||
345 s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
346
347 if (s->n_read < sizeof(s->read_size)) {
348 ssize_t ss;
349
350 ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
351 if (ss < 0) {
352 if (!IN_SET(-ss, EINTR, EAGAIN))
353 return dns_stream_complete(s, -ss);
354 } else if (ss == 0)
355 return dns_stream_complete(s, ECONNRESET);
356 else {
357 progressed = true;
358 s->n_read += ss;
359 }
360 }
361
362 if (s->n_read >= sizeof(s->read_size)) {
363
364 if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
365 return dns_stream_complete(s, EBADMSG);
366
367 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
368 ssize_t ss;
369
370 if (!s->read_packet) {
371 r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
372 if (r < 0)
373 return dns_stream_complete(s, -r);
374
375 s->read_packet->size = be16toh(s->read_size);
376 s->read_packet->ipproto = IPPROTO_TCP;
377 s->read_packet->family = s->peer.sa.sa_family;
378 s->read_packet->ttl = s->ttl;
379 s->read_packet->ifindex = s->ifindex;
380
381 if (s->read_packet->family == AF_INET) {
382 s->read_packet->sender.in = s->peer.in.sin_addr;
383 s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
384 s->read_packet->destination.in = s->local.in.sin_addr;
385 s->read_packet->destination_port = be16toh(s->local.in.sin_port);
386 } else {
387 assert(s->read_packet->family == AF_INET6);
388 s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
389 s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
390 s->read_packet->destination.in6 = s->local.in6.sin6_addr;
391 s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
392
393 if (s->read_packet->ifindex == 0)
394 s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
395 if (s->read_packet->ifindex == 0)
396 s->read_packet->ifindex = s->local.in6.sin6_scope_id;
397 }
398 }
399
400 ss = dns_stream_read(s,
401 (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
402 sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
403 if (ss < 0) {
404 if (!IN_SET(-ss, EINTR, EAGAIN))
405 return dns_stream_complete(s, -ss);
406 } else if (ss == 0)
407 return dns_stream_complete(s, ECONNRESET);
408 else
409 s->n_read += ss;
410 }
411
412 /* Are we done? If so, disable the event source for EPOLLIN */
413 if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
414 /* If there's a packet handler
415 * installed, call that. Note that
416 * this is optional... */
417 if (s->on_packet) {
418 r = s->on_packet(s);
419 if (r < 0)
420 return r;
421 }
422
423 r = dns_stream_update_io(s);
424 if (r < 0)
425 return dns_stream_complete(s, -r);
426 }
427 }
428 }
429
430 /* Call "complete" callback if finished reading and writing one packet, and there's nothing else left
431 * to write. */
432 if (s->type == DNS_STREAM_LLMNR_SEND &&
433 (s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
434 ordered_set_isempty(s->write_queue) &&
435 (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
436 return dns_stream_complete(s, 0);
437
438 /* If we did something, let's restart the timeout event source */
439 if (progressed && s->timeout_event_source) {
440 r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_TIMEOUT_USEC);
441 if (r < 0)
442 log_warning_errno(errno, "Couldn't restart TCP connection timeout, ignoring: %m");
443 }
444
445 return 0;
446 }
447
448 static DnsStream *dns_stream_free(DnsStream *s) {
449 DnsPacket *p;
450
451 assert(s);
452
453 dns_stream_stop(s);
454
455 if (s->manager) {
456 LIST_REMOVE(streams, s->manager->dns_streams, s);
457 s->manager->n_dns_streams[s->type]--;
458 }
459
460 #if ENABLE_DNS_OVER_TLS
461 if (s->encrypted)
462 dnstls_stream_free(s);
463 #endif
464
465 ORDERED_SET_FOREACH(p, s->write_queue)
466 dns_packet_unref(ordered_set_remove(s->write_queue, p));
467
468 dns_packet_unref(s->write_packet);
469 dns_packet_unref(s->read_packet);
470 dns_server_unref(s->server);
471
472 ordered_set_free(s->write_queue);
473
474 return mfree(s);
475 }
476
477 DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
478
479 int dns_stream_new(
480 Manager *m,
481 DnsStream **ret,
482 DnsStreamType type,
483 DnsProtocol protocol,
484 int fd,
485 const union sockaddr_union *tfo_address) {
486
487 _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
488 int r;
489
490 assert(m);
491 assert(ret);
492 assert(type >= 0);
493 assert(type < _DNS_STREAM_TYPE_MAX);
494 assert(protocol >= 0);
495 assert(protocol < _DNS_PROTOCOL_MAX);
496 assert(fd >= 0);
497
498 if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
499 return -EBUSY;
500
501 s = new(DnsStream, 1);
502 if (!s)
503 return -ENOMEM;
504
505 *s = (DnsStream) {
506 .n_ref = 1,
507 .fd = -1,
508 .protocol = protocol,
509 .type = type,
510 };
511
512 r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
513 if (r < 0)
514 return r;
515
516 r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
517 if (r < 0)
518 return r;
519
520 (void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");
521
522 r = sd_event_add_time_relative(
523 m->event,
524 &s->timeout_event_source,
525 clock_boottime_or_monotonic(),
526 DNS_STREAM_TIMEOUT_USEC, 0,
527 on_stream_timeout, s);
528 if (r < 0)
529 return r;
530
531 (void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");
532
533 LIST_PREPEND(streams, m->dns_streams, s);
534 m->n_dns_streams[type]++;
535 s->manager = m;
536
537 s->fd = fd;
538
539 if (tfo_address) {
540 s->tfo_address = *tfo_address;
541 s->tfo_salen = tfo_address->sa.sa_family == AF_INET6 ? sizeof(tfo_address->in6) : sizeof(tfo_address->in);
542 }
543
544 *ret = TAKE_PTR(s);
545
546 return 0;
547 }
548
549 int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
550 int r;
551
552 assert(s);
553 assert(p);
554
555 r = ordered_set_put(s->write_queue, p);
556 if (r < 0)
557 return r;
558
559 dns_packet_ref(p);
560
561 return dns_stream_update_io(s);
562 }
563
564 DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
565 assert(s);
566
567 if (!s->read_packet)
568 return NULL;
569
570 if (s->n_read < sizeof(s->read_size))
571 return NULL;
572
573 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
574 return NULL;
575
576 s->n_read = 0;
577 return TAKE_PTR(s->read_packet);
578 }
579
580 void dns_stream_detach(DnsStream *s) {
581 assert(s);
582
583 if (!s->server)
584 return;
585
586 if (s->server->stream != s)
587 return;
588
589 dns_server_unref_stream(s->server);
590 }