]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/resolve/resolved-dns-stub.c
resolved: split out helper that checks whether we shall reply with EDNS0 DO
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <net/if_arp.h>
4 #include <netinet/tcp.h>
5
6 #include "errno-util.h"
7 #include "fd-util.h"
8 #include "missing_network.h"
9 #include "missing_socket.h"
10 #include "resolved-dns-stub.h"
11 #include "socket-netlink.h"
12 #include "socket-util.h"
13 #include "stdio-util.h"
14 #include "string-table.h"
15
16 /* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
17 * IP and UDP header sizes */
18 #define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
19
20 /* On the extra stubs, use a more conservative choice */
21 #define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22
23 static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
24
25 static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
26 assert(a);
27
28 siphash24_compress(&a->mode, sizeof(a->mode), state);
29 siphash24_compress(&a->family, sizeof(a->family), state);
30 siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
31 siphash24_compress(&a->port, sizeof(a->port), state);
32 }
33
34 static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
35 int r;
36
37 assert(a);
38 assert(b);
39
40 r = CMP(a->mode, b->mode);
41 if (r != 0)
42 return r;
43
44 r = CMP(a->family, b->family);
45 if (r != 0)
46 return r;
47
48 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
49 if (r != 0)
50 return r;
51
52 return CMP(a->port, b->port);
53 }
54
/* Hash operations for containers keyed by DnsStubListenerExtra objects, built from the hash and compare
 * callbacks above, with dns_stub_listener_extra_free() registered as key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
                dns_stub_listener_extra_hash_ops,
                DnsStubListenerExtra,
                dns_stub_listener_extra_hash_func,
                dns_stub_listener_extra_compare_func,
                dns_stub_listener_extra_free);
61
62 int dns_stub_listener_extra_new(
63 Manager *m,
64 DnsStubListenerExtra **ret) {
65
66 DnsStubListenerExtra *l;
67
68 l = new(DnsStubListenerExtra, 1);
69 if (!l)
70 return -ENOMEM;
71
72 *l = (DnsStubListenerExtra) {
73 .manager = m,
74 };
75
76 *ret = TAKE_PTR(l);
77 return 0;
78 }
79
80 DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
81 if (!p)
82 return NULL;
83
84 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
85 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
86
87 hashmap_free(p->queries_by_packet);
88
89 return mfree(p);
90 }
91
92 static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
93 assert(p);
94
95 siphash24_compress(&p->protocol, sizeof(p->protocol), state);
96 siphash24_compress(&p->family, sizeof(p->family), state);
97 siphash24_compress(&p->sender, sizeof(p->sender), state);
98 siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
99 siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
100 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
101
102 /* We don't bother hashing the full packet here, just the header */
103 }
104
105 static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
106 int r;
107
108 r = CMP(x->protocol, y->protocol);
109 if (r != 0)
110 return r;
111
112 r = CMP(x->family, y->family);
113 if (r != 0)
114 return r;
115
116 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
117 if (r != 0)
118 return r;
119
120 r = CMP(x->ipproto, y->ipproto);
121 if (r != 0)
122 return r;
123
124 r = CMP(x->sender_port, y->sender_port);
125 if (r != 0)
126 return r;
127
128 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
129 }
130
/* Hash operations for the hashmaps that index pending stub queries by their request packet. */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
132
133 static int reply_add_with_rrsig(
134 DnsAnswer **reply,
135 DnsResourceRecord *rr,
136 int ifindex,
137 DnsAnswerFlags flags,
138 DnsResourceRecord *rrsig,
139 bool with_rrsig) {
140 int r;
141
142 assert(reply);
143 assert(rr);
144
145 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
146 if (r < 0)
147 return r;
148
149 if (with_rrsig && rrsig) {
150 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
151 if (r < 0)
152 return r;
153 }
154
155 return 0;
156 }
157
158 static int dns_stub_collect_answer_by_question(
159 DnsAnswer **reply,
160 DnsAnswer *answer,
161 DnsQuestion *question,
162 bool with_rrsig) { /* Add RRSIG RR matching each RR */
163
164 _cleanup_(dns_resource_key_unrefp) DnsResourceKey *redirected_key = NULL;
165 unsigned n_cname_redirects = 0;
166 DnsAnswerItem *item;
167 int r;
168
169 assert(reply);
170
171 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. There might be direct and
172 * indirect matches (i.e. via CNAME/DNAME). If they have an indirect one, remember where we need to
173 * go, and restart the loop */
174
175 for (;;) {
176 _cleanup_(dns_resource_key_unrefp) DnsResourceKey *next_redirected_key = NULL;
177
178 DNS_ANSWER_FOREACH_ITEM(item, answer) {
179 DnsResourceKey *k = NULL;
180
181 if (redirected_key) {
182 /* There was a redirect in this packet, let's collect all matching RRs for the redirect */
183 r = dns_resource_key_match_rr(redirected_key, item->rr, NULL);
184 if (r < 0)
185 return r;
186
187 k = redirected_key;
188 } else if (question) {
189 /* We have a question, let's see if this RR matches it */
190 r = dns_question_matches_rr(question, item->rr, NULL);
191 if (r < 0)
192 return r;
193
194 k = question->keys[0];
195 } else
196 r = 1; /* No question, everything matches */
197
198 if (r == 0) {
199 _cleanup_free_ char *target = NULL;
200
201 /* OK, so the RR doesn't directly match. Let's see if the RR is a matching
202 * CNAME or DNAME */
203
204 assert(k);
205
206 r = dns_resource_record_get_cname_target(k, item->rr, &target);
207 if (r == -EUNATCH)
208 continue; /* Not a CNAME/DNAME or doesn't match */
209 if (r < 0)
210 return r;
211
212 /* Oh, wow, this is a redirect. Let's remember where this points, and store
213 * it in 'next_redirected_key'. Once we finished iterating through the rest
214 * of the RR's we'll start again, with the redirected RR key. */
215
216 n_cname_redirects++;
217 if (n_cname_redirects > CNAME_REDIRECT_MAX) /* don't loop forever */
218 return -ELOOP;
219
220 dns_resource_key_unref(next_redirected_key);
221
222 /* There can only be one CNAME per name, hence no point in storing more than one here */
223 next_redirected_key = dns_resource_key_new(k->class, k->type, target);
224 if (!next_redirected_key)
225 return -ENOMEM;
226 }
227
228 /* Mask the section info, we want the primary answers to always go without section info, so
229 * that it is added to the answer section when we synthesize a reply. */
230
231 r = reply_add_with_rrsig(
232 reply,
233 item->rr,
234 item->ifindex,
235 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
236 item->rrsig,
237 with_rrsig);
238 if (r < 0)
239 return r;
240 }
241
242 if (!next_redirected_key)
243 break;
244
245 dns_resource_key_unref(redirected_key);
246 redirected_key = TAKE_PTR(next_redirected_key);
247 }
248
249 return 0;
250 }
251
252 static int dns_stub_collect_answer_by_section(
253 DnsAnswer **reply,
254 DnsAnswer *answer,
255 DnsAnswerFlags section,
256 DnsAnswer *exclude1,
257 DnsAnswer *exclude2,
258 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
259
260 DnsAnswerItem *item;
261 int r;
262
263 assert(reply);
264
265 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
266 * avoid any RRs listed in 'exclude'. */
267
268 DNS_ANSWER_FOREACH_ITEM(item, answer) {
269
270 if (dns_answer_contains(exclude1, item->rr) ||
271 dns_answer_contains(exclude2, item->rr))
272 continue;
273
274 if (!with_dnssec &&
275 dns_type_is_dnssec(item->rr->key->type))
276 continue;
277
278 if (((item->flags ^ section) & (DNS_ANSWER_SECTION_ANSWER|DNS_ANSWER_SECTION_AUTHORITY|DNS_ANSWER_SECTION_ADDITIONAL)) != 0)
279 continue;
280
281 r = reply_add_with_rrsig(
282 reply,
283 item->rr,
284 item->ifindex,
285 item->flags,
286 item->rrsig,
287 with_dnssec);
288 if (r < 0)
289 return r;
290 }
291
292 return 0;
293 }
294
295 static int dns_stub_assign_sections(
296 DnsQuery *q,
297 DnsQuestion *question,
298 bool edns0_do) {
299
300 int r;
301
302 assert(q);
303 assert(question);
304
305 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
306 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
307 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
308 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
309 * come with section information though (for example, because they were synthesized locally, and not
310 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
311 * 'answer' object that directly matches the original question is always put in the ANSWER section,
312 * regardless if it carries section info, or what that section info says. Then, anything from the
313 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
314 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
315 * the ADDITIONAL section. */
316
317 /* Include all RRs that directly answer the question in the answer section */
318 r = dns_stub_collect_answer_by_question(
319 &q->reply_answer,
320 q->answer,
321 question,
322 edns0_do);
323 if (r < 0)
324 return r;
325
326 /* Include all RRs that originate from the answer or authority sections, and aren't listed in the
327 * answer section, in the authority section */
328 r = dns_stub_collect_answer_by_section(
329 &q->reply_authoritative,
330 q->answer,
331 DNS_ANSWER_SECTION_ANSWER,
332 q->reply_answer, NULL,
333 edns0_do);
334 if (r < 0)
335 return r;
336 r = dns_stub_collect_answer_by_section(
337 &q->reply_authoritative,
338 q->answer,
339 DNS_ANSWER_SECTION_AUTHORITY,
340 q->reply_answer, NULL,
341 edns0_do);
342 if (r < 0)
343 return r;
344
345 /* Include all RRs that originate from the additional sections in the additional section (except if
346 * already listed in the other two sections). Also add all RRs with no section marking. */
347 r = dns_stub_collect_answer_by_section(
348 &q->reply_additional,
349 q->answer,
350 DNS_ANSWER_SECTION_ADDITIONAL,
351 q->reply_answer, q->reply_authoritative,
352 edns0_do);
353 if (r < 0)
354 return r;
355 r = dns_stub_collect_answer_by_section(
356 &q->reply_additional,
357 q->answer,
358 0,
359 q->reply_answer, q->reply_authoritative,
360 edns0_do);
361 if (r < 0)
362 return r;
363
364 return 0;
365 }
366
367 static int dns_stub_make_reply_packet(
368 DnsPacket **ret,
369 size_t max_size,
370 DnsQuestion *q,
371 bool *ret_truncated) {
372
373 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
374 bool tc = false;
375 int r;
376
377 assert(ret);
378
379 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
380 if (r < 0)
381 return r;
382
383 r = dns_packet_append_question(p, q);
384 if (r == -EMSGSIZE)
385 tc = true;
386 else if (r < 0)
387 return r;
388
389 if (ret_truncated)
390 *ret_truncated = tc;
391 else if (tc)
392 return -EMSGSIZE;
393
394 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
395
396 *ret = TAKE_PTR(p);
397 return 0;
398 }
399
400 static int dns_stub_add_reply_packet_body(
401 DnsPacket *p,
402 DnsAnswer *answer,
403 DnsAnswer *authoritative,
404 DnsAnswer *additional,
405 bool edns0_do, /* Client expects DNSSEC RRs? */
406 bool *truncated) {
407
408 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
409 bool tc = false;
410 int r;
411
412 assert(p);
413
414 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
415 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
416 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
417 * the dropped RRs should not be essential. */
418
419 r = dns_packet_append_answer(p, answer, &n_answer);
420 if (r == -EMSGSIZE)
421 tc = true;
422 else if (r < 0)
423 return r;
424 else {
425 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
426 if (r == -EMSGSIZE) {
427 if (edns0_do)
428 tc = true;
429 } else if (r < 0)
430 return r;
431 else {
432 r = dns_packet_append_answer(p, additional, &n_additional);
433 if (r < 0 && r != -EMSGSIZE)
434 return r;
435 }
436 }
437
438 if (tc) {
439 if (!truncated)
440 return -EMSGSIZE;
441
442 *truncated = true;
443 }
444
445 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
446 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
447 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
448 return 0;
449 }
450
451 static const char *nsid_string(void) {
452 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
453 sd_id128_t id;
454 int r;
455
456 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
457 * as systemd-resolved, and return a different string for each resolved instance without leaking host
458 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
459 * machine ID but from which the machine ID cannot be determined.
460 *
461 * Clients can use this to determine whether an answer is originating locally or is proxied from
462 * upstream. */
463
464 if (!isempty(buffer))
465 return buffer;
466
467 r = sd_id128_get_machine_app_specific(
468 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
469 &id);
470 if (r < 0) {
471 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
472 return NULL;
473 }
474
475 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
476 return buffer;
477 }
478
479 static int dns_stub_finish_reply_packet(
480 DnsPacket *p,
481 uint16_t id,
482 int rcode,
483 bool tc, /* set the Truncated bit? */
484 bool aa, /* set the Authoritative Answer bit? */
485 bool add_opt, /* add an OPT RR to this packet? */
486 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
487 bool ad, /* set the DNSSEC authenticated data bit? */
488 bool cd, /* set the DNSSEC checking disabled bit? */
489 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
490 bool nsid) { /* whether to add NSID */
491
492 int r;
493
494 assert(p);
495
496 if (add_opt) {
497 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
498 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
499 tc = true;
500 else if (r < 0)
501 return r;
502 } else {
503 /* If the client can't to EDNS0, don't do DO either */
504 edns0_do = false;
505
506 /* If we don't do EDNS, clamp the rcode to 4 bit */
507 if (rcode > 0xF)
508 rcode = DNS_RCODE_SERVFAIL;
509 }
510
511 /* Don't set the CD bit unless DO is on, too */
512 if (!edns0_do)
513 cd = false;
514
515 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
516 * 5.7 */
517
518 DNS_PACKET_HEADER(p)->id = id;
519
520 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
521 1 /* qr */,
522 0 /* opcode */,
523 aa /* aa */,
524 tc /* tc */,
525 1 /* rd */,
526 1 /* ra */,
527 ad /* ad */,
528 cd /* cd */,
529 rcode));
530
531 return 0;
532 }
533
534 static int dns_stub_send(
535 Manager *m,
536 DnsStubListenerExtra *l,
537 DnsStream *s,
538 DnsPacket *p,
539 DnsPacket *reply) {
540
541 int r;
542
543 assert(m);
544 assert(p);
545 assert(reply);
546
547 if (s)
548 r = dns_stream_write_packet(s, reply);
549 else
550 /* Note that it is essential here that we explicitly choose the source IP address for this packet. This
551 * is because otherwise the kernel will choose it automatically based on the routing table and will
552 * thus pick 127.0.0.1 rather than 127.0.0.53. */
553 r = manager_send(m,
554 manager_dns_stub_fd_extra(m, l, SOCK_DGRAM),
555 l ? p->ifindex : LOOPBACK_IFINDEX, /* force loopback iface if this is the main listener stub */
556 p->family, &p->sender, p->sender_port, &p->destination,
557 reply);
558 if (r < 0)
559 return log_debug_errno(r, "Failed to send reply packet: %m");
560
561 return 0;
562 }
563
564 static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
565 assert(q);
566
567 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
568 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
569 * set cd */
570
571 return DNS_PACKET_DO(q->request_packet) &&
572 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
573 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
574 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
575 }
576
577 static int dns_stub_send_reply(
578 DnsQuery *q,
579 int rcode) {
580
581 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
582 bool truncated, edns0_do;
583 int r;
584
585 assert(q);
586
587 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
588
589 r = dns_stub_assign_sections(
590 q,
591 q->request_packet->question,
592 edns0_do);
593 if (r < 0)
594 return log_debug_errno(r, "Failed to assign sections: %m");
595
596 r = dns_stub_make_reply_packet(
597 &reply,
598 DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
599 q->request_packet->question,
600 &truncated);
601 if (r < 0)
602 return log_debug_errno(r, "Failed to build reply packet: %m");
603
604 r = dns_stub_add_reply_packet_body(
605 reply,
606 q->reply_answer,
607 q->reply_authoritative,
608 q->reply_additional,
609 edns0_do,
610 &truncated);
611 if (r < 0)
612 return log_debug_errno(r, "Failed to append reply packet body: %m");
613
614 r = dns_stub_finish_reply_packet(
615 reply,
616 DNS_PACKET_ID(q->request_packet),
617 rcode,
618 truncated,
619 dns_query_fully_synthetic(q),
620 !!q->request_packet->opt,
621 edns0_do,
622 DNS_PACKET_AD(q->request_packet) && dns_query_fully_authenticated(q),
623 DNS_PACKET_CD(q->request_packet),
624 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
625 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
626 if (r < 0)
627 return log_debug_errno(r, "Failed to build failure packet: %m");
628
629 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
630 }
631
632 static int dns_stub_send_failure(
633 Manager *m,
634 DnsStubListenerExtra *l,
635 DnsStream *s,
636 DnsPacket *p,
637 int rcode,
638 bool authenticated) {
639
640 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
641 bool truncated;
642 int r;
643
644 assert(m);
645 assert(p);
646
647 r = dns_stub_make_reply_packet(
648 &reply,
649 DNS_PACKET_PAYLOAD_SIZE_MAX(p),
650 p->question,
651 &truncated);
652 if (r < 0)
653 return log_debug_errno(r, "Failed to make failure packet: %m");
654
655 r = dns_stub_finish_reply_packet(
656 reply,
657 DNS_PACKET_ID(p),
658 rcode,
659 truncated,
660 false,
661 !!p->opt,
662 DNS_PACKET_DO(p),
663 DNS_PACKET_AD(p) && authenticated,
664 DNS_PACKET_CD(p),
665 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
666 dns_packet_has_nsid_request(p) > 0 && !l);
667 if (r < 0)
668 return log_debug_errno(r, "Failed to build failure packet: %m");
669
670 return dns_stub_send(m, l, s, p, reply);
671 }
672
673 static int dns_stub_patch_bypass_reply_packet(
674 DnsPacket **ret, /* Where to place the patched packet */
675 DnsPacket *original, /* The packet to patch */
676 DnsPacket *request) { /* The packet the patched packet shall look like a reply to */
677 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
678 int r;
679
680 assert(ret);
681 assert(original);
682 assert(request);
683
684 r = dns_packet_dup(&c, original);
685 if (r < 0)
686 return r;
687
688 /* Extract the packet, so that we know where the OPT field is */
689 r = dns_packet_extract(c);
690 if (r < 0)
691 return r;
692
693 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
694 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
695
696 /* Patch in our own maximum datagram size, if EDNS0 was on */
697 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
698 if (r < 0)
699 return r;
700
701 /* Lower all TTLs by the time passed since we received the datagram. */
702 if (timestamp_is_set(original->timestamp)) {
703 r = dns_packet_patch_ttls(c, original->timestamp);
704 if (r < 0)
705 return r;
706 }
707
708 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
709 * set the TC bit if our reply is larger than what the client supports, and truncate. */
710 if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) {
711 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
712 dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request));
713 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
714 }
715
716 *ret = TAKE_PTR(c);
717 return 0;
718 }
719
720 static void dns_stub_query_complete(DnsQuery *q) {
721 int r;
722
723 assert(q);
724 assert(q->request_packet);
725
726 if (q->question_bypass) {
727 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
728 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
729 * packets are not 100% compatible.) */
730
731 if (q->answer_full_packet &&
732 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
733 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
734
735 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
736 if (r < 0)
737 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
738 else
739 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
740
741 dns_query_free(q);
742 return;
743 }
744 }
745
746 /* Note that we don't bother with following CNAMEs here. We propagate the authoritative/additional
747 * sections from the upstream answer however, hence if the upstream server collected that information
748 * already we don't have to collect it ourselves anymore. */
749
750 switch (q->state) {
751
752 case DNS_TRANSACTION_SUCCESS:
753 case DNS_TRANSACTION_RCODE_FAILURE:
754 (void) dns_stub_send_reply(q, q->answer_rcode);
755 break;
756
757 case DNS_TRANSACTION_NOT_FOUND:
758 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
759 break;
760
761 case DNS_TRANSACTION_TIMEOUT:
762 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
763 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
764 break;
765
766 case DNS_TRANSACTION_NO_SERVERS:
767 case DNS_TRANSACTION_INVALID_REPLY:
768 case DNS_TRANSACTION_ERRNO:
769 case DNS_TRANSACTION_ABORTED:
770 case DNS_TRANSACTION_DNSSEC_FAILED:
771 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
772 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
773 case DNS_TRANSACTION_NETWORK_DOWN:
774 case DNS_TRANSACTION_NO_SOURCE:
775 case DNS_TRANSACTION_STUB_LOOP:
776 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
777 break;
778
779 case DNS_TRANSACTION_NULL:
780 case DNS_TRANSACTION_PENDING:
781 case DNS_TRANSACTION_VALIDATING:
782 default:
783 assert_not_reached("Impossible state");
784 }
785
786 dns_query_free(q);
787 }
788
789 static int dns_stub_stream_complete(DnsStream *s, int error) {
790 assert(s);
791
792 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
793
794 for (;;) {
795 DnsQuery *q;
796
797 q = set_first(s->queries);
798 if (!q)
799 break;
800
801 dns_query_free(q);
802 }
803
804 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
805 * should be kept as long as the client wants to. */
806 dns_stream_unref(s);
807 return 0;
808 }
809
810 static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
811 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
812 Hashmap **queries_by_packet;
813 DnsQuery *existing;
814 int r;
815
816 assert(m);
817 assert(p);
818 assert(p->protocol == DNS_PROTOCOL_DNS);
819
820 if (!l && /* l == NULL if this is the main stub */
821 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
822 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
823 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
824 return;
825 }
826
827 if (manager_packet_from_our_transaction(m, p)) {
828 log_debug("Got our own packet looped back, ignoring.");
829 return;
830 }
831
832 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
833 existing = hashmap_get(*queries_by_packet, p);
834 if (existing && dns_packet_equal(existing->request_packet, p)) {
835 log_debug("Got repeat packet from client, ignoring.");
836 return;
837 }
838
839 r = dns_packet_extract(p);
840 if (r < 0) {
841 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
842 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
843 return;
844 }
845
846 if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
847 log_debug("Got EDNS OPT field with unsupported version number.");
848 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
849 return;
850 }
851
852 if (dns_type_is_obsolete(p->question->keys[0]->type)) {
853 log_debug("Got message with obsolete key type, refusing.");
854 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
855 return;
856 }
857
858 if (dns_type_is_zone_transer(p->question->keys[0]->type)) {
859 log_debug("Got request for zone transfer, refusing.");
860 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
861 return;
862 }
863
864 if (!DNS_PACKET_RD(p)) {
865 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
866 log_debug("Got request with recursion disabled, refusing.");
867 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
868 return;
869 }
870
871 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
872 if (r < 0) {
873 log_oom();
874 return;
875 }
876
877 if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) {
878 log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
879
880 r = dns_query_new(m, &q, NULL, NULL, p, 0,
881 SD_RESOLVED_PROTOCOLS_ALL|
882 SD_RESOLVED_NO_CNAME|
883 SD_RESOLVED_NO_SEARCH|
884 SD_RESOLVED_NO_VALIDATE|
885 SD_RESOLVED_REQUIRE_PRIMARY|
886 SD_RESOLVED_CLAMP_TTL);
887 } else
888 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
889 SD_RESOLVED_PROTOCOLS_ALL|
890 SD_RESOLVED_NO_SEARCH|
891 SD_RESOLVED_NO_CNAME|
892 (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
893 SD_RESOLVED_CLAMP_TTL);
894 if (r < 0) {
895 log_error_errno(r, "Failed to generate query object: %m");
896 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
897 return;
898 }
899
900 q->request_packet = dns_packet_ref(p);
901 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
902 q->stub_listener_extra = l;
903 q->complete = dns_stub_query_complete;
904
905 if (s) {
906 /* Remember which queries belong to this stream, so that we can cancel them when the stream
907 * is disconnected early */
908
909 r = set_ensure_put(&s->queries, NULL, q);
910 if (r < 0) {
911 log_oom();
912 return;
913 }
914 assert(r > 0);
915 }
916
917 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
918 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
919 * isn't particularly bad. */
920 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
921
922 r = dns_query_go(q);
923 if (r < 0) {
924 log_error_errno(r, "Failed to start query: %m");
925 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
926 return;
927 }
928
929 log_debug("Processing query...");
930 TAKE_PTR(q);
931 }
932
933 static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
934 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
935 int r;
936
937 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
938 if (r <= 0)
939 return r;
940
941 if (dns_packet_validate_query(p) > 0) {
942 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
943
944 dns_stub_process_query(m, l, NULL, p);
945 } else
946 log_debug("Invalid DNS stub UDP packet, ignoring.");
947
948 return 0;
949 }
950
951 static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
952 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
953 }
954
955 static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
956 DnsStubListenerExtra *l = userdata;
957
958 assert(l);
959
960 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
961 }
962
963 static int on_dns_stub_stream_packet(DnsStream *s) {
964 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
965
966 assert(s);
967
968 p = dns_stream_take_read_packet(s);
969 assert(p);
970
971 if (dns_packet_validate_query(p) > 0) {
972 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
973
974 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
975 } else
976 log_debug("Invalid DNS stub TCP packet, ignoring.");
977
978 return 0;
979 }
980
981 static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
982 DnsStream *stream;
983 int cfd, r;
984
985 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
986 if (cfd < 0) {
987 if (ERRNO_IS_ACCEPT_AGAIN(errno))
988 return 0;
989
990 return -errno;
991 }
992
993 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL);
994 if (r < 0) {
995 safe_close(cfd);
996 return r;
997 }
998
999 stream->stub_listener_extra = l;
1000 stream->on_packet = on_dns_stub_stream_packet;
1001 stream->complete = dns_stub_stream_complete;
1002
1003 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1004
1005 return 0;
1006 }
1007
1008 static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1009 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1010 }
1011
1012 static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1013 DnsStubListenerExtra *l = userdata;
1014
1015 assert(l);
1016 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1017 }
1018
/* Applies the socket options shared by all stub sockets (UDP and TCP alike): SO_REUSEADDR, plus
 * reception of packet info and TTL for the given address family. Returns 0 on success, or the negative
 * error of the first option that could not be set. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r < 0)
                return r;

        r = socket_set_recvpktinfo(fd, family, true);
        if (r < 0)
                return r;

        r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1039
/* Applies the TCP-specific options for stub listening sockets. Both options are best-effort: failures
 * are logged at debug level and otherwise ignored, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int r;

        assert(fd >= 0);

        /* qlen=5 appears to be what everybody picks, hence do so too. */
        r = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (r < 0)
                log_debug_errno(r, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        r = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (r < 0)
                log_debug_errno(r, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1055
1056 static int manager_dns_stub_fd(Manager *m, int type) {
1057 union sockaddr_union sa = {
1058 .in.sin_family = AF_INET,
1059 .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
1060 .in.sin_port = htobe16(53),
1061 };
1062 _cleanup_close_ int fd = -1;
1063 int r;
1064
1065 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
1066
1067 sd_event_source **event_source = type == SOCK_DGRAM ? &m->dns_stub_udp_event_source : &m->dns_stub_tcp_event_source;
1068 if (*event_source)
1069 return sd_event_source_get_io_fd(*event_source);
1070
1071 fd = socket(AF_INET, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1072 if (fd < 0)
1073 return -errno;
1074
1075 r = set_dns_stub_common_socket_options(fd, AF_INET);
1076 if (r < 0)
1077 return r;
1078
1079 if (type == SOCK_STREAM) {
1080 r = set_dns_stub_common_tcp_socket_options(fd);
1081 if (r < 0)
1082 return r;
1083 }
1084
1085 /* Make sure no traffic from outside the local host can leak to onto this socket */
1086 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1087 if (r < 0)
1088 return r;
1089
1090 r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, 1);
1091 if (r < 0)
1092 return r;
1093
1094 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1095 return -errno;
1096
1097 if (type == SOCK_STREAM &&
1098 listen(fd, SOMAXCONN) < 0)
1099 return -errno;
1100
1101 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1102 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1103 m);
1104 if (r < 0)
1105 return r;
1106
1107 r = sd_event_source_set_io_fd_own(*event_source, true);
1108 if (r < 0)
1109 return r;
1110
1111 (void) sd_event_source_set_description(*event_source,
1112 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
1113
1114 return TAKE_FD(fd);
1115 }
1116
1117 static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1118 _cleanup_free_ char *pretty = NULL;
1119 _cleanup_close_ int fd = -1;
1120 union sockaddr_union sa;
1121 int r;
1122
1123 assert(m);
1124 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
1125
1126 if (!l)
1127 return manager_dns_stub_fd(m, type);
1128
1129 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1130 if (*event_source)
1131 return sd_event_source_get_io_fd(*event_source);
1132
1133 if (l->family == AF_INET)
1134 sa = (union sockaddr_union) {
1135 .in.sin_family = l->family,
1136 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
1137 .in.sin_addr = l->address.in,
1138 };
1139 else
1140 sa = (union sockaddr_union) {
1141 .in6.sin6_family = l->family,
1142 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
1143 .in6.sin6_addr = l->address.in6,
1144 };
1145
1146 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1147 if (fd < 0) {
1148 r = -errno;
1149 goto fail;
1150 }
1151
1152 r = set_dns_stub_common_socket_options(fd, l->family);
1153 if (r < 0)
1154 goto fail;
1155
1156 if (type == SOCK_STREAM) {
1157 r = set_dns_stub_common_tcp_socket_options(fd);
1158 if (r < 0)
1159 goto fail;
1160 }
1161
1162 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
1163 * people may want ttl > 1. */
1164
1165 r = socket_set_freebind(fd, l->family, true);
1166 if (r < 0)
1167 goto fail;
1168
1169 if (type == SOCK_DGRAM) {
1170 r = socket_disable_pmtud(fd, l->family);
1171 if (r < 0)
1172 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1173
1174 r = socket_set_recvfragsize(fd, l->family, true);
1175 if (r < 0)
1176 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1177 }
1178
1179 if (bind(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0) {
1180 r = -errno;
1181 goto fail;
1182 }
1183
1184 if (type == SOCK_STREAM &&
1185 listen(fd, SOMAXCONN) < 0) {
1186 r = -errno;
1187 goto fail;
1188 }
1189
1190 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1191 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1192 l);
1193 if (r < 0)
1194 goto fail;
1195
1196 r = sd_event_source_set_io_fd_own(*event_source, true);
1197 if (r < 0)
1198 goto fail;
1199
1200 (void) sd_event_source_set_description(*event_source,
1201 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1202
1203 if (DEBUG_LOGGING) {
1204 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
1205 log_debug("Listening on %s socket %s.",
1206 type == SOCK_DGRAM ? "UDP" : "TCP",
1207 strnull(pretty));
1208 }
1209
1210 return TAKE_FD(fd);
1211
1212 fail:
1213 assert(r < 0);
1214 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
1215 return log_warning_errno(r,
1216 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1217 "Failed to listen on %s socket %s: %m",
1218 type == SOCK_DGRAM ? "UDP" : "TCP",
1219 strnull(pretty));
1220 }
1221
1222 int manager_dns_stub_start(Manager *m) {
1223 const char *t = "UDP";
1224 int r = 0;
1225
1226 assert(m);
1227
1228 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1229 log_debug("Not creating stub listener.");
1230 else
1231 log_debug("Creating stub listener using %s.",
1232 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1233 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1234 "UDP/TCP");
1235
1236 if (FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_UDP))
1237 r = manager_dns_stub_fd(m, SOCK_DGRAM);
1238
1239 if (r >= 0 &&
1240 FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_TCP)) {
1241 t = "TCP";
1242 r = manager_dns_stub_fd(m, SOCK_STREAM);
1243 }
1244
1245 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
1246 log_warning_errno(r,
1247 r == -EADDRINUSE ? "Another process is already listening on %s socket 127.0.0.53:53.\n"
1248 "Turning off local DNS stub support." :
1249 "Failed to listen on %s socket 127.0.0.53:53: %m.\n"
1250 "Turning off local DNS stub support.",
1251 t);
1252 manager_dns_stub_stop(m);
1253 } else if (r < 0)
1254 return log_error_errno(r, "Failed to listen on %s socket 127.0.0.53:53: %m", t);
1255
1256 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
1257 DnsStubListenerExtra *l;
1258
1259 log_debug("Creating extra stub listeners.");
1260
1261 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
1262 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
1263 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
1264 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
1265 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
1266 }
1267 }
1268
1269 return 0;
1270 }
1271
1272 void manager_dns_stub_stop(Manager *m) {
1273 assert(m);
1274
1275 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1276 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
1277 }
1278
1279 static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
1280 [DNS_STUB_LISTENER_NO] = "no",
1281 [DNS_STUB_LISTENER_UDP] = "udp",
1282 [DNS_STUB_LISTENER_TCP] = "tcp",
1283 [DNS_STUB_LISTENER_YES] = "yes",
1284 };
1285 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);