]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/resolve/resolved-dns-stub.c
11710b8a482ef2e5a413e71dc5d9e8d3a2fe10c4
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <netinet/tcp.h>
4
5 #include "sd-event.h"
6 #include "sd-id128.h"
7
8 #include "alloc-util.h"
9 #include "capability-util.h"
10 #include "dns-type.h"
11 #include "errno-util.h"
12 #include "fd-util.h"
13 #include "log.h"
14 #include "missing_network.h"
15 #include "resolve-util.h"
16 #include "resolved-dns-answer.h"
17 #include "resolved-dns-packet.h"
18 #include "resolved-dns-query.h"
19 #include "resolved-dns-question.h"
20 #include "resolved-dns-rr.h"
21 #include "resolved-dns-stream.h"
22 #include "resolved-dns-stub.h"
23 #include "resolved-dns-transaction.h"
24 #include "resolved-manager.h"
25 #include "set.h"
26 #include "siphash24.h"
27 #include "socket-util.h"
28 #include "stdio-util.h"
29 #include "string-table.h"
30 #include "string-util.h"
31 #include "time-util.h"
32
/* Maximum datagram size advertised on the main stub: the loopback device on Linux has an MTU of 64K, so
 * advertise that, minus the sizes of the Ethernet, IP and UDP headers. */
#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U - 14U - 20U - 8U)

/* The extra stubs advertise a more conservative maximum datagram size. */
#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
39
40 static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
41 static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type);
42
43 static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
44 assert(a);
45
46 siphash24_compress_typesafe(a->mode, state);
47 siphash24_compress_typesafe(a->family, state);
48 in_addr_hash_func(&a->address, a->family, state);
49 siphash24_compress_typesafe(a->port, state);
50 }
51
52 static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
53 int r;
54
55 assert(a);
56 assert(b);
57
58 r = CMP(a->mode, b->mode);
59 if (r != 0)
60 return r;
61
62 r = CMP(a->family, b->family);
63 if (r != 0)
64 return r;
65
66 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
67 if (r != 0)
68 return r;
69
70 return CMP(a->port, b->port);
71 }
72
/* Hash operations for containers keyed by DnsStubListenerExtra objects, built from the hash and compare
 * callbacks above, with dns_stub_listener_extra_free() registered as key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
79
80 int dns_stub_listener_extra_new(
81 Manager *m,
82 DnsStubListenerExtra **ret) {
83
84 DnsStubListenerExtra *l;
85
86 l = new(DnsStubListenerExtra, 1);
87 if (!l)
88 return -ENOMEM;
89
90 *l = (DnsStubListenerExtra) {
91 .manager = m,
92 };
93
94 *ret = TAKE_PTR(l);
95 return 0;
96 }
97
98 DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
99 if (!p)
100 return NULL;
101
102 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
103 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
104
105 hashmap_free(p->queries_by_packet);
106
107 return mfree(p);
108 }
109
110 static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
111 assert(p);
112
113 siphash24_compress_typesafe(p->protocol, state);
114 siphash24_compress_typesafe(p->family, state);
115 siphash24_compress_typesafe(p->sender, state);
116 siphash24_compress_typesafe(p->ipproto, state);
117 siphash24_compress_typesafe(p->sender_port, state);
118 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
119
120 /* We don't bother hashing the full packet here, just the header */
121 }
122
123 static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
124 int r;
125
126 r = CMP(x->protocol, y->protocol);
127 if (r != 0)
128 return r;
129
130 r = CMP(x->family, y->family);
131 if (r != 0)
132 return r;
133
134 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
135 if (r != 0)
136 return r;
137
138 r = CMP(x->ipproto, y->ipproto);
139 if (r != 0)
140 return r;
141
142 r = CMP(x->sender_port, y->sender_port);
143 if (r != 0)
144 return r;
145
146 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
147 }
148
/* Hash operations for the tables that map request packets to their in-flight stub queries. */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
150
151 static int reply_add_with_rrsig(
152 DnsAnswer **reply,
153 DnsResourceRecord *rr,
154 int ifindex,
155 DnsAnswerFlags flags,
156 DnsResourceRecord *rrsig,
157 bool with_rrsig) {
158 int r;
159
160 assert(reply);
161 assert(rr);
162
163 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
164 if (r < 0)
165 return r;
166
167 if (with_rrsig && rrsig) {
168 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
169 if (r < 0)
170 return r;
171 }
172
173 return 0;
174 }
175
176 static int dns_stub_collect_answer_by_question(
177 DnsAnswer **reply,
178 DnsAnswer *answer,
179 DnsQuestion *question,
180 bool with_rrsig) { /* Add RRSIG RR matching each RR */
181
182 DnsAnswerItem *item;
183 int r;
184
185 assert(reply);
186
187 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
188
189 DNS_ANSWER_FOREACH_ITEM(item, answer) {
190
191 /* We have a question, let's see if this RR matches it */
192 r = dns_question_matches_rr(question, item->rr, NULL);
193 if (r < 0)
194 return r;
195 if (!r) {
196 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
197 r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
198 if (r < 0)
199 return r;
200 if (!r)
201 continue;
202 }
203
204 /* Mask the section info, we want the primary answers to always go without section
205 * info, so that it is added to the answer section when we synthesize a reply. */
206
207 r = reply_add_with_rrsig(
208 reply,
209 item->rr,
210 item->ifindex,
211 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
212 item->rrsig,
213 with_rrsig);
214 if (r < 0)
215 return r;
216 }
217
218 return 0;
219 }
220
221 static int dns_stub_collect_answer_by_section(
222 DnsAnswer **reply,
223 DnsAnswer *answer,
224 DnsAnswerFlags section,
225 DnsAnswer *exclude1,
226 DnsAnswer *exclude2,
227 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
228
229 DnsAnswerItem *item;
230 int r;
231
232 assert(reply);
233
234 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
235 * avoid any RRs listed in 'exclude'. */
236
237 DNS_ANSWER_FOREACH_ITEM(item, answer) {
238
239 if (dns_answer_contains(exclude1, item->rr) ||
240 dns_answer_contains(exclude2, item->rr))
241 continue;
242
243 if (!with_dnssec &&
244 dns_type_is_dnssec(item->rr->key->type))
245 continue;
246
247 if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0)
248 continue;
249
250 r = reply_add_with_rrsig(
251 reply,
252 item->rr,
253 item->ifindex,
254 item->flags,
255 item->rrsig,
256 with_dnssec);
257 if (r < 0)
258 return r;
259 }
260
261 return 0;
262 }
263
264 static int dns_stub_assign_sections(
265 DnsQuery *q,
266 DnsQuestion *question,
267 bool edns0_do) {
268
269 int r;
270
271 assert(q);
272 assert(question);
273
274 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
275 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
276 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
277 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
278 * come with section information though (for example, because they were synthesized locally, and not
279 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
280 * 'answer' object that directly matches the original question is always put in the ANSWER section,
281 * regardless if it carries section info, or what that section info says. Then, anything from the
282 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
283 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
284 * the ADDITIONAL section. */
285
286 /* Include all RRs that directly answer the question in the answer section */
287 r = dns_stub_collect_answer_by_question(
288 &q->reply_answer,
289 q->answer,
290 question,
291 edns0_do);
292 if (r < 0)
293 return r;
294
295 /* Include all RRs that originate from the authority sections, and aren't already listed in the
296 * answer section, in the authority section */
297 r = dns_stub_collect_answer_by_section(
298 &q->reply_authoritative,
299 q->answer,
300 DNS_ANSWER_SECTION_AUTHORITY,
301 q->reply_answer, NULL,
302 edns0_do);
303 if (r < 0)
304 return r;
305
306 /* Include all RRs that originate from the answer or additional sections in the additional section
307 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
308 r = dns_stub_collect_answer_by_section(
309 &q->reply_additional,
310 q->answer,
311 DNS_ANSWER_SECTION_ANSWER,
312 q->reply_answer, q->reply_authoritative,
313 edns0_do);
314 if (r < 0)
315 return r;
316 r = dns_stub_collect_answer_by_section(
317 &q->reply_additional,
318 q->answer,
319 DNS_ANSWER_SECTION_ADDITIONAL,
320 q->reply_answer, q->reply_authoritative,
321 edns0_do);
322 if (r < 0)
323 return r;
324 r = dns_stub_collect_answer_by_section(
325 &q->reply_additional,
326 q->answer,
327 0,
328 q->reply_answer, q->reply_authoritative,
329 edns0_do);
330 if (r < 0)
331 return r;
332
333 return 0;
334 }
335
336 static int dns_stub_make_reply_packet(
337 DnsPacket **ret,
338 size_t max_size,
339 DnsQuestion *q,
340 bool *ret_truncated) {
341
342 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
343 bool tc = false;
344 int r;
345
346 assert(ret);
347
348 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
349 if (r < 0)
350 return r;
351
352 r = dns_packet_append_question(p, q);
353 if (r == -EMSGSIZE)
354 tc = true;
355 else if (r < 0)
356 return r;
357
358 if (ret_truncated)
359 *ret_truncated = tc;
360 else if (tc)
361 return -EMSGSIZE;
362
363 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
364
365 *ret = TAKE_PTR(p);
366 return 0;
367 }
368
369 static int dns_stub_add_reply_packet_body(
370 DnsPacket *p,
371 DnsAnswer *answer,
372 DnsAnswer *authoritative,
373 DnsAnswer *additional,
374 bool edns0_do, /* Client expects DNSSEC RRs? */
375 bool *truncated) {
376
377 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
378 bool tc = false;
379 int r;
380
381 assert(p);
382
383 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
384 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
385 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
386 * the dropped RRs should not be essential. */
387
388 r = dns_packet_append_answer(p, answer, &n_answer);
389 if (r == -EMSGSIZE)
390 tc = true;
391 else if (r < 0)
392 return r;
393 else {
394 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
395 if (r == -EMSGSIZE) {
396 if (edns0_do)
397 tc = true;
398 } else if (r < 0)
399 return r;
400 else {
401 r = dns_packet_append_answer(p, additional, &n_additional);
402 if (r < 0 && r != -EMSGSIZE)
403 return r;
404 }
405 }
406
407 if (tc) {
408 if (!truncated)
409 return -EMSGSIZE;
410
411 *truncated = true;
412 }
413
414 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
415 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
416 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
417 return 0;
418 }
419
420 static const char *nsid_string(void) {
421 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
422 sd_id128_t id;
423 int r;
424
425 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
426 * as systemd-resolved, and return a different string for each resolved instance without leaking host
427 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
428 * machine ID but from which the machine ID cannot be determined.
429 *
430 * Clients can use this to determine whether an answer is originating locally or is proxied from
431 * upstream. */
432
433 if (!isempty(buffer))
434 return buffer;
435
436 r = sd_id128_get_machine_app_specific(
437 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
438 &id);
439 if (r < 0) {
440 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
441 return NULL;
442 }
443
444 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
445 return buffer;
446 }
447
448 static int dns_stub_finish_reply_packet(
449 DnsPacket *p,
450 uint16_t id,
451 int rcode,
452 bool tc, /* set the Truncated bit? */
453 bool aa, /* set the Authoritative Answer bit? */
454 bool rd, /* set the Recursion Desired bit? */
455 bool add_opt, /* add an OPT RR to this packet? */
456 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
457 bool ad, /* set the DNSSEC authenticated data bit? */
458 bool cd, /* set the DNSSEC checking disabled bit? */
459 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
460 bool nsid) { /* whether to add NSID */
461
462 int r;
463
464 assert(p);
465
466 if (add_opt) {
467 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
468 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
469 tc = true;
470 else if (r < 0)
471 return r;
472 } else {
473 /* If the client can't to EDNS0, don't do DO either */
474 edns0_do = false;
475
476 /* If we don't do EDNS, clamp the rcode to 4 bit */
477 if (rcode > 0xF)
478 rcode = DNS_RCODE_SERVFAIL;
479 }
480
481 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
482 * 5.7 */
483
484 DNS_PACKET_HEADER(p)->id = id;
485
486 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
487 1 /* qr */,
488 0 /* opcode */,
489 aa /* aa */,
490 tc /* tc */,
491 rd /* rd */,
492 1 /* ra */,
493 ad /* ad */,
494 cd /* cd */,
495 rcode));
496
497 return 0;
498 }
499
500 static bool address_is_proxy(int family, const union in_addr_union *a) {
501 assert(a);
502
503 /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
504
505 if (family != AF_INET)
506 return false;
507
508 return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB;
509 }
510
511 static int find_socket_fd(
512 Manager *m,
513 DnsStubListenerExtra *l,
514 int family,
515 const union in_addr_union *listen_address,
516 int type) {
517
518 assert(m);
519
520 /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the
521 * address to send from */
522 if (l)
523 return manager_dns_stub_fd_extra(m, l, type);
524
525 return manager_dns_stub_fd(m, family, listen_address, type);
526 }
527
528 static int dns_stub_send(
529 Manager *m,
530 DnsStubListenerExtra *l,
531 DnsStream *s,
532 DnsPacket *p,
533 DnsPacket *reply) {
534
535 int r;
536
537 assert(m);
538 assert(p);
539 assert(reply);
540
541 if (s)
542 r = dns_stream_write_packet(s, reply);
543 else {
544 int fd, ifindex;
545
546 fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM);
547 if (fd < 0)
548 return fd;
549
550 if (address_is_proxy(p->family, &p->destination))
551 /* Force loopback iface if this is the loopback proxy stub
552 * and ifindex was normalized to 0 by manager_recv(). */
553 ifindex = p->ifindex ?: LOOPBACK_IFINDEX;
554 else
555 /* Force loopback iface if this is the main listener stub. */
556 ifindex = l ? p->ifindex : LOOPBACK_IFINDEX;
557
558 /* Note that it is essential here that we explicitly choose the source IP address for this
559 * packet. This is because otherwise the kernel will choose it automatically based on the
560 * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */
561 r = manager_send(m,
562 fd,
563 ifindex,
564 p->family, &p->sender, p->sender_port, &p->destination,
565 reply);
566 }
567 if (r < 0)
568 return log_debug_errno(r, "Failed to send reply packet: %m");
569
570 return 0;
571 }
572
573 static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
574 assert(q);
575
576 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
577 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
578 * set cd */
579
580 return dns_packet_do(q->request_packet) &&
581 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
582 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
583 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
584 }
585
586 static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
587 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
588 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
589 * element). E.g. it could be that an RR that was included in the first reply's additional section
590 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
591 * remove everything in the "higher priority" sections from the "lower priority" sections.
592 *
593 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
594 * always end up in one section fully or not at all, but never be split among sections.
595 *
596 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
597 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
598
599 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
600 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
601 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
602 }
603
604 static int dns_stub_send_reply(
605 DnsQuery *q,
606 int rcode) {
607
608 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
609 bool truncated, edns0_do;
610 int r;
611
612 assert(q);
613
614 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
615
616 r = dns_stub_make_reply_packet(
617 &reply,
618 dns_packet_payload_size_max(q->request_packet),
619 q->request_packet->question,
620 &truncated);
621 if (r < 0)
622 return log_debug_errno(r, "Failed to build reply packet: %m");
623
624 dns_stub_suppress_duplicate_section_rrs(q);
625
626 r = dns_stub_add_reply_packet_body(
627 reply,
628 q->reply_answer,
629 q->reply_authoritative,
630 q->reply_additional,
631 edns0_do,
632 &truncated);
633 if (r < 0)
634 return log_debug_errno(r, "Failed to append reply packet body: %m");
635
636 r = dns_stub_finish_reply_packet(
637 reply,
638 DNS_PACKET_ID(q->request_packet),
639 rcode,
640 truncated,
641 dns_query_fully_authoritative(q),
642 DNS_PACKET_RD(q->request_packet),
643 !!q->request_packet->opt,
644 edns0_do,
645 (DNS_PACKET_AD(q->request_packet) || dns_packet_do(q->request_packet)) && dns_query_fully_authenticated(q),
646 FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE),
647 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
648 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
649 if (r < 0)
650 return log_debug_errno(r, "Failed to build failure packet: %m");
651
652 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
653 }
654
655 static int dns_stub_send_failure(
656 Manager *m,
657 DnsStubListenerExtra *l,
658 DnsStream *s,
659 DnsPacket *p,
660 int rcode,
661 bool authenticated) {
662
663 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
664 bool truncated;
665 int r;
666
667 assert(m);
668 assert(p);
669
670 r = dns_stub_make_reply_packet(
671 &reply,
672 dns_packet_payload_size_max(p),
673 p->question,
674 &truncated);
675 if (r < 0)
676 return log_debug_errno(r, "Failed to make failure packet: %m");
677
678 r = dns_stub_finish_reply_packet(
679 reply,
680 DNS_PACKET_ID(p),
681 rcode,
682 truncated,
683 false,
684 DNS_PACKET_RD(p),
685 !!p->opt,
686 dns_packet_do(p),
687 (DNS_PACKET_AD(p) || dns_packet_do(p)) && authenticated,
688 DNS_PACKET_CD(p),
689 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
690 dns_packet_has_nsid_request(p) > 0 && !l);
691 if (r < 0)
692 return log_debug_errno(r, "Failed to build failure packet: %m");
693
694 return dns_stub_send(m, l, s, p, reply);
695 }
696
697 static int dns_stub_patch_bypass_reply_packet(
698 DnsPacket **ret, /* Where to place the patched packet */
699 DnsPacket *original, /* The packet to patch */
700 DnsPacket *request, /* The packet the patched packet shall look like a reply to */
701 bool validated,
702 bool authenticated) {
703 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
704 int r;
705
706 assert(ret);
707 assert(original);
708 assert(request);
709
710 r = dns_packet_dup(&c, original);
711 if (r < 0)
712 return r;
713
714 /* Extract the packet, so that we know where the OPT field is */
715 r = dns_packet_extract(c);
716 if (r < 0)
717 return r;
718
719 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
720 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
721
722 /* Patch in our own maximum datagram size, if EDNS0 was on */
723 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
724 if (r < 0)
725 return r;
726
727 /* Lower all TTLs by the time passed since we received the datagram. */
728 if (timestamp_is_set(original->timestamp)) {
729 r = dns_packet_patch_ttls(c, original->timestamp);
730 if (r < 0)
731 return r;
732 }
733
734 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
735 * set the TC bit if our reply is larger than what the client supports, and truncate. */
736 if (c->size > dns_packet_payload_size_max(request)) {
737 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
738 dns_packet_truncate(c, dns_packet_payload_size_max(request));
739 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
740 }
741
742 /* Patch the cd bit to reflect the state of validation: set when both we and the upstream
743 * resolver have checking disabled. */
744 DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags),
745 DNS_PACKET_FLAG_CD, DNS_PACKET_CD(original) && !validated));
746
747 /* Ensure we don't pass along an untrusted ad flag for bypass packets */
748 DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags),
749 DNS_PACKET_FLAG_AD, authenticated));
750
751 *ret = TAKE_PTR(c);
752 return 0;
753 }
754
755 static void dns_stub_query_complete(DnsQuery *query) {
756 _cleanup_(dns_query_freep) DnsQuery *q = query;
757 int r;
758
759 assert(q);
760 assert(q->request_packet);
761
762 if (q->question_bypass) {
763 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
764 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
765 * packets are not 100% compatible.) */
766
767 if (q->answer_full_packet &&
768 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
769 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
770
771 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet,
772 /* validated = */ !FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE),
773 FLAGS_SET(q->answer_query_flags, SD_RESOLVED_AUTHENTICATED));
774 if (r < 0)
775 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
776 else
777 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
778
779 return;
780 }
781 }
782
783 /* Take all data from the current reply, and merge it into the three reply sections we are building
784 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
785 * and keep adding all RRs in the CNAME chain. */
786 r = dns_stub_assign_sections(
787 q,
788 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
789 dns_stub_reply_with_edns0_do(q));
790 if (r < 0)
791 return (void) log_debug_errno(r, "Failed to assign sections: %m");
792
793 switch (q->state) {
794
795 case DNS_TRANSACTION_SUCCESS: {
796 bool first = true;
797
798 for (;;) {
799 int cname_result;
800
801 cname_result = dns_query_process_cname_one(q);
802 if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */
803 log_debug("Detected CNAME loop, returning what we already have.");
804 (void) dns_stub_send_reply(q, q->answer_rcode);
805 break;
806 }
807 if (cname_result < 0) {
808 log_debug_errno(cname_result, "Failed to process CNAME: %m");
809 break;
810 }
811
812 if (cname_result == DNS_QUERY_NOMATCH) {
813 /* This answer doesn't contain any RR that would answer our question
814 * positively, i.e. neither directly nor via CNAME. */
815
816 if (first) /* We never followed a CNAME and the answer doesn't match our
817 * question at all? Then this is final, the empty answer is the
818 * answer. */
819 break;
820
821 /* Otherwise, we already followed a CNAME once within this packet, and the
822 * packet doesn't answer our question. In that case let's restart the query,
823 * now with the redirected question. We'll */
824 r = dns_query_go(q);
825 if (r < 0)
826 return (void) log_debug_errno(r, "Failed to restart query: %m");
827
828 TAKE_PTR(q);
829 return;
830 }
831
832 r = dns_stub_assign_sections(
833 q,
834 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
835 dns_stub_reply_with_edns0_do(q));
836 if (r < 0)
837 return (void) log_debug_errno(r, "Failed to assign sections: %m");
838
839 if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
840 break;
841
842 /* We followed a CNAME. and collected the RRs that answer the redirected question
843 * successfully. Let's not try to do this again. */
844 assert(cname_result == DNS_QUERY_CNAME);
845 first = false;
846 }
847
848 _fallthrough_;
849 }
850
851 case DNS_TRANSACTION_RCODE_FAILURE:
852 (void) dns_stub_send_reply(q, q->answer_rcode);
853 break;
854
855 case DNS_TRANSACTION_NOT_FOUND:
856 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
857 break;
858
859 case DNS_TRANSACTION_TIMEOUT:
860 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
861 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
862 break;
863
864 case DNS_TRANSACTION_NO_SERVERS:
865 /* We're not configured to give answers for this question. Refuse it. */
866 (void) dns_stub_send_reply(q, DNS_RCODE_REFUSED);
867 break;
868
869 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
870 /* This RR Type is not implemented */
871 (void) dns_stub_send_reply(q, DNS_RCODE_NOTIMP);
872 break;
873
874 case DNS_TRANSACTION_INVALID_REPLY:
875 case DNS_TRANSACTION_ERRNO:
876 case DNS_TRANSACTION_ABORTED:
877 case DNS_TRANSACTION_DNSSEC_FAILED:
878 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
879 case DNS_TRANSACTION_NETWORK_DOWN:
880 case DNS_TRANSACTION_NO_SOURCE:
881 case DNS_TRANSACTION_STUB_LOOP:
882 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
883 break;
884
885 case DNS_TRANSACTION_NULL:
886 case DNS_TRANSACTION_PENDING:
887 case DNS_TRANSACTION_VALIDATING:
888 default:
889 assert_not_reached();
890 }
891 }
892
893 static int dns_stub_stream_complete(DnsStream *s, int error) {
894 assert(s);
895
896 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
897
898 for (;;) {
899 DnsQuery *q;
900
901 q = set_first(s->queries);
902 if (!q)
903 break;
904
905 dns_query_free(q);
906 }
907
908 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
909 * should be kept as long as the client wants to. */
910 dns_stream_unref(s);
911 return 0;
912 }
913
914 static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
915 uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL;
916 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
917 Hashmap **queries_by_packet;
918 DnsQuery *existing;
919 bool bypass = false;
920 int r;
921
922 assert(m);
923 assert(p);
924 assert(p->protocol == DNS_PROTOCOL_DNS);
925
926 if (!l && /* l == NULL if this is the main stub */
927 !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */
928 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
929 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
930 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
931 return;
932 }
933
934 if (manager_packet_from_our_transaction(m, p)) {
935 log_debug("Got our own packet looped back, ignoring.");
936 return;
937 }
938
939 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
940 existing = hashmap_get(*queries_by_packet, p);
941 if (existing && dns_packet_equal(existing->request_packet, p)) {
942 log_debug("Got repeat packet from client, ignoring.");
943 return;
944 }
945
946 r = dns_packet_extract(p);
947 if (r < 0) {
948 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
949 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
950 return;
951 }
952
953 if (!dns_packet_version_supported(p)) {
954 log_debug("Got EDNS OPT field with unsupported version number.");
955 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
956 return;
957 }
958
959 if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) {
960 log_debug("Got message with obsolete key type, refusing.");
961 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
962 return;
963 }
964
965 if (dns_type_is_zone_transfer(dns_question_first_key(p->question)->type)) {
966 log_debug("Got request for zone transfer, refusing.");
967 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
968 return;
969 }
970
971 if (!DNS_PACKET_RD(p)) {
972 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
973 log_debug("Got request with recursion disabled, refusing.");
974 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
975 return;
976 }
977
978 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
979 if (r < 0) {
980 log_oom();
981 return;
982 }
983
984 if (address_is_proxy(p->family, &p->destination)) {
985 _cleanup_free_ char *dipa = NULL;
986
987 r = in_addr_to_string(p->family, &p->destination, &dipa);
988 if (r < 0)
989 return (void) log_error_errno(r, "Failed to format destination address: %m");
990
991 log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
992 bypass = true;
993 protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */
994 } else if (dns_packet_do(p)) {
995 log_debug("Got request with DNSSEC enabled, enabling bypass logic.");
996 bypass = true;
997 }
998
999 if (bypass)
1000 r = dns_query_new(m, &q, NULL, NULL, p, 0,
1001 protocol_flags|
1002 SD_RESOLVED_NO_CNAME|
1003 SD_RESOLVED_NO_SEARCH|
1004 (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)|
1005 SD_RESOLVED_REQUIRE_PRIMARY|
1006 SD_RESOLVED_CLAMP_TTL|
1007 SD_RESOLVED_RELAX_SINGLE_LABEL);
1008 else
1009 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
1010 protocol_flags|
1011 SD_RESOLVED_NO_SEARCH|
1012 (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)|
1013 (dns_packet_do(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
1014 SD_RESOLVED_CLAMP_TTL);
1015 if (r == -ENOANO) /* Refuse query if there is -ENOANO */
1016 return (void) dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
1017 if (r < 0) {
1018 log_error_errno(r, "Failed to generate query object: %m");
1019 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
1020 return;
1021 }
1022
1023 q->request_packet = dns_packet_ref(p);
1024 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
1025 q->stub_listener_extra = l;
1026 q->complete = dns_stub_query_complete;
1027
1028 if (s) {
1029 /* Remember which queries belong to this stream, so that we can cancel them when the stream
1030 * is disconnected early */
1031
1032 r = set_ensure_put(&s->queries, NULL, q);
1033 if (r < 0) {
1034 log_oom();
1035 return;
1036 }
1037 assert(r > 0);
1038 }
1039
1040 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
1041 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
1042 * isn't particularly bad. */
1043 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
1044
1045 r = dns_query_go(q);
1046 if (r < 0) {
1047 log_error_errno(r, "Failed to start query: %m");
1048 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
1049 return;
1050 }
1051
1052 log_debug("Processing query...");
1053 TAKE_PTR(q);
1054 }
1055
1056 static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1057 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
1058 int r;
1059
1060 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
1061 if (r <= 0)
1062 return r;
1063
1064 if (dns_packet_validate_query(p) > 0) {
1065 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
1066
1067 dns_stub_process_query(m, l, NULL, p);
1068 } else
1069 log_debug("Invalid DNS stub UDP packet, ignoring.");
1070
1071 return 0;
1072 }
1073
1074 static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1075 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
1076 }
1077
1078 static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1079 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
1080
1081 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
1082 }
1083
1084 static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) {
1085 assert(s);
1086 assert(s->manager);
1087 assert(p);
1088
1089 if (dns_packet_validate_query(p) > 0) {
1090 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1091
1092 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1093 } else
1094 log_debug("Invalid DNS stub TCP packet, ignoring.");
1095
1096 return 0;
1097 }
1098
1099 static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1100 DnsStream *stream;
1101 int cfd, r;
1102
1103 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1104 if (cfd < 0) {
1105 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1106 return 0;
1107
1108 return -errno;
1109 }
1110
1111 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL,
1112 on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC);
1113 if (r < 0) {
1114 safe_close(cfd);
1115 return r;
1116 }
1117
1118 stream->stub_listener_extra = l;
1119
1120 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1121
1122 return 0;
1123 }
1124
1125 static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1126 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1127 }
1128
1129 static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1130 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
1131
1132 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1133 }
1134
/* Applies the socket options all stub sockets (UDP and TCP, built-in and extra) have in common:
 * SO_REUSEADDR, plus reception of packet info and TTL for the given address family.
 *
 * Returns 0 on success, or the negative error of the first option that could not be set (later options
 * are not attempted in that case). */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int k;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        k = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (k >= 0)
                k = socket_set_recvpktinfo(fd, family, true);
        if (k >= 0)
                k = socket_set_recvttl(fd, family, true);

        return k < 0 ? k : 0;
}
1155
/* Applies the TCP-specific options shared by all stub listening sockets. Both options are best-effort:
 * failures are logged at debug level and otherwise ignored, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int k;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        k = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        k = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1171
1172 static int manager_dns_stub_fd(
1173 Manager *m,
1174 int family,
1175 const union in_addr_union *listen_addr,
1176 int type) {
1177
1178 sd_event_source **event_source;
1179 _cleanup_close_ int fd = -EBADF;
1180 union sockaddr_union sa;
1181 int r;
1182
1183 assert(m);
1184 assert(listen_addr);
1185
1186 if (type == SOCK_DGRAM)
1187 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source;
1188 else if (type == SOCK_STREAM)
1189 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source;
1190 else
1191 return -EPROTONOSUPPORT;
1192
1193 if (*event_source)
1194 return sd_event_source_get_io_fd(*event_source);
1195
1196 fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1197 if (fd < 0)
1198 return -errno;
1199
1200 r = set_dns_stub_common_socket_options(fd, family);
1201 if (r < 0)
1202 return r;
1203
1204 if (type == SOCK_STREAM) {
1205 r = set_dns_stub_common_tcp_socket_options(fd);
1206 if (r < 0)
1207 return r;
1208 }
1209
1210 /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
1211 * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
1212 * so to redirect external traffic to it. */
1213
1214 if (!address_is_proxy(family, listen_addr)) {
1215 /* Make sure no traffic from outside the local host can leak to onto this socket */
1216 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1217 if (r < 0)
1218 return r;
1219
1220 r = socket_set_ttl(fd, family, 1);
1221 if (r < 0)
1222 return r;
1223 } else if (type == SOCK_DGRAM) {
1224 /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
1225 * a longer discussion. (We only do this for sockets that are potentially externally
1226 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
1227 * see above, so that packets don't get routed at all.) */
1228 r = socket_disable_pmtud(fd, family);
1229 if (r < 0)
1230 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1231
1232 r = socket_set_recvfragsize(fd, family, true);
1233 if (r < 0)
1234 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1235 }
1236
1237 r = sockaddr_set_in_addr(&sa, family, listen_addr, 53);
1238 if (r < 0)
1239 return r;
1240
1241 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1242 return -errno;
1243
1244 if (type == SOCK_STREAM &&
1245 listen(fd, SOMAXCONN_DELUXE) < 0)
1246 return -errno;
1247
1248 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1249 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1250 m);
1251 if (r < 0)
1252 return r;
1253
1254 r = sd_event_source_set_io_fd_own(*event_source, true);
1255 if (r < 0)
1256 return r;
1257
1258 (void) sd_event_source_set_description(*event_source,
1259 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
1260
1261 return TAKE_FD(fd);
1262 }
1263
1264 static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1265 _cleanup_free_ char *pretty = NULL;
1266 _cleanup_close_ int fd = -EBADF;
1267 union sockaddr_union sa;
1268 int r;
1269
1270 assert(m);
1271 assert(l);
1272 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
1273
1274 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1275 if (*event_source)
1276 return sd_event_source_get_io_fd(*event_source);
1277
1278 if (!have_effective_cap(CAP_NET_BIND_SERVICE) && dns_stub_listener_extra_port(l) < 1024) {
1279 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating extra stub listener on port %hu.",
1280 dns_stub_listener_extra_port(l));
1281 return 0;
1282 }
1283
1284 if (l->family == AF_INET)
1285 sa = (union sockaddr_union) {
1286 .in.sin_family = l->family,
1287 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
1288 .in.sin_addr = l->address.in,
1289 };
1290 else
1291 sa = (union sockaddr_union) {
1292 .in6.sin6_family = l->family,
1293 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
1294 .in6.sin6_addr = l->address.in6,
1295 };
1296
1297 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1298 if (fd < 0) {
1299 r = -errno;
1300 goto fail;
1301 }
1302
1303 r = set_dns_stub_common_socket_options(fd, l->family);
1304 if (r < 0)
1305 goto fail;
1306
1307 if (type == SOCK_STREAM) {
1308 r = set_dns_stub_common_tcp_socket_options(fd);
1309 if (r < 0)
1310 goto fail;
1311 }
1312
1313 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
1314 * people may want ttl > 1. */
1315
1316 r = socket_set_freebind(fd, l->family, true);
1317 if (r < 0)
1318 goto fail;
1319
1320 if (type == SOCK_DGRAM) {
1321 r = socket_disable_pmtud(fd, l->family);
1322 if (r < 0)
1323 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1324
1325 r = socket_set_recvfragsize(fd, l->family, true);
1326 if (r < 0)
1327 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1328 }
1329
1330 r = RET_NERRNO(bind(fd, &sa.sa, sockaddr_len(&sa)));
1331 if (r < 0)
1332 goto fail;
1333
1334 if (type == SOCK_STREAM &&
1335 listen(fd, SOMAXCONN_DELUXE) < 0) {
1336 r = -errno;
1337 goto fail;
1338 }
1339
1340 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1341 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1342 l);
1343 if (r < 0)
1344 goto fail;
1345
1346 r = sd_event_source_set_io_fd_own(*event_source, true);
1347 if (r < 0)
1348 goto fail;
1349
1350 (void) sd_event_source_set_description(*event_source,
1351 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1352
1353 if (DEBUG_LOGGING) {
1354 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
1355 log_debug("Listening on %s socket %s.",
1356 type == SOCK_DGRAM ? "UDP" : "TCP",
1357 strnull(pretty));
1358 }
1359
1360 return TAKE_FD(fd);
1361
1362 fail:
1363 assert(r < 0);
1364 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
1365 return log_warning_errno(r,
1366 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1367 "Failed to listen on %s socket %s: %m",
1368 type == SOCK_DGRAM ? "UDP" : "TCP",
1369 strnull(pretty));
1370 }
1371
1372 int manager_dns_stub_start(Manager *m) {
1373 int r;
1374
1375 assert(m);
1376
1377 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1378 log_debug("Not creating stub listener.");
1379 else if (!have_effective_cap(CAP_NET_BIND_SERVICE))
1380 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating stub listener on port 53.");
1381 else {
1382 static const struct {
1383 uint32_t addr;
1384 int socket_type;
1385 } stub_sockets[] = {
1386 { INADDR_DNS_STUB, SOCK_DGRAM },
1387 { INADDR_DNS_STUB, SOCK_STREAM },
1388 { INADDR_DNS_PROXY_STUB, SOCK_DGRAM },
1389 { INADDR_DNS_PROXY_STUB, SOCK_STREAM },
1390 };
1391
1392 log_debug("Creating stub listener using %s.",
1393 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1394 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1395 "UDP/TCP");
1396
1397 FOREACH_ELEMENT(s, stub_sockets) {
1398 union in_addr_union a = {
1399 .in.s_addr = htobe32(s->addr),
1400 };
1401
1402 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && s->socket_type == SOCK_STREAM)
1403 continue;
1404 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && s->socket_type == SOCK_DGRAM)
1405 continue;
1406
1407 r = manager_dns_stub_fd(m, AF_INET, &a, s->socket_type);
1408 if (r < 0) {
1409 _cleanup_free_ char *busy_socket = NULL;
1410
1411 if (asprintf(&busy_socket,
1412 "%s socket " IPV4_ADDRESS_FMT_STR ":53",
1413 s->socket_type == SOCK_DGRAM ? "UDP" : "TCP",
1414 IPV4_ADDRESS_FMT_VAL(a.in)) < 0)
1415 return log_oom();
1416
1417 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
1418 log_warning_errno(r,
1419 r == -EADDRINUSE ? "Another process is already listening on %s.\n"
1420 "Turning off local DNS stub support." :
1421 "Failed to listen on %s: %m.\n"
1422 "Turning off local DNS stub support.",
1423 busy_socket);
1424 manager_dns_stub_stop(m);
1425 break;
1426 }
1427
1428 return log_error_errno(r, "Failed to listen on %s: %m", busy_socket);
1429 }
1430 }
1431 }
1432
1433 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
1434 DnsStubListenerExtra *l;
1435
1436 log_debug("Creating extra stub listeners.");
1437
1438 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
1439 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
1440 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
1441 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
1442 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
1443 }
1444 }
1445
1446 return 0;
1447 }
1448
1449 void manager_dns_stub_stop(Manager *m) {
1450 assert(m);
1451
1452 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1453 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
1454 m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source);
1455 m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source);
1456 }
1457
1458 static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
1459 [DNS_STUB_LISTENER_NO] = "no",
1460 [DNS_STUB_LISTENER_UDP] = "udp",
1461 [DNS_STUB_LISTENER_TCP] = "tcp",
1462 [DNS_STUB_LISTENER_YES] = "yes",
1463 };
1464 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);