]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/resolve/resolved-dns-stub.c
portabled: add --extension parameter for layered images support
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b30bf55d 2
ca8b62b5 3#include <net/if_arp.h>
8624f128 4#include <netinet/tcp.h>
ca8b62b5 5
4ff9bc2e 6#include "errno-util.h"
b30bf55d 7#include "fd-util.h"
ef118d00 8#include "missing_network.h"
af8b1384 9#include "missing_socket.h"
b30bf55d 10#include "resolved-dns-stub.h"
1f05101f 11#include "socket-netlink.h"
b30bf55d 12#include "socket-util.h"
4a6eb824 13#include "stdio-util.h"
ae8f0ec3 14#include "string-table.h"
b30bf55d
LP
15
16/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
17 * IP and UDP header sizes */
18#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
19
b370adb5
LP
20/* On the extra stubs, use a more conservative choice */
21#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22
b5febb3f 23static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
0354029b 24
ae8f0ec3
LP
25static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
26 assert(a);
27
28 siphash24_compress(&a->mode, sizeof(a->mode), state);
29 siphash24_compress(&a->family, sizeof(a->family), state);
30 siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
31 siphash24_compress(&a->port, sizeof(a->port), state);
32}
33
34static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
35 int r;
36
37 assert(a);
38 assert(b);
39
40 r = CMP(a->mode, b->mode);
41 if (r != 0)
42 return r;
43
44 r = CMP(a->family, b->family);
45 if (r != 0)
46 return r;
47
48 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
49 if (r != 0)
50 return r;
51
52 return CMP(a->port, b->port);
53}
54
/* Hash operations for containers keyed by DnsStubListenerExtra objects. Built from the hash and compare
 * callbacks above, with dns_stub_listener_extra_free() registered as the key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
61
0354029b
LP
62int dns_stub_listener_extra_new(
63 Manager *m,
64 DnsStubListenerExtra **ret) {
ae8f0ec3 65
36aaabc3 66 DnsStubListenerExtra *l;
1f05101f 67
0354029b 68 l = new(DnsStubListenerExtra, 1);
1f05101f
SS
69 if (!l)
70 return -ENOMEM;
71
0354029b
LP
72 *l = (DnsStubListenerExtra) {
73 .manager = m,
74 };
1f05101f 75
0354029b 76 *ret = TAKE_PTR(l);
1f05101f
SS
77 return 0;
78}
79
36aaabc3 80DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
bf22f231
YW
81 if (!p)
82 return NULL;
83
97935302
ZJS
84 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
85 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
bf22f231 86
bde69bbd
LP
87 hashmap_free(p->queries_by_packet);
88
bf22f231
YW
89 return mfree(p);
90}
91
bde69bbd
LP
92static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
93 assert(p);
94
95 siphash24_compress(&p->protocol, sizeof(p->protocol), state);
96 siphash24_compress(&p->family, sizeof(p->family), state);
97 siphash24_compress(&p->sender, sizeof(p->sender), state);
98 siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
99 siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
100 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
101
102 /* We don't bother hashing the full packet here, just the header */
103}
104
105static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
106 int r;
107
108 r = CMP(x->protocol, y->protocol);
109 if (r != 0)
110 return r;
111
112 r = CMP(x->family, y->family);
113 if (r != 0)
114 return r;
115
116 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
117 if (r != 0)
118 return r;
119
120 r = CMP(x->ipproto, y->ipproto);
121 if (r != 0)
122 return r;
123
124 r = CMP(x->sender_port, y->sender_port);
125 if (r != 0)
126 return r;
127
128 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
129}
130
/* Hash operations for hashmaps keyed by DnsPacket, built from the two callbacks above. */
DEFINE_HASH_OPS(stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
132
5bd7ebb3
LP
133static int reply_add_with_rrsig(
134 DnsAnswer **reply,
135 DnsResourceRecord *rr,
136 int ifindex,
137 DnsAnswerFlags flags,
138 DnsResourceRecord *rrsig,
139 bool with_rrsig) {
140 int r;
141
142 assert(reply);
143 assert(rr);
144
145 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
146 if (r < 0)
147 return r;
148
149 if (with_rrsig && rrsig) {
150 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
151 if (r < 0)
152 return r;
153 }
154
155 return 0;
156}
157
775ae354
LP
158static int dns_stub_collect_answer_by_question(
159 DnsAnswer **reply,
51027656 160 DnsAnswer *answer,
775ae354
LP
161 DnsQuestion *question,
162 bool with_rrsig) { /* Add RRSIG RR matching each RR */
b30bf55d 163
775ae354 164 DnsAnswerItem *item;
b30bf55d
LP
165 int r;
166
775ae354 167 assert(reply);
e8d23f92 168
915ba31c 169 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
4838dc4f 170
915ba31c 171 DNS_ANSWER_FOREACH_ITEM(item, answer) {
5bd7ebb3 172
915ba31c
LP
173 /* We have a question, let's see if this RR matches it */
174 r = dns_question_matches_rr(question, item->rr, NULL);
175 if (r < 0)
176 return r;
177 if (!r) {
178 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
179 r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
4838dc4f
LP
180 if (r < 0)
181 return r;
915ba31c
LP
182 if (!r)
183 continue;
4838dc4f 184 }
5bd7ebb3 185
915ba31c
LP
186 /* Mask the section info, we want the primary answers to always go without section
187 * info, so that it is added to the answer section when we synthesize a reply. */
5bd7ebb3 188
915ba31c
LP
189 r = reply_add_with_rrsig(
190 reply,
191 item->rr,
192 item->ifindex,
193 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
194 item->rrsig,
195 with_rrsig);
196 if (r < 0)
197 return r;
e8d23f92 198 }
b30bf55d 199
775ae354
LP
200 return 0;
201}
e8d23f92 202
775ae354
LP
203static int dns_stub_collect_answer_by_section(
204 DnsAnswer **reply,
205 DnsAnswer *answer,
206 DnsAnswerFlags section,
207 DnsAnswer *exclude1,
208 DnsAnswer *exclude2,
209 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
b30bf55d 210
775ae354 211 DnsAnswerItem *item;
775ae354 212 int r;
b30bf55d 213
775ae354
LP
214 assert(reply);
215
216 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
217 * avoid any RRs listed in 'exclude'. */
218
219 DNS_ANSWER_FOREACH_ITEM(item, answer) {
220
221 if (dns_answer_contains(exclude1, item->rr) ||
222 dns_answer_contains(exclude2, item->rr))
223 continue;
224
225 if (!with_dnssec &&
226 dns_type_is_dnssec(item->rr->key->type))
227 continue;
228
c4d98c3a 229 if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0)
775ae354
LP
230 continue;
231
5bd7ebb3
LP
232 r = reply_add_with_rrsig(
233 reply,
234 item->rr,
235 item->ifindex,
236 item->flags,
237 item->rrsig,
238 with_dnssec);
b30bf55d
LP
239 if (r < 0)
240 return r;
b30bf55d 241 }
e8d23f92 242
5bd7ebb3 243 return 0;
775ae354
LP
244}
245
246static int dns_stub_assign_sections(
247 DnsQuery *q,
248 DnsQuestion *question,
249 bool edns0_do) {
250
251 int r;
252
253 assert(q);
254 assert(question);
255
c6ebf89b
LP
256 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
257 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
258 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
259 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
260 * come with section information though (for example, because they were synthesized locally, and not
261 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
262 * 'answer' object that directly matches the original question is always put in the ANSWER section,
263 * regardless if it carries section info, or what that section info says. Then, anything from the
264 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
265 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
266 * the ADDITIONAL section. */
775ae354
LP
267
268 /* Include all RRs that directly answer the question in the answer section */
269 r = dns_stub_collect_answer_by_question(
270 &q->reply_answer,
271 q->answer,
272 question,
273 edns0_do);
274 if (r < 0)
275 return r;
276
d451f0e8 277 /* Include all RRs that originate from the authority sections, and aren't already listed in the
775ae354
LP
278 * answer section, in the authority section */
279 r = dns_stub_collect_answer_by_section(
280 &q->reply_authoritative,
281 q->answer,
d451f0e8 282 DNS_ANSWER_SECTION_AUTHORITY,
775ae354
LP
283 q->reply_answer, NULL,
284 edns0_do);
285 if (r < 0)
286 return r;
d451f0e8
LP
287
288 /* Include all RRs that originate from the answer or additional sections in the additional section
289 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
775ae354 290 r = dns_stub_collect_answer_by_section(
d451f0e8 291 &q->reply_additional,
775ae354 292 q->answer,
d451f0e8
LP
293 DNS_ANSWER_SECTION_ANSWER,
294 q->reply_answer, q->reply_authoritative,
775ae354
LP
295 edns0_do);
296 if (r < 0)
297 return r;
775ae354
LP
298 r = dns_stub_collect_answer_by_section(
299 &q->reply_additional,
300 q->answer,
301 DNS_ANSWER_SECTION_ADDITIONAL,
302 q->reply_answer, q->reply_authoritative,
303 edns0_do);
304 if (r < 0)
305 return r;
306 r = dns_stub_collect_answer_by_section(
307 &q->reply_additional,
308 q->answer,
309 0,
310 q->reply_answer, q->reply_authoritative,
311 edns0_do);
312 if (r < 0)
313 return r;
314
315 return 0;
316}
317
318static int dns_stub_make_reply_packet(
319 DnsPacket **ret,
320 size_t max_size,
321 DnsQuestion *q,
322 bool *ret_truncated) {
323
324 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
325 bool tc = false;
326 int r;
327
328 assert(ret);
329
330 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
331 if (r < 0)
332 return r;
333
334 r = dns_packet_append_question(p, q);
335 if (r == -EMSGSIZE)
336 tc = true;
337 else if (r < 0)
338 return r;
339
51027656 340 if (ret_truncated)
775ae354
LP
341 *ret_truncated = tc;
342 else if (tc)
51027656
LP
343 return -EMSGSIZE;
344
775ae354 345 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
e8d23f92 346
775ae354
LP
347 *ret = TAKE_PTR(p);
348 return 0;
349}
350
351static int dns_stub_add_reply_packet_body(
352 DnsPacket *p,
353 DnsAnswer *answer,
354 DnsAnswer *authoritative,
355 DnsAnswer *additional,
356 bool edns0_do, /* Client expects DNSSEC RRs? */
357 bool *truncated) {
358
359 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
360 bool tc = false;
361 int r;
362
363 assert(p);
364
365 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
366 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
367 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
368 * the dropped RRs should not be essential. */
369
370 r = dns_packet_append_answer(p, answer, &n_answer);
371 if (r == -EMSGSIZE)
372 tc = true;
373 else if (r < 0)
374 return r;
375 else {
376 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
377 if (r == -EMSGSIZE) {
378 if (edns0_do)
379 tc = true;
380 } else if (r < 0)
381 return r;
382 else {
383 r = dns_packet_append_answer(p, additional, &n_additional);
384 if (r < 0 && r != -EMSGSIZE)
385 return r;
386 }
387 }
388
389 if (tc) {
390 if (!truncated)
391 return -EMSGSIZE;
392
393 *truncated = true;
394 }
395
396 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
397 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
398 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
e8d23f92
LP
399 return 0;
400}
401
4a6eb824
LP
402static const char *nsid_string(void) {
403 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
404 sd_id128_t id;
405 int r;
406
407 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
408 * as systemd-resolved, and return a different string for each resolved instance without leaking host
409 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
410 * machine ID but from which the machine ID cannot be determined.
411 *
412 * Clients can use this to determine whether an answer is originating locally or is proxied from
413 * upstream. */
414
415 if (!isempty(buffer))
416 return buffer;
417
418 r = sd_id128_get_machine_app_specific(
419 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
420 &id);
421 if (r < 0) {
b480543c 422 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
4a6eb824
LP
423 return NULL;
424 }
425
426 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
427 return buffer;
428}
429
e8d23f92
LP
430static int dns_stub_finish_reply_packet(
431 DnsPacket *p,
432 uint16_t id,
433 int rcode,
51027656 434 bool tc, /* set the Truncated bit? */
4ad017cd 435 bool aa, /* set the Authoritative Answer bit? */
e8d23f92
LP
436 bool add_opt, /* add an OPT RR to this packet? */
437 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
b370adb5 438 bool ad, /* set the DNSSEC authenticated data bit? */
775ae354 439 bool cd, /* set the DNSSEC checking disabled bit? */
4a6eb824
LP
440 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
441 bool nsid) { /* whether to add NSID */
e8d23f92
LP
442
443 int r;
444
445 assert(p);
446
ff4caaae 447 if (add_opt) {
4a6eb824 448 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
ff4caaae
LP
449 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
450 tc = true;
451 else if (r < 0)
452 return r;
ff4caaae 453 } else {
941dd294
LP
454 /* If the client can't to EDNS0, don't do DO either */
455 edns0_do = false;
456
775ae354 457 /* If we don't do EDNS, clamp the rcode to 4 bit */
941dd294
LP
458 if (rcode > 0xF)
459 rcode = DNS_RCODE_SERVFAIL;
460 }
461
8c9c68b5
LP
462 /* Don't set the CD bit unless DO is on, too */
463 if (!edns0_do)
775ae354
LP
464 cd = false;
465
8c9c68b5
LP
466 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
467 * 5.7 */
e8d23f92
LP
468
469 DNS_PACKET_HEADER(p)->id = id;
470
471 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
51027656
LP
472 1 /* qr */,
473 0 /* opcode */,
4ad017cd 474 aa /* aa */,
51027656
LP
475 tc /* tc */,
476 1 /* rd */,
477 1 /* ra */,
e8d23f92 478 ad /* ad */,
775ae354 479 cd /* cd */,
e8d23f92 480 rcode));
b30bf55d 481
b30bf55d
LP
482 return 0;
483}
484
0354029b
LP
485static int dns_stub_send(
486 Manager *m,
487 DnsStubListenerExtra *l,
488 DnsStream *s,
489 DnsPacket *p,
490 DnsPacket *reply) {
491
b30bf55d
LP
492 int r;
493
494 assert(m);
495 assert(p);
496 assert(reply);
497
498 if (s)
499 r = dns_stream_write_packet(s, reply);
0354029b 500 else
b30bf55d
LP
501 /* Note that it is essential here that we explicitly choose the source IP address for this packet. This
502 * is because otherwise the kernel will choose it automatically based on the routing table and will
503 * thus pick 127.0.0.1 rather than 127.0.0.53. */
0354029b 504 r = manager_send(m,
b5febb3f 505 manager_dns_stub_fd_extra(m, l, SOCK_DGRAM),
0354029b
LP
506 l ? p->ifindex : LOOPBACK_IFINDEX, /* force loopback iface if this is the main listener stub */
507 p->family, &p->sender, p->sender_port, &p->destination,
508 reply);
b30bf55d
LP
509 if (r < 0)
510 return log_debug_errno(r, "Failed to send reply packet: %m");
511
512 return 0;
513}
514
39005e18
LP
515static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
516 assert(q);
517
518 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
519 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
520 * set cd */
521
522 return DNS_PACKET_DO(q->request_packet) &&
523 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
524 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
525 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
526}
527
5d7da51e
LP
528static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
529 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
530 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
531 * element). E.g. it could be that an RR that was included in the first reply's additional section
532 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
533 * remove everything in the "higher priority" sections from the "lower priority" sections.
534 *
535 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
536 * always end up in one section fully or not at all, but never be split among sections.
537 *
538 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
539 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
540
541 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
542 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
543 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
544}
545
775ae354
LP
546static int dns_stub_send_reply(
547 DnsQuery *q,
548 int rcode) {
549
550 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
551 bool truncated, edns0_do;
552 int r;
553
554 assert(q);
555
39005e18 556 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
775ae354 557
775ae354
LP
558 r = dns_stub_make_reply_packet(
559 &reply,
560 DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
561 q->request_packet->question,
562 &truncated);
563 if (r < 0)
564 return log_debug_errno(r, "Failed to build reply packet: %m");
565
5d7da51e
LP
566 dns_stub_suppress_duplicate_section_rrs(q);
567
775ae354
LP
568 r = dns_stub_add_reply_packet_body(
569 reply,
570 q->reply_answer,
571 q->reply_authoritative,
572 q->reply_additional,
573 edns0_do,
574 &truncated);
575 if (r < 0)
576 return log_debug_errno(r, "Failed to append reply packet body: %m");
577
578 r = dns_stub_finish_reply_packet(
579 reply,
580 DNS_PACKET_ID(q->request_packet),
581 rcode,
582 truncated,
9ddf099f 583 dns_query_fully_authoritative(q),
775ae354
LP
584 !!q->request_packet->opt,
585 edns0_do,
8c9c68b5 586 DNS_PACKET_AD(q->request_packet) && dns_query_fully_authenticated(q),
775ae354 587 DNS_PACKET_CD(q->request_packet),
4a6eb824
LP
588 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
589 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
775ae354
LP
590 if (r < 0)
591 return log_debug_errno(r, "Failed to build failure packet: %m");
592
593 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
594}
595
0354029b
LP
596static int dns_stub_send_failure(
597 Manager *m,
598 DnsStubListenerExtra *l,
599 DnsStream *s,
600 DnsPacket *p,
601 int rcode,
602 bool authenticated) {
603
b30bf55d 604 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
775ae354 605 bool truncated;
b30bf55d
LP
606 int r;
607
608 assert(m);
609 assert(p);
610
775ae354
LP
611 r = dns_stub_make_reply_packet(
612 &reply,
613 DNS_PACKET_PAYLOAD_SIZE_MAX(p),
614 p->question,
615 &truncated);
e8d23f92
LP
616 if (r < 0)
617 return log_debug_errno(r, "Failed to make failure packet: %m");
618
b370adb5
LP
619 r = dns_stub_finish_reply_packet(
620 reply,
621 DNS_PACKET_ID(p),
622 rcode,
775ae354 623 truncated,
4ad017cd 624 false,
b370adb5
LP
625 !!p->opt,
626 DNS_PACKET_DO(p),
8c9c68b5 627 DNS_PACKET_AD(p) && authenticated,
775ae354 628 DNS_PACKET_CD(p),
4a6eb824
LP
629 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
630 dns_packet_has_nsid_request(p) > 0 && !l);
b30bf55d
LP
631 if (r < 0)
632 return log_debug_errno(r, "Failed to build failure packet: %m");
633
0354029b 634 return dns_stub_send(m, l, s, p, reply);
b30bf55d
LP
635}
636
775ae354
LP
637static int dns_stub_patch_bypass_reply_packet(
638 DnsPacket **ret, /* Where to place the patched packet */
639 DnsPacket *original, /* The packet to patch */
640 DnsPacket *request) { /* The packet the patched packet shall look like a reply to */
641 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
642 int r;
643
644 assert(ret);
645 assert(original);
646 assert(request);
647
648 r = dns_packet_dup(&c, original);
649 if (r < 0)
650 return r;
651
652 /* Extract the packet, so that we know where the OPT field is */
653 r = dns_packet_extract(c);
654 if (r < 0)
655 return r;
656
657 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
658 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
659
660 /* Patch in our own maximum datagram size, if EDNS0 was on */
661 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
662 if (r < 0)
663 return r;
664
665 /* Lower all TTLs by the time passed since we received the datagram. */
666 if (timestamp_is_set(original->timestamp)) {
667 r = dns_packet_patch_ttls(c, original->timestamp);
668 if (r < 0)
669 return r;
670 }
671
672 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
673 * set the TC bit if our reply is larger than what the client supports, and truncate. */
674 if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) {
675 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
676 dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request));
677 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
678 }
679
680 *ret = TAKE_PTR(c);
681 return 0;
682}
683
b30bf55d
LP
684static void dns_stub_query_complete(DnsQuery *q) {
685 int r;
686
687 assert(q);
775ae354 688 assert(q->request_packet);
b30bf55d 689
775ae354
LP
690 if (q->question_bypass) {
691 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
692 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
693 * packets are not 100% compatible.) */
b30bf55d 694
775ae354
LP
695 if (q->answer_full_packet &&
696 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
697 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
e8d23f92 698
775ae354
LP
699 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
700 if (r < 0)
701 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
702 else
703 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
704
705 dns_query_free(q);
706 return;
e8d23f92 707 }
775ae354 708 }
b30bf55d 709
b97fc571
LP
710 /* Take all data from the current reply, and merge it into the three reply sections we are building
711 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
712 * and keep adding all RRs in the CNAME chain. */
713 r = dns_stub_assign_sections(
714 q,
a7c0291c 715 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
b97fc571
LP
716 dns_stub_reply_with_edns0_do(q));
717 if (r < 0) {
718 log_debug_errno(r, "Failed to assign sections: %m");
719 dns_query_free(q);
720 return;
721 }
2f4d8e57 722
775ae354
LP
723 switch (q->state) {
724
915ba31c
LP
725 case DNS_TRANSACTION_SUCCESS: {
726 bool first = true;
727
728 for (;;) {
729 int cname_result;
730
731 cname_result = dns_query_process_cname_one(q);
732 if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */
733 log_debug_errno(r, "Detected CNAME loop, returning what we already have.");
734 (void) dns_stub_send_reply(q, q->answer_rcode);
735 break;
736 }
737 if (cname_result < 0) {
738 log_debug_errno(cname_result, "Failed to process CNAME: %m");
739 break;
740 }
741
742 if (cname_result == DNS_QUERY_NOMATCH) {
743 /* This answer doesn't contain any RR that would answer our question
744 * positively, i.e. neither directly nor via CNAME. */
745
746 if (first) /* We never followed a CNAME and the answer doesn't match our
747 * question at all? Then this is final, the empty answer is the
748 * answer. */
749 break;
750
751 /* Otherwise, we already followed a CNAME once within this packet, and the
752 * packet doesn't answer our question. In that case let's restart the query,
753 * now with the redirected question. We'll */
754 r = dns_query_go(q);
755 if (r < 0)
756 log_debug_errno(r, "Failed to restart query: %m");
757
758 return;
759 }
760
761 r = dns_stub_assign_sections(
762 q,
763 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
764 dns_stub_reply_with_edns0_do(q));
765 if (r < 0) {
766 log_debug_errno(r, "Failed to assign sections: %m");
767 dns_query_free(q);
768 return;
769 }
770
771 if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
772 break;
773
774 /* We followed a CNAME. and collected the RRs that answer the redirected question
775 * successfully. Let's not try to do this again. */
776 assert(cname_result == DNS_QUERY_CNAME);
777 first = false;
b97fc571 778 }
b97fc571
LP
779
780 _fallthrough_;
915ba31c 781 }
b97fc571 782
b30bf55d 783 case DNS_TRANSACTION_RCODE_FAILURE:
775ae354 784 (void) dns_stub_send_reply(q, q->answer_rcode);
b30bf55d
LP
785 break;
786
787 case DNS_TRANSACTION_NOT_FOUND:
775ae354 788 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
b30bf55d
LP
789 break;
790
791 case DNS_TRANSACTION_TIMEOUT:
792 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
793 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
794 break;
795
796 case DNS_TRANSACTION_NO_SERVERS:
797 case DNS_TRANSACTION_INVALID_REPLY:
798 case DNS_TRANSACTION_ERRNO:
799 case DNS_TRANSACTION_ABORTED:
800 case DNS_TRANSACTION_DNSSEC_FAILED:
801 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
802 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
803 case DNS_TRANSACTION_NETWORK_DOWN:
775ae354 804 case DNS_TRANSACTION_NO_SOURCE:
49ef064c 805 case DNS_TRANSACTION_STUB_LOOP:
775ae354 806 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
b30bf55d
LP
807 break;
808
809 case DNS_TRANSACTION_NULL:
810 case DNS_TRANSACTION_PENDING:
811 case DNS_TRANSACTION_VALIDATING:
812 default:
813 assert_not_reached("Impossible state");
814 }
815
b30bf55d
LP
816 dns_query_free(q);
817}
818
819static int dns_stub_stream_complete(DnsStream *s, int error) {
820 assert(s);
821
b412af57
LP
822 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
823
824 for (;;) {
825 DnsQuery *q;
826
827 q = set_first(s->queries);
828 if (!q)
829 break;
b30bf55d 830
b412af57
LP
831 dns_query_free(q);
832 }
b30bf55d 833
b412af57
LP
834 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
835 * should be kept as long as the client wants to. */
836 dns_stream_unref(s);
b30bf55d
LP
837 return 0;
838}
839
0354029b 840static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
ceb17827 841 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
bde69bbd
LP
842 Hashmap **queries_by_packet;
843 DnsQuery *existing;
b30bf55d
LP
844 int r;
845
846 assert(m);
847 assert(p);
848 assert(p->protocol == DNS_PROTOCOL_DNS);
849
0354029b 850 if (!l && /* l == NULL if this is the main stub */
d1fb8cda
YW
851 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
852 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
565147b7 853 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
ceb17827 854 return;
b30bf55d
LP
855 }
856
a9fd8837
LP
857 if (manager_packet_from_our_transaction(m, p)) {
858 log_debug("Got our own packet looped back, ignoring.");
859 return;
860 }
861
bde69bbd
LP
862 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
863 existing = hashmap_get(*queries_by_packet, p);
864 if (existing && dns_packet_equal(existing->request_packet, p)) {
865 log_debug("Got repeat packet from client, ignoring.");
866 return;
867 }
868
b30bf55d
LP
869 r = dns_packet_extract(p);
870 if (r < 0) {
871 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
0354029b 872 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
ceb17827 873 return;
b30bf55d
LP
874 }
875
876 if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
877 log_debug("Got EDNS OPT field with unsupported version number.");
0354029b 878 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
ceb17827 879 return;
b30bf55d
LP
880 }
881
882 if (dns_type_is_obsolete(p->question->keys[0]->type)) {
883 log_debug("Got message with obsolete key type, refusing.");
30ee7071 884 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 885 return;
b30bf55d
LP
886 }
887
888 if (dns_type_is_zone_transer(p->question->keys[0]->type)) {
889 log_debug("Got request for zone transfer, refusing.");
30ee7071 890 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 891 return;
b30bf55d
LP
892 }
893
894 if (!DNS_PACKET_RD(p)) {
895 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
896 log_debug("Got request with recursion disabled, refusing.");
0354029b 897 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 898 return;
b30bf55d
LP
899 }
900
bde69bbd
LP
901 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
902 if (r < 0) {
903 log_oom();
904 return;
905 }
906
b30bf55d 907 if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) {
775ae354
LP
908 log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
909
910 r = dns_query_new(m, &q, NULL, NULL, p, 0,
911 SD_RESOLVED_PROTOCOLS_ALL|
912 SD_RESOLVED_NO_CNAME|
913 SD_RESOLVED_NO_SEARCH|
914 SD_RESOLVED_NO_VALIDATE|
915 SD_RESOLVED_REQUIRE_PRIMARY|
916 SD_RESOLVED_CLAMP_TTL);
917 } else
918 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
919 SD_RESOLVED_PROTOCOLS_ALL|
920 SD_RESOLVED_NO_SEARCH|
2f4d8e57 921 (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
775ae354 922 SD_RESOLVED_CLAMP_TTL);
b30bf55d
LP
923 if (r < 0) {
924 log_error_errno(r, "Failed to generate query object: %m");
0354029b 925 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 926 return;
b30bf55d
LP
927 }
928
775ae354
LP
929 q->request_packet = dns_packet_ref(p);
930 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
0354029b 931 q->stub_listener_extra = l;
b30bf55d
LP
932 q->complete = dns_stub_query_complete;
933
934 if (s) {
b412af57
LP
935 /* Remember which queries belong to this stream, so that we can cancel them when the stream
936 * is disconnected early */
937
ceb17827 938 r = set_ensure_put(&s->queries, NULL, q);
b412af57
LP
939 if (r < 0) {
940 log_oom();
ceb17827 941 return;
b412af57 942 }
ceb17827 943 assert(r > 0);
b30bf55d
LP
944 }
945
bde69bbd
LP
946 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
947 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
948 * isn't particularly bad. */
949 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
950
b30bf55d
LP
951 r = dns_query_go(q);
952 if (r < 0) {
953 log_error_errno(r, "Failed to start query: %m");
0354029b 954 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 955 return;
b30bf55d
LP
956 }
957
52e63427 958 log_debug("Processing query...");
ceb17827 959 TAKE_PTR(q);
b30bf55d
LP
960}
961
0354029b 962static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
b30bf55d 963 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
b30bf55d
LP
964 int r;
965
966 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
967 if (r <= 0)
968 return r;
969
970 if (dns_packet_validate_query(p) > 0) {
971 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
972
0354029b 973 dns_stub_process_query(m, l, NULL, p);
b30bf55d
LP
974 } else
975 log_debug("Invalid DNS stub UDP packet, ignoring.");
976
977 return 0;
978}
979
d1fb8cda 980static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b 981 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
d1fb8cda
YW
982}
983
984static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b
LP
985 DnsStubListenerExtra *l = userdata;
986
987 assert(l);
988
989 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
d1fb8cda
YW
990}
991
e4bed40f
ZJS
992static int on_dns_stub_stream_packet(DnsStream *s) {
993 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
994
995 assert(s);
996
997 p = dns_stream_take_read_packet(s);
998 assert(p);
999
1000 if (dns_packet_validate_query(p) > 0) {
1001 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1002
1003 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1004 } else
1005 log_debug("Invalid DNS stub TCP packet, ignoring.");
1006
1007 return 0;
1008}
1009
1010static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1011 DnsStream *stream;
1012 int cfd, r;
1013
1014 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1015 if (cfd < 0) {
1016 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1017 return 0;
1018
1019 return -errno;
1020 }
1021
1022 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL);
1023 if (r < 0) {
1024 safe_close(cfd);
1025 return r;
1026 }
1027
1028 stream->stub_listener_extra = l;
1029 stream->on_packet = on_dns_stub_stream_packet;
1030 stream->complete = dns_stub_stream_complete;
1031
1032 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1033
1034 return 0;
1035}
1036
1037static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1038 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1039}
1040
1041static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1042 DnsStubListenerExtra *l = userdata;
1043
1044 assert(l);
1045 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1046}
1047
/* Applies the socket options shared by all stub listener sockets, UDP and TCP alike: SO_REUSEADDR,
 * plus reception of packet info and TTL for the given address family. Stops at the first option
 * that fails and returns its negative error; returns 0 if all of them were applied. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int k;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        k = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (k >= 0)
                k = socket_set_recvpktinfo(fd, family, true);
        if (k >= 0)
                k = socket_set_recvttl(fd, family, true);

        return k < 0 ? k : 0;
}
1068
8624f128
LP
/* Applies the TCP-specific options used on stub listening sockets. Both options are best-effort:
 * a failure is logged at debug level and otherwise ignored, so this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int k;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        k = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        k = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1084
d491917c 1085static int manager_dns_stub_fd(Manager *m, int type) {
b30bf55d
LP
1086 union sockaddr_union sa = {
1087 .in.sin_family = AF_INET,
b30bf55d 1088 .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
d491917c 1089 .in.sin_port = htobe16(53),
b30bf55d 1090 };
424e490b 1091 _cleanup_close_ int fd = -1;
b30bf55d
LP
1092 int r;
1093
d491917c
ZJS
1094 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
1095
1096 sd_event_source **event_source = type == SOCK_DGRAM ? &m->dns_stub_udp_event_source : &m->dns_stub_tcp_event_source;
1097 if (*event_source)
1098 return sd_event_source_get_io_fd(*event_source);
b30bf55d 1099
d491917c 1100 fd = socket(AF_INET, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
424e490b 1101 if (fd < 0)
b30bf55d
LP
1102 return -errno;
1103
af8b1384 1104 r = set_dns_stub_common_socket_options(fd, AF_INET);
2ff48e98
LP
1105 if (r < 0)
1106 return r;
b30bf55d 1107
8624f128
LP
1108 if (type == SOCK_STREAM) {
1109 r = set_dns_stub_common_tcp_socket_options(fd);
1110 if (r < 0)
1111 return r;
1112 }
1113
b30bf55d 1114 /* Make sure no traffic from outside the local host can leak to onto this socket */
953a02d1
LP
1115 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1116 if (r < 0)
1117 return r;
b30bf55d 1118
d491917c
ZJS
1119 r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, 1);
1120 if (r < 0)
1121 return r;
1122
424e490b
ZJS
1123 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1124 return -errno;
b30bf55d 1125
d491917c
ZJS
1126 if (type == SOCK_STREAM &&
1127 listen(fd, SOMAXCONN) < 0)
1128 return -errno;
1129
1130 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1131 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1132 m);
b30bf55d 1133 if (r < 0)
424e490b 1134 return r;
b30bf55d 1135
d491917c 1136 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1137 if (r < 0)
1138 return r;
1139
d491917c
ZJS
1140 (void) sd_event_source_set_description(*event_source,
1141 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
b30bf55d 1142
7216a3b5 1143 return TAKE_FD(fd);
b30bf55d
LP
1144}
1145
b5febb3f 1146static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1f05101f
SS
1147 _cleanup_free_ char *pretty = NULL;
1148 _cleanup_close_ int fd = -1;
ca8b62b5 1149 union sockaddr_union sa;
1f05101f
SS
1150 int r;
1151
0354029b 1152 assert(m);
b5febb3f 1153 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
0354029b 1154
d491917c
ZJS
1155 if (!l)
1156 return manager_dns_stub_fd(m, type);
0354029b 1157
b5febb3f
ZJS
1158 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1159 if (*event_source)
1160 return sd_event_source_get_io_fd(*event_source);
1f05101f 1161
ca8b62b5
YW
1162 if (l->family == AF_INET)
1163 sa = (union sockaddr_union) {
1164 .in.sin_family = l->family,
49ef064c 1165 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1166 .in.sin_addr = l->address.in,
1167 };
1168 else
1169 sa = (union sockaddr_union) {
1170 .in6.sin6_family = l->family,
49ef064c 1171 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1172 .in6.sin6_addr = l->address.in6,
1173 };
1174
b5febb3f 1175 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1f05101f
SS
1176 if (fd < 0) {
1177 r = -errno;
1178 goto fail;
1179 }
1180
af8b1384 1181 r = set_dns_stub_common_socket_options(fd, l->family);
1f05101f
SS
1182 if (r < 0)
1183 goto fail;
1184
8624f128
LP
1185 if (type == SOCK_STREAM) {
1186 r = set_dns_stub_common_tcp_socket_options(fd);
1187 if (r < 0)
1188 goto fail;
1189 }
1190
69e3234d 1191 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
b5febb3f
ZJS
1192 * people may want ttl > 1. */
1193
5d0fe423 1194 r = socket_set_freebind(fd, l->family, true);
b5febb3f
ZJS
1195 if (r < 0)
1196 goto fail;
1197
eb170e75
LP
1198 if (type == SOCK_DGRAM) {
1199 r = socket_disable_pmtud(fd, l->family);
1200 if (r < 0)
1201 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
20a001bd
LP
1202
1203 r = socket_set_recvfragsize(fd, l->family, true);
1204 if (r < 0)
1205 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
eb170e75
LP
1206 }
1207
ca8b62b5 1208 if (bind(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0) {
1f05101f
SS
1209 r = -errno;
1210 goto fail;
1211 }
1212
b5febb3f
ZJS
1213 if (type == SOCK_STREAM &&
1214 listen(fd, SOMAXCONN) < 0) {
1215 r = -errno;
1216 goto fail;
1217 }
1218
1219 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1220 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1221 l);
1f05101f
SS
1222 if (r < 0)
1223 goto fail;
1224
b5febb3f 1225 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1226 if (r < 0)
1227 goto fail;
1228
b5febb3f
ZJS
1229 (void) sd_event_source_set_description(*event_source,
1230 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1f05101f
SS
1231
1232 if (DEBUG_LOGGING) {
ca8b62b5 1233 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1234 log_debug("Listening on %s socket %s.",
1235 type == SOCK_DGRAM ? "UDP" : "TCP",
1236 strnull(pretty));
1f05101f
SS
1237 }
1238
7216a3b5 1239 return TAKE_FD(fd);
1f05101f 1240
b4b7ea1b 1241fail:
1c17bcb3 1242 assert(r < 0);
ca8b62b5 1243 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1244 return log_warning_errno(r,
1245 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1246 "Failed to listen on %s socket %s: %m",
1247 type == SOCK_DGRAM ? "UDP" : "TCP",
1248 strnull(pretty));
1f05101f
SS
1249}
1250
b30bf55d 1251int manager_dns_stub_start(Manager *m) {
424e490b 1252 const char *t = "UDP";
01b0669e 1253 int r = 0;
b30bf55d
LP
1254
1255 assert(m);
1256
d5da7707
ZJS
1257 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1258 log_debug("Not creating stub listener.");
1259 else
1260 log_debug("Creating stub listener using %s.",
1261 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1262 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1263 "UDP/TCP");
1264
88d2cb7c 1265 if (FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_UDP))
d491917c 1266 r = manager_dns_stub_fd(m, SOCK_DGRAM);
b30bf55d 1267
424e490b 1268 if (r >= 0 &&
88d2cb7c 1269 FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_TCP)) {
424e490b 1270 t = "TCP";
d491917c 1271 r = manager_dns_stub_fd(m, SOCK_STREAM);
1ae43295 1272 }
b30bf55d 1273
0f4db364 1274 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
d491917c
ZJS
1275 log_warning_errno(r,
1276 r == -EADDRINUSE ? "Another process is already listening on %s socket 127.0.0.53:53.\n"
1277 "Turning off local DNS stub support." :
1278 "Failed to listen on %s socket 127.0.0.53:53: %m.\n"
1279 "Turning off local DNS stub support.",
1280 t);
424e490b
ZJS
1281 manager_dns_stub_stop(m);
1282 } else if (r < 0)
1283 return log_error_errno(r, "Failed to listen on %s socket 127.0.0.53:53: %m", t);
b30bf55d 1284
1f05101f 1285 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
36aaabc3 1286 DnsStubListenerExtra *l;
1f05101f 1287
dce65cd4 1288 log_debug("Creating extra stub listeners.");
1f05101f 1289
90e74a66 1290 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
7314b397 1291 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
b5febb3f 1292 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
7314b397 1293 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
b5febb3f 1294 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
7314b397 1295 }
1f05101f
SS
1296 }
1297
b30bf55d
LP
1298 return 0;
1299}
1300
1301void manager_dns_stub_stop(Manager *m) {
1302 assert(m);
1303
97935302
ZJS
1304 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1305 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
b30bf55d 1306}
ae8f0ec3
LP
1307
1308static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
97935302 1309 [DNS_STUB_LISTENER_NO] = "no",
ae8f0ec3
LP
1310 [DNS_STUB_LISTENER_UDP] = "udp",
1311 [DNS_STUB_LISTENER_TCP] = "tcp",
1312 [DNS_STUB_LISTENER_YES] = "yes",
1313};
1314DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);