]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/resolve/resolved-dns-stub.c
resolved: let's tweak how we calculate TTL left
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b30bf55d 2
ca8b62b5 3#include <net/if_arp.h>
8624f128 4#include <netinet/tcp.h>
ca8b62b5 5
4ff9bc2e 6#include "errno-util.h"
b30bf55d 7#include "fd-util.h"
ef118d00 8#include "missing_network.h"
af8b1384 9#include "missing_socket.h"
b30bf55d 10#include "resolved-dns-stub.h"
1f05101f 11#include "socket-netlink.h"
b30bf55d 12#include "socket-util.h"
4a6eb824 13#include "stdio-util.h"
ae8f0ec3 14#include "string-table.h"
b30bf55d
LP
15
/* Maximum datagram size advertised on the main stub: the loopback device MTU on Linux is 64K, from which the
 * Ethernet, IP and UDP header sizes are subtracted. */
#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U - 14U - 20U - 8U)

/* The extra stub listeners advertise a more conservative maximum datagram size. */
#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22
b5febb3f 23static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
0354029b 24
ae8f0ec3
LP
25static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
26 assert(a);
27
28 siphash24_compress(&a->mode, sizeof(a->mode), state);
29 siphash24_compress(&a->family, sizeof(a->family), state);
30 siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
31 siphash24_compress(&a->port, sizeof(a->port), state);
32}
33
34static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
35 int r;
36
37 assert(a);
38 assert(b);
39
40 r = CMP(a->mode, b->mode);
41 if (r != 0)
42 return r;
43
44 r = CMP(a->family, b->family);
45 if (r != 0)
46 return r;
47
48 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
49 if (r != 0)
50 return r;
51
52 return CMP(a->port, b->port);
53}
54
/* Hash operations for containers keyed by DnsStubListenerExtra objects, using the hash and compare functions
 * above, with dns_stub_listener_extra_free() registered as key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
61
0354029b
LP
62int dns_stub_listener_extra_new(
63 Manager *m,
64 DnsStubListenerExtra **ret) {
ae8f0ec3 65
36aaabc3 66 DnsStubListenerExtra *l;
1f05101f 67
0354029b 68 l = new(DnsStubListenerExtra, 1);
1f05101f
SS
69 if (!l)
70 return -ENOMEM;
71
0354029b
LP
72 *l = (DnsStubListenerExtra) {
73 .manager = m,
74 };
1f05101f 75
0354029b 76 *ret = TAKE_PTR(l);
1f05101f
SS
77 return 0;
78}
79
36aaabc3 80DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
bf22f231
YW
81 if (!p)
82 return NULL;
83
97935302
ZJS
84 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
85 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
bf22f231 86
bde69bbd
LP
87 hashmap_free(p->queries_by_packet);
88
bf22f231
YW
89 return mfree(p);
90}
91
bde69bbd
LP
92static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
93 assert(p);
94
95 siphash24_compress(&p->protocol, sizeof(p->protocol), state);
96 siphash24_compress(&p->family, sizeof(p->family), state);
97 siphash24_compress(&p->sender, sizeof(p->sender), state);
98 siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
99 siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
100 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
101
102 /* We don't bother hashing the full packet here, just the header */
103}
104
105static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
106 int r;
107
108 r = CMP(x->protocol, y->protocol);
109 if (r != 0)
110 return r;
111
112 r = CMP(x->family, y->family);
113 if (r != 0)
114 return r;
115
116 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
117 if (r != 0)
118 return r;
119
120 r = CMP(x->ipproto, y->ipproto);
121 if (r != 0)
122 return r;
123
124 r = CMP(x->sender_port, y->sender_port);
125 if (r != 0)
126 return r;
127
128 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
129}
130
/* Hash operations for tables keyed by request packet, built from the two functions above. */
DEFINE_HASH_OPS(stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
132
5bd7ebb3
LP
133static int reply_add_with_rrsig(
134 DnsAnswer **reply,
135 DnsResourceRecord *rr,
136 int ifindex,
137 DnsAnswerFlags flags,
138 DnsResourceRecord *rrsig,
139 bool with_rrsig) {
140 int r;
141
142 assert(reply);
143 assert(rr);
144
145 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
146 if (r < 0)
147 return r;
148
149 if (with_rrsig && rrsig) {
150 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
151 if (r < 0)
152 return r;
153 }
154
155 return 0;
156}
157
775ae354
LP
158static int dns_stub_collect_answer_by_question(
159 DnsAnswer **reply,
51027656 160 DnsAnswer *answer,
775ae354
LP
161 DnsQuestion *question,
162 bool with_rrsig) { /* Add RRSIG RR matching each RR */
b30bf55d 163
5bd7ebb3 164 _cleanup_(dns_resource_key_unrefp) DnsResourceKey *redirected_key = NULL;
4838dc4f 165 unsigned n_cname_redirects = 0;
775ae354 166 DnsAnswerItem *item;
b30bf55d
LP
167 int r;
168
775ae354 169 assert(reply);
e8d23f92 170
4838dc4f
LP
171 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. There might be direct and
172 * indirect matches (i.e. via CNAME/DNAME). If they have an indirect one, remember where we need to
173 * go, and restart the loop */
174
175 for (;;) {
176 _cleanup_(dns_resource_key_unrefp) DnsResourceKey *next_redirected_key = NULL;
177
178 DNS_ANSWER_FOREACH_ITEM(item, answer) {
179 DnsResourceKey *k = NULL;
180
181 if (redirected_key) {
182 /* There was a redirect in this packet, let's collect all matching RRs for the redirect */
183 r = dns_resource_key_match_rr(redirected_key, item->rr, NULL);
184 if (r < 0)
185 return r;
186
187 k = redirected_key;
188 } else if (question) {
189 /* We have a question, let's see if this RR matches it */
190 r = dns_question_matches_rr(question, item->rr, NULL);
191 if (r < 0)
192 return r;
193
194 k = question->keys[0];
195 } else
196 r = 1; /* No question, everything matches */
b30bf55d 197
5bd7ebb3
LP
198 if (r == 0) {
199 _cleanup_free_ char *target = NULL;
200
201 /* OK, so the RR doesn't directly match. Let's see if the RR is a matching
202 * CNAME or DNAME */
203
4838dc4f
LP
204 assert(k);
205
206 r = dns_resource_record_get_cname_target(k, item->rr, &target);
5bd7ebb3
LP
207 if (r == -EUNATCH)
208 continue; /* Not a CNAME/DNAME or doesn't match */
775ae354
LP
209 if (r < 0)
210 return r;
775ae354 211
4838dc4f
LP
212 /* Oh, wow, this is a redirect. Let's remember where this points, and store
213 * it in 'next_redirected_key'. Once we finished iterating through the rest
214 * of the RR's we'll start again, with the redirected RR key. */
215
216 n_cname_redirects++;
217 if (n_cname_redirects > CNAME_REDIRECT_MAX) /* don't loop forever */
218 return -ELOOP;
219
220 dns_resource_key_unref(next_redirected_key);
5bd7ebb3
LP
221
222 /* There can only be one CNAME per name, hence no point in storing more than one here */
4838dc4f
LP
223 next_redirected_key = dns_resource_key_new(k->class, k->type, target);
224 if (!next_redirected_key)
5bd7ebb3
LP
225 return -ENOMEM;
226 }
775ae354 227
4838dc4f
LP
228 /* Mask the section info, we want the primary answers to always go without section info, so
229 * that it is added to the answer section when we synthesize a reply. */
5bd7ebb3 230
4838dc4f
LP
231 r = reply_add_with_rrsig(
232 reply,
233 item->rr,
234 item->ifindex,
235 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
236 item->rrsig,
237 with_rrsig);
238 if (r < 0)
239 return r;
240 }
5bd7ebb3 241
4838dc4f
LP
242 if (!next_redirected_key)
243 break;
5bd7ebb3 244
4838dc4f
LP
245 dns_resource_key_unref(redirected_key);
246 redirected_key = TAKE_PTR(next_redirected_key);
e8d23f92 247 }
b30bf55d 248
775ae354
LP
249 return 0;
250}
e8d23f92 251
775ae354
LP
252static int dns_stub_collect_answer_by_section(
253 DnsAnswer **reply,
254 DnsAnswer *answer,
255 DnsAnswerFlags section,
256 DnsAnswer *exclude1,
257 DnsAnswer *exclude2,
258 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
b30bf55d 259
775ae354 260 DnsAnswerItem *item;
775ae354 261 int r;
b30bf55d 262
775ae354
LP
263 assert(reply);
264
265 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
266 * avoid any RRs listed in 'exclude'. */
267
268 DNS_ANSWER_FOREACH_ITEM(item, answer) {
269
270 if (dns_answer_contains(exclude1, item->rr) ||
271 dns_answer_contains(exclude2, item->rr))
272 continue;
273
274 if (!with_dnssec &&
275 dns_type_is_dnssec(item->rr->key->type))
276 continue;
277
278 if (((item->flags ^ section) & (DNS_ANSWER_SECTION_ANSWER|DNS_ANSWER_SECTION_AUTHORITY|DNS_ANSWER_SECTION_ADDITIONAL)) != 0)
279 continue;
280
5bd7ebb3
LP
281 r = reply_add_with_rrsig(
282 reply,
283 item->rr,
284 item->ifindex,
285 item->flags,
286 item->rrsig,
287 with_dnssec);
b30bf55d
LP
288 if (r < 0)
289 return r;
b30bf55d 290 }
e8d23f92 291
5bd7ebb3 292 return 0;
775ae354
LP
293}
294
295static int dns_stub_assign_sections(
296 DnsQuery *q,
297 DnsQuestion *question,
298 bool edns0_do) {
299
300 int r;
301
302 assert(q);
303 assert(question);
304
c6ebf89b
LP
305 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
306 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
307 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
308 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
309 * come with section information though (for example, because they were synthesized locally, and not
310 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
311 * 'answer' object that directly matches the original question is always put in the ANSWER section,
312 * regardless if it carries section info, or what that section info says. Then, anything from the
313 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
314 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
315 * the ADDITIONAL section. */
775ae354
LP
316
317 /* Include all RRs that directly answer the question in the answer section */
318 r = dns_stub_collect_answer_by_question(
319 &q->reply_answer,
320 q->answer,
321 question,
322 edns0_do);
323 if (r < 0)
324 return r;
325
326 /* Include all RRs that originate from the answer or authority sections, and aren't listed in the
327 * answer section, in the authority section */
328 r = dns_stub_collect_answer_by_section(
329 &q->reply_authoritative,
330 q->answer,
331 DNS_ANSWER_SECTION_ANSWER,
332 q->reply_answer, NULL,
333 edns0_do);
334 if (r < 0)
335 return r;
775ae354
LP
336 r = dns_stub_collect_answer_by_section(
337 &q->reply_authoritative,
338 q->answer,
339 DNS_ANSWER_SECTION_AUTHORITY,
340 q->reply_answer, NULL,
341 edns0_do);
342 if (r < 0)
343 return r;
344
345 /* Include all RRs that originate from the additional sections in the additional section (except if
346 * already listed in the other two sections). Also add all RRs with no section marking. */
347 r = dns_stub_collect_answer_by_section(
348 &q->reply_additional,
349 q->answer,
350 DNS_ANSWER_SECTION_ADDITIONAL,
351 q->reply_answer, q->reply_authoritative,
352 edns0_do);
353 if (r < 0)
354 return r;
355 r = dns_stub_collect_answer_by_section(
356 &q->reply_additional,
357 q->answer,
358 0,
359 q->reply_answer, q->reply_authoritative,
360 edns0_do);
361 if (r < 0)
362 return r;
363
364 return 0;
365}
366
367static int dns_stub_make_reply_packet(
368 DnsPacket **ret,
369 size_t max_size,
370 DnsQuestion *q,
371 bool *ret_truncated) {
372
373 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
374 bool tc = false;
375 int r;
376
377 assert(ret);
378
379 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
380 if (r < 0)
381 return r;
382
383 r = dns_packet_append_question(p, q);
384 if (r == -EMSGSIZE)
385 tc = true;
386 else if (r < 0)
387 return r;
388
51027656 389 if (ret_truncated)
775ae354
LP
390 *ret_truncated = tc;
391 else if (tc)
51027656
LP
392 return -EMSGSIZE;
393
775ae354 394 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
e8d23f92 395
775ae354
LP
396 *ret = TAKE_PTR(p);
397 return 0;
398}
399
400static int dns_stub_add_reply_packet_body(
401 DnsPacket *p,
402 DnsAnswer *answer,
403 DnsAnswer *authoritative,
404 DnsAnswer *additional,
405 bool edns0_do, /* Client expects DNSSEC RRs? */
406 bool *truncated) {
407
408 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
409 bool tc = false;
410 int r;
411
412 assert(p);
413
414 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
415 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
416 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
417 * the dropped RRs should not be essential. */
418
419 r = dns_packet_append_answer(p, answer, &n_answer);
420 if (r == -EMSGSIZE)
421 tc = true;
422 else if (r < 0)
423 return r;
424 else {
425 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
426 if (r == -EMSGSIZE) {
427 if (edns0_do)
428 tc = true;
429 } else if (r < 0)
430 return r;
431 else {
432 r = dns_packet_append_answer(p, additional, &n_additional);
433 if (r < 0 && r != -EMSGSIZE)
434 return r;
435 }
436 }
437
438 if (tc) {
439 if (!truncated)
440 return -EMSGSIZE;
441
442 *truncated = true;
443 }
444
445 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
446 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
447 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
e8d23f92
LP
448 return 0;
449}
450
4a6eb824
LP
451static const char *nsid_string(void) {
452 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
453 sd_id128_t id;
454 int r;
455
456 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
457 * as systemd-resolved, and return a different string for each resolved instance without leaking host
458 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
459 * machine ID but from which the machine ID cannot be determined.
460 *
461 * Clients can use this to determine whether an answer is originating locally or is proxied from
462 * upstream. */
463
464 if (!isempty(buffer))
465 return buffer;
466
467 r = sd_id128_get_machine_app_specific(
468 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
469 &id);
470 if (r < 0) {
b480543c 471 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
4a6eb824
LP
472 return NULL;
473 }
474
475 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
476 return buffer;
477}
478
e8d23f92
LP
479static int dns_stub_finish_reply_packet(
480 DnsPacket *p,
481 uint16_t id,
482 int rcode,
51027656 483 bool tc, /* set the Truncated bit? */
4ad017cd 484 bool aa, /* set the Authoritative Answer bit? */
e8d23f92
LP
485 bool add_opt, /* add an OPT RR to this packet? */
486 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
b370adb5 487 bool ad, /* set the DNSSEC authenticated data bit? */
775ae354 488 bool cd, /* set the DNSSEC checking disabled bit? */
4a6eb824
LP
489 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
490 bool nsid) { /* whether to add NSID */
e8d23f92
LP
491
492 int r;
493
494 assert(p);
495
ff4caaae 496 if (add_opt) {
4a6eb824 497 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
ff4caaae
LP
498 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
499 tc = true;
500 else if (r < 0)
501 return r;
ff4caaae 502 } else {
941dd294
LP
503 /* If the client can't to EDNS0, don't do DO either */
504 edns0_do = false;
505
775ae354 506 /* If we don't do EDNS, clamp the rcode to 4 bit */
941dd294
LP
507 if (rcode > 0xF)
508 rcode = DNS_RCODE_SERVFAIL;
509 }
510
8c9c68b5
LP
511 /* Don't set the CD bit unless DO is on, too */
512 if (!edns0_do)
775ae354
LP
513 cd = false;
514
8c9c68b5
LP
515 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
516 * 5.7 */
e8d23f92
LP
517
518 DNS_PACKET_HEADER(p)->id = id;
519
520 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
51027656
LP
521 1 /* qr */,
522 0 /* opcode */,
4ad017cd 523 aa /* aa */,
51027656
LP
524 tc /* tc */,
525 1 /* rd */,
526 1 /* ra */,
e8d23f92 527 ad /* ad */,
775ae354 528 cd /* cd */,
e8d23f92 529 rcode));
b30bf55d 530
b30bf55d
LP
531 return 0;
532}
533
0354029b
LP
534static int dns_stub_send(
535 Manager *m,
536 DnsStubListenerExtra *l,
537 DnsStream *s,
538 DnsPacket *p,
539 DnsPacket *reply) {
540
b30bf55d
LP
541 int r;
542
543 assert(m);
544 assert(p);
545 assert(reply);
546
547 if (s)
548 r = dns_stream_write_packet(s, reply);
0354029b 549 else
b30bf55d
LP
550 /* Note that it is essential here that we explicitly choose the source IP address for this packet. This
551 * is because otherwise the kernel will choose it automatically based on the routing table and will
552 * thus pick 127.0.0.1 rather than 127.0.0.53. */
0354029b 553 r = manager_send(m,
b5febb3f 554 manager_dns_stub_fd_extra(m, l, SOCK_DGRAM),
0354029b
LP
555 l ? p->ifindex : LOOPBACK_IFINDEX, /* force loopback iface if this is the main listener stub */
556 p->family, &p->sender, p->sender_port, &p->destination,
557 reply);
b30bf55d
LP
558 if (r < 0)
559 return log_debug_errno(r, "Failed to send reply packet: %m");
560
561 return 0;
562}
563
39005e18
LP
564static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
565 assert(q);
566
567 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
568 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
569 * set cd */
570
571 return DNS_PACKET_DO(q->request_packet) &&
572 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
573 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
574 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
575}
576
5d7da51e
LP
577static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
578 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
579 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
580 * element). E.g. it could be that an RR that was included in the first reply's additional section
581 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
582 * remove everything in the "higher priority" sections from the "lower priority" sections.
583 *
584 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
585 * always end up in one section fully or not at all, but never be split among sections.
586 *
587 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
588 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
589
590 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
591 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
592 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
593}
594
775ae354
LP
595static int dns_stub_send_reply(
596 DnsQuery *q,
597 int rcode) {
598
599 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
600 bool truncated, edns0_do;
601 int r;
602
603 assert(q);
604
39005e18 605 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
775ae354 606
775ae354
LP
607 r = dns_stub_make_reply_packet(
608 &reply,
609 DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
610 q->request_packet->question,
611 &truncated);
612 if (r < 0)
613 return log_debug_errno(r, "Failed to build reply packet: %m");
614
5d7da51e
LP
615 dns_stub_suppress_duplicate_section_rrs(q);
616
775ae354
LP
617 r = dns_stub_add_reply_packet_body(
618 reply,
619 q->reply_answer,
620 q->reply_authoritative,
621 q->reply_additional,
622 edns0_do,
623 &truncated);
624 if (r < 0)
625 return log_debug_errno(r, "Failed to append reply packet body: %m");
626
627 r = dns_stub_finish_reply_packet(
628 reply,
629 DNS_PACKET_ID(q->request_packet),
630 rcode,
631 truncated,
4ad017cd 632 dns_query_fully_synthetic(q),
775ae354
LP
633 !!q->request_packet->opt,
634 edns0_do,
8c9c68b5 635 DNS_PACKET_AD(q->request_packet) && dns_query_fully_authenticated(q),
775ae354 636 DNS_PACKET_CD(q->request_packet),
4a6eb824
LP
637 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
638 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
775ae354
LP
639 if (r < 0)
640 return log_debug_errno(r, "Failed to build failure packet: %m");
641
642 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
643}
644
0354029b
LP
645static int dns_stub_send_failure(
646 Manager *m,
647 DnsStubListenerExtra *l,
648 DnsStream *s,
649 DnsPacket *p,
650 int rcode,
651 bool authenticated) {
652
b30bf55d 653 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
775ae354 654 bool truncated;
b30bf55d
LP
655 int r;
656
657 assert(m);
658 assert(p);
659
775ae354
LP
660 r = dns_stub_make_reply_packet(
661 &reply,
662 DNS_PACKET_PAYLOAD_SIZE_MAX(p),
663 p->question,
664 &truncated);
e8d23f92
LP
665 if (r < 0)
666 return log_debug_errno(r, "Failed to make failure packet: %m");
667
b370adb5
LP
668 r = dns_stub_finish_reply_packet(
669 reply,
670 DNS_PACKET_ID(p),
671 rcode,
775ae354 672 truncated,
4ad017cd 673 false,
b370adb5
LP
674 !!p->opt,
675 DNS_PACKET_DO(p),
8c9c68b5 676 DNS_PACKET_AD(p) && authenticated,
775ae354 677 DNS_PACKET_CD(p),
4a6eb824
LP
678 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
679 dns_packet_has_nsid_request(p) > 0 && !l);
b30bf55d
LP
680 if (r < 0)
681 return log_debug_errno(r, "Failed to build failure packet: %m");
682
0354029b 683 return dns_stub_send(m, l, s, p, reply);
b30bf55d
LP
684}
685
775ae354
LP
686static int dns_stub_patch_bypass_reply_packet(
687 DnsPacket **ret, /* Where to place the patched packet */
688 DnsPacket *original, /* The packet to patch */
689 DnsPacket *request) { /* The packet the patched packet shall look like a reply to */
690 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
691 int r;
692
693 assert(ret);
694 assert(original);
695 assert(request);
696
697 r = dns_packet_dup(&c, original);
698 if (r < 0)
699 return r;
700
701 /* Extract the packet, so that we know where the OPT field is */
702 r = dns_packet_extract(c);
703 if (r < 0)
704 return r;
705
706 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
707 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
708
709 /* Patch in our own maximum datagram size, if EDNS0 was on */
710 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
711 if (r < 0)
712 return r;
713
714 /* Lower all TTLs by the time passed since we received the datagram. */
715 if (timestamp_is_set(original->timestamp)) {
716 r = dns_packet_patch_ttls(c, original->timestamp);
717 if (r < 0)
718 return r;
719 }
720
721 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
722 * set the TC bit if our reply is larger than what the client supports, and truncate. */
723 if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) {
724 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
725 dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request));
726 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
727 }
728
729 *ret = TAKE_PTR(c);
730 return 0;
731}
732
b30bf55d
LP
733static void dns_stub_query_complete(DnsQuery *q) {
734 int r;
735
736 assert(q);
775ae354 737 assert(q->request_packet);
b30bf55d 738
775ae354
LP
739 if (q->question_bypass) {
740 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
741 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
742 * packets are not 100% compatible.) */
b30bf55d 743
775ae354
LP
744 if (q->answer_full_packet &&
745 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
746 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
e8d23f92 747
775ae354
LP
748 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
749 if (r < 0)
750 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
751 else
752 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
753
754 dns_query_free(q);
755 return;
e8d23f92 756 }
775ae354 757 }
b30bf55d 758
b97fc571
LP
759 /* Take all data from the current reply, and merge it into the three reply sections we are building
760 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
761 * and keep adding all RRs in the CNAME chain. */
762 r = dns_stub_assign_sections(
763 q,
764 q->request_packet->question,
765 dns_stub_reply_with_edns0_do(q));
766 if (r < 0) {
767 log_debug_errno(r, "Failed to assign sections: %m");
768 dns_query_free(q);
769 return;
770 }
2f4d8e57 771
775ae354
LP
772 switch (q->state) {
773
774 case DNS_TRANSACTION_SUCCESS:
b97fc571
LP
775 r = dns_query_process_cname(q);
776 if (r == -ELOOP) { /* CNAME loop, let's send what we already have */
777 log_debug_errno(r, "Detected CNAME loop, returning what we already have.");
778 (void) dns_stub_send_reply(q, q->answer_rcode);
779 break;
780 }
781 if (r < 0) {
782 log_debug_errno(r, "Failed to process CNAME: %m");
783 break;
784 }
785 if (r == DNS_QUERY_RESTARTED)
786 return;
787
788 _fallthrough_;
789
b30bf55d 790 case DNS_TRANSACTION_RCODE_FAILURE:
775ae354 791 (void) dns_stub_send_reply(q, q->answer_rcode);
b30bf55d
LP
792 break;
793
794 case DNS_TRANSACTION_NOT_FOUND:
775ae354 795 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
b30bf55d
LP
796 break;
797
798 case DNS_TRANSACTION_TIMEOUT:
799 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
800 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
801 break;
802
803 case DNS_TRANSACTION_NO_SERVERS:
804 case DNS_TRANSACTION_INVALID_REPLY:
805 case DNS_TRANSACTION_ERRNO:
806 case DNS_TRANSACTION_ABORTED:
807 case DNS_TRANSACTION_DNSSEC_FAILED:
808 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
809 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
810 case DNS_TRANSACTION_NETWORK_DOWN:
775ae354 811 case DNS_TRANSACTION_NO_SOURCE:
49ef064c 812 case DNS_TRANSACTION_STUB_LOOP:
775ae354 813 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
b30bf55d
LP
814 break;
815
816 case DNS_TRANSACTION_NULL:
817 case DNS_TRANSACTION_PENDING:
818 case DNS_TRANSACTION_VALIDATING:
819 default:
820 assert_not_reached("Impossible state");
821 }
822
b30bf55d
LP
823 dns_query_free(q);
824}
825
826static int dns_stub_stream_complete(DnsStream *s, int error) {
827 assert(s);
828
b412af57
LP
829 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
830
831 for (;;) {
832 DnsQuery *q;
833
834 q = set_first(s->queries);
835 if (!q)
836 break;
b30bf55d 837
b412af57
LP
838 dns_query_free(q);
839 }
b30bf55d 840
b412af57
LP
841 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
842 * should be kept as long as the client wants to. */
843 dns_stream_unref(s);
b30bf55d
LP
844 return 0;
845}
846
0354029b 847static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
ceb17827 848 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
bde69bbd
LP
849 Hashmap **queries_by_packet;
850 DnsQuery *existing;
b30bf55d
LP
851 int r;
852
853 assert(m);
854 assert(p);
855 assert(p->protocol == DNS_PROTOCOL_DNS);
856
0354029b 857 if (!l && /* l == NULL if this is the main stub */
d1fb8cda
YW
858 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
859 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
565147b7 860 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
ceb17827 861 return;
b30bf55d
LP
862 }
863
a9fd8837
LP
864 if (manager_packet_from_our_transaction(m, p)) {
865 log_debug("Got our own packet looped back, ignoring.");
866 return;
867 }
868
bde69bbd
LP
869 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
870 existing = hashmap_get(*queries_by_packet, p);
871 if (existing && dns_packet_equal(existing->request_packet, p)) {
872 log_debug("Got repeat packet from client, ignoring.");
873 return;
874 }
875
b30bf55d
LP
876 r = dns_packet_extract(p);
877 if (r < 0) {
878 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
0354029b 879 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
ceb17827 880 return;
b30bf55d
LP
881 }
882
883 if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
884 log_debug("Got EDNS OPT field with unsupported version number.");
0354029b 885 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
ceb17827 886 return;
b30bf55d
LP
887 }
888
889 if (dns_type_is_obsolete(p->question->keys[0]->type)) {
890 log_debug("Got message with obsolete key type, refusing.");
30ee7071 891 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 892 return;
b30bf55d
LP
893 }
894
895 if (dns_type_is_zone_transer(p->question->keys[0]->type)) {
896 log_debug("Got request for zone transfer, refusing.");
30ee7071 897 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 898 return;
b30bf55d
LP
899 }
900
901 if (!DNS_PACKET_RD(p)) {
902 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
903 log_debug("Got request with recursion disabled, refusing.");
0354029b 904 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 905 return;
b30bf55d
LP
906 }
907
bde69bbd
LP
908 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
909 if (r < 0) {
910 log_oom();
911 return;
912 }
913
b30bf55d 914 if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) {
775ae354
LP
915 log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
916
917 r = dns_query_new(m, &q, NULL, NULL, p, 0,
918 SD_RESOLVED_PROTOCOLS_ALL|
919 SD_RESOLVED_NO_CNAME|
920 SD_RESOLVED_NO_SEARCH|
921 SD_RESOLVED_NO_VALIDATE|
922 SD_RESOLVED_REQUIRE_PRIMARY|
923 SD_RESOLVED_CLAMP_TTL);
924 } else
925 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
926 SD_RESOLVED_PROTOCOLS_ALL|
927 SD_RESOLVED_NO_SEARCH|
2f4d8e57 928 (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
775ae354 929 SD_RESOLVED_CLAMP_TTL);
b30bf55d
LP
930 if (r < 0) {
931 log_error_errno(r, "Failed to generate query object: %m");
0354029b 932 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 933 return;
b30bf55d
LP
934 }
935
775ae354
LP
936 q->request_packet = dns_packet_ref(p);
937 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
0354029b 938 q->stub_listener_extra = l;
b30bf55d
LP
939 q->complete = dns_stub_query_complete;
940
941 if (s) {
b412af57
LP
942 /* Remember which queries belong to this stream, so that we can cancel them when the stream
943 * is disconnected early */
944
ceb17827 945 r = set_ensure_put(&s->queries, NULL, q);
b412af57
LP
946 if (r < 0) {
947 log_oom();
ceb17827 948 return;
b412af57 949 }
ceb17827 950 assert(r > 0);
b30bf55d
LP
951 }
952
bde69bbd
LP
953 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
954 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
955 * isn't particularly bad. */
956 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
957
b30bf55d
LP
958 r = dns_query_go(q);
959 if (r < 0) {
960 log_error_errno(r, "Failed to start query: %m");
0354029b 961 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 962 return;
b30bf55d
LP
963 }
964
52e63427 965 log_debug("Processing query...");
ceb17827 966 TAKE_PTR(q);
b30bf55d
LP
967}
968
0354029b 969static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
b30bf55d 970 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
b30bf55d
LP
971 int r;
972
973 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
974 if (r <= 0)
975 return r;
976
977 if (dns_packet_validate_query(p) > 0) {
978 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
979
0354029b 980 dns_stub_process_query(m, l, NULL, p);
b30bf55d
LP
981 } else
982 log_debug("Invalid DNS stub UDP packet, ignoring.");
983
984 return 0;
985}
986
d1fb8cda 987static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b 988 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
d1fb8cda
YW
989}
990
991static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b
LP
992 DnsStubListenerExtra *l = userdata;
993
994 assert(l);
995
996 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
d1fb8cda
YW
997}
998
e4bed40f
ZJS
999static int on_dns_stub_stream_packet(DnsStream *s) {
1000 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
1001
1002 assert(s);
1003
1004 p = dns_stream_take_read_packet(s);
1005 assert(p);
1006
1007 if (dns_packet_validate_query(p) > 0) {
1008 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1009
1010 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1011 } else
1012 log_debug("Invalid DNS stub TCP packet, ignoring.");
1013
1014 return 0;
1015}
1016
1017static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1018 DnsStream *stream;
1019 int cfd, r;
1020
1021 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1022 if (cfd < 0) {
1023 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1024 return 0;
1025
1026 return -errno;
1027 }
1028
1029 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL);
1030 if (r < 0) {
1031 safe_close(cfd);
1032 return r;
1033 }
1034
1035 stream->stub_listener_extra = l;
1036 stream->on_packet = on_dns_stub_stream_packet;
1037 stream->complete = dns_stub_stream_complete;
1038
1039 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1040
1041 return 0;
1042}
1043
1044static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1045 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1046}
1047
1048static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1049 DnsStubListenerExtra *l = userdata;
1050
1051 assert(l);
1052 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1053}
1054
/* Applies the socket options shared by all stub sockets (UDP and TCP, main and extra): SO_REUSEADDR, plus
 * whatever socket_set_recvpktinfo() and socket_set_recvttl() enable for the given address family.
 *
 * Returns 0 on success, or the negative error of the first option that could not be set. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int k;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        k = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (k < 0)
                return k;

        k = socket_set_recvpktinfo(fd, family, true);
        if (k < 0)
                return k;

        k = socket_set_recvttl(fd, family, true);

        /* Collapse any non-negative result to plain 0. */
        return k < 0 ? k : 0;
}
1075
8624f128
LP
/* Applies the best-effort options used on TCP stub listening sockets: TCP_FASTOPEN and TCP_NODELAY.
 * Failures are only logged at debug level, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int k;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        k = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        k = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1091
d491917c 1092static int manager_dns_stub_fd(Manager *m, int type) {
b30bf55d
LP
1093 union sockaddr_union sa = {
1094 .in.sin_family = AF_INET,
b30bf55d 1095 .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
d491917c 1096 .in.sin_port = htobe16(53),
b30bf55d 1097 };
424e490b 1098 _cleanup_close_ int fd = -1;
b30bf55d
LP
1099 int r;
1100
d491917c
ZJS
1101 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
1102
1103 sd_event_source **event_source = type == SOCK_DGRAM ? &m->dns_stub_udp_event_source : &m->dns_stub_tcp_event_source;
1104 if (*event_source)
1105 return sd_event_source_get_io_fd(*event_source);
b30bf55d 1106
d491917c 1107 fd = socket(AF_INET, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
424e490b 1108 if (fd < 0)
b30bf55d
LP
1109 return -errno;
1110
af8b1384 1111 r = set_dns_stub_common_socket_options(fd, AF_INET);
2ff48e98
LP
1112 if (r < 0)
1113 return r;
b30bf55d 1114
8624f128
LP
1115 if (type == SOCK_STREAM) {
1116 r = set_dns_stub_common_tcp_socket_options(fd);
1117 if (r < 0)
1118 return r;
1119 }
1120
b30bf55d 1121 /* Make sure no traffic from outside the local host can leak to onto this socket */
953a02d1
LP
1122 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1123 if (r < 0)
1124 return r;
b30bf55d 1125
d491917c
ZJS
1126 r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, 1);
1127 if (r < 0)
1128 return r;
1129
424e490b
ZJS
1130 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1131 return -errno;
b30bf55d 1132
d491917c
ZJS
1133 if (type == SOCK_STREAM &&
1134 listen(fd, SOMAXCONN) < 0)
1135 return -errno;
1136
1137 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1138 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1139 m);
b30bf55d 1140 if (r < 0)
424e490b 1141 return r;
b30bf55d 1142
d491917c 1143 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1144 if (r < 0)
1145 return r;
1146
d491917c
ZJS
1147 (void) sd_event_source_set_description(*event_source,
1148 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
b30bf55d 1149
7216a3b5 1150 return TAKE_FD(fd);
b30bf55d
LP
1151}
1152
b5febb3f 1153static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1f05101f
SS
1154 _cleanup_free_ char *pretty = NULL;
1155 _cleanup_close_ int fd = -1;
ca8b62b5 1156 union sockaddr_union sa;
1f05101f
SS
1157 int r;
1158
0354029b 1159 assert(m);
b5febb3f 1160 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
0354029b 1161
d491917c
ZJS
1162 if (!l)
1163 return manager_dns_stub_fd(m, type);
0354029b 1164
b5febb3f
ZJS
1165 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1166 if (*event_source)
1167 return sd_event_source_get_io_fd(*event_source);
1f05101f 1168
ca8b62b5
YW
1169 if (l->family == AF_INET)
1170 sa = (union sockaddr_union) {
1171 .in.sin_family = l->family,
49ef064c 1172 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1173 .in.sin_addr = l->address.in,
1174 };
1175 else
1176 sa = (union sockaddr_union) {
1177 .in6.sin6_family = l->family,
49ef064c 1178 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1179 .in6.sin6_addr = l->address.in6,
1180 };
1181
b5febb3f 1182 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1f05101f
SS
1183 if (fd < 0) {
1184 r = -errno;
1185 goto fail;
1186 }
1187
af8b1384 1188 r = set_dns_stub_common_socket_options(fd, l->family);
1f05101f
SS
1189 if (r < 0)
1190 goto fail;
1191
8624f128
LP
1192 if (type == SOCK_STREAM) {
1193 r = set_dns_stub_common_tcp_socket_options(fd);
1194 if (r < 0)
1195 goto fail;
1196 }
1197
69e3234d 1198 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
b5febb3f
ZJS
1199 * people may want ttl > 1. */
1200
5d0fe423 1201 r = socket_set_freebind(fd, l->family, true);
b5febb3f
ZJS
1202 if (r < 0)
1203 goto fail;
1204
eb170e75
LP
1205 if (type == SOCK_DGRAM) {
1206 r = socket_disable_pmtud(fd, l->family);
1207 if (r < 0)
1208 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
20a001bd
LP
1209
1210 r = socket_set_recvfragsize(fd, l->family, true);
1211 if (r < 0)
1212 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
eb170e75
LP
1213 }
1214
ca8b62b5 1215 if (bind(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0) {
1f05101f
SS
1216 r = -errno;
1217 goto fail;
1218 }
1219
b5febb3f
ZJS
1220 if (type == SOCK_STREAM &&
1221 listen(fd, SOMAXCONN) < 0) {
1222 r = -errno;
1223 goto fail;
1224 }
1225
1226 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1227 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1228 l);
1f05101f
SS
1229 if (r < 0)
1230 goto fail;
1231
b5febb3f 1232 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1233 if (r < 0)
1234 goto fail;
1235
b5febb3f
ZJS
1236 (void) sd_event_source_set_description(*event_source,
1237 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1f05101f
SS
1238
1239 if (DEBUG_LOGGING) {
ca8b62b5 1240 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1241 log_debug("Listening on %s socket %s.",
1242 type == SOCK_DGRAM ? "UDP" : "TCP",
1243 strnull(pretty));
1f05101f
SS
1244 }
1245
7216a3b5 1246 return TAKE_FD(fd);
1f05101f 1247
b4b7ea1b 1248fail:
1c17bcb3 1249 assert(r < 0);
ca8b62b5 1250 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1251 return log_warning_errno(r,
1252 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1253 "Failed to listen on %s socket %s: %m",
1254 type == SOCK_DGRAM ? "UDP" : "TCP",
1255 strnull(pretty));
1f05101f
SS
1256}
1257
b30bf55d 1258int manager_dns_stub_start(Manager *m) {
424e490b 1259 const char *t = "UDP";
01b0669e 1260 int r = 0;
b30bf55d
LP
1261
1262 assert(m);
1263
d5da7707
ZJS
1264 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1265 log_debug("Not creating stub listener.");
1266 else
1267 log_debug("Creating stub listener using %s.",
1268 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1269 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1270 "UDP/TCP");
1271
88d2cb7c 1272 if (FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_UDP))
d491917c 1273 r = manager_dns_stub_fd(m, SOCK_DGRAM);
b30bf55d 1274
424e490b 1275 if (r >= 0 &&
88d2cb7c 1276 FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_TCP)) {
424e490b 1277 t = "TCP";
d491917c 1278 r = manager_dns_stub_fd(m, SOCK_STREAM);
1ae43295 1279 }
b30bf55d 1280
0f4db364 1281 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
d491917c
ZJS
1282 log_warning_errno(r,
1283 r == -EADDRINUSE ? "Another process is already listening on %s socket 127.0.0.53:53.\n"
1284 "Turning off local DNS stub support." :
1285 "Failed to listen on %s socket 127.0.0.53:53: %m.\n"
1286 "Turning off local DNS stub support.",
1287 t);
424e490b
ZJS
1288 manager_dns_stub_stop(m);
1289 } else if (r < 0)
1290 return log_error_errno(r, "Failed to listen on %s socket 127.0.0.53:53: %m", t);
b30bf55d 1291
1f05101f 1292 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
36aaabc3 1293 DnsStubListenerExtra *l;
1f05101f 1294
dce65cd4 1295 log_debug("Creating extra stub listeners.");
1f05101f 1296
90e74a66 1297 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
7314b397 1298 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
b5febb3f 1299 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
7314b397 1300 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
b5febb3f 1301 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
7314b397 1302 }
1f05101f
SS
1303 }
1304
b30bf55d
LP
1305 return 0;
1306}
1307
1308void manager_dns_stub_stop(Manager *m) {
1309 assert(m);
1310
97935302
ZJS
1311 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1312 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
b30bf55d 1313}
ae8f0ec3
LP
1314
1315static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
97935302 1316 [DNS_STUB_LISTENER_NO] = "no",
ae8f0ec3
LP
1317 [DNS_STUB_LISTENER_UDP] = "udp",
1318 [DNS_STUB_LISTENER_TCP] = "tcp",
1319 [DNS_STUB_LISTENER_YES] = "yes",
1320};
1321DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);