]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/resolve/resolved-dns-stub.c
tree-wide: use ASSERT_PTR more
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b30bf55d 2
ca8b62b5 3#include <net/if_arp.h>
8624f128 4#include <netinet/tcp.h>
ca8b62b5 5
4ff9bc2e 6#include "errno-util.h"
b30bf55d 7#include "fd-util.h"
ef118d00 8#include "missing_network.h"
af8b1384 9#include "missing_socket.h"
b30bf55d 10#include "resolved-dns-stub.h"
1f05101f 11#include "socket-netlink.h"
b30bf55d 12#include "socket-util.h"
4a6eb824 13#include "stdio-util.h"
ae8f0ec3 14#include "string-table.h"
b30bf55d
LP
15
16/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
17 * IP and UDP header sizes */
18#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
19
b370adb5
LP
20/* On the extra stubs, use a more conservative choice */
21#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22
b5febb3f 23static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
a8d09063 24static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type);
0354029b 25
ae8f0ec3
LP
26static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
27 assert(a);
28
29 siphash24_compress(&a->mode, sizeof(a->mode), state);
30 siphash24_compress(&a->family, sizeof(a->family), state);
31 siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
32 siphash24_compress(&a->port, sizeof(a->port), state);
33}
34
35static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
36 int r;
37
38 assert(a);
39 assert(b);
40
41 r = CMP(a->mode, b->mode);
42 if (r != 0)
43 return r;
44
45 r = CMP(a->family, b->family);
46 if (r != 0)
47 return r;
48
49 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
50 if (r != 0)
51 return r;
52
53 return CMP(a->port, b->port);
54}
55
/* Hash ops for containers keyed by DnsStubListenerExtra objects; dns_stub_listener_extra_free() is
 * registered as the key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
62
0354029b
LP
63int dns_stub_listener_extra_new(
64 Manager *m,
65 DnsStubListenerExtra **ret) {
ae8f0ec3 66
36aaabc3 67 DnsStubListenerExtra *l;
1f05101f 68
0354029b 69 l = new(DnsStubListenerExtra, 1);
1f05101f
SS
70 if (!l)
71 return -ENOMEM;
72
0354029b
LP
73 *l = (DnsStubListenerExtra) {
74 .manager = m,
75 };
1f05101f 76
0354029b 77 *ret = TAKE_PTR(l);
1f05101f
SS
78 return 0;
79}
80
36aaabc3 81DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
bf22f231
YW
82 if (!p)
83 return NULL;
84
97935302
ZJS
85 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
86 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
bf22f231 87
bde69bbd
LP
88 hashmap_free(p->queries_by_packet);
89
bf22f231
YW
90 return mfree(p);
91}
92
bde69bbd
LP
93static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
94 assert(p);
95
96 siphash24_compress(&p->protocol, sizeof(p->protocol), state);
97 siphash24_compress(&p->family, sizeof(p->family), state);
98 siphash24_compress(&p->sender, sizeof(p->sender), state);
99 siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
100 siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
101 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
102
103 /* We don't bother hashing the full packet here, just the header */
104}
105
106static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
107 int r;
108
109 r = CMP(x->protocol, y->protocol);
110 if (r != 0)
111 return r;
112
113 r = CMP(x->family, y->family);
114 if (r != 0)
115 return r;
116
117 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
118 if (r != 0)
119 return r;
120
121 r = CMP(x->ipproto, y->ipproto);
122 if (r != 0)
123 return r;
124
125 r = CMP(x->sender_port, y->sender_port);
126 if (r != 0)
127 return r;
128
129 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
130}
131
/* Hash ops for containers keyed by DnsPacket objects (no destructors registered). */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
133
5bd7ebb3
LP
134static int reply_add_with_rrsig(
135 DnsAnswer **reply,
136 DnsResourceRecord *rr,
137 int ifindex,
138 DnsAnswerFlags flags,
139 DnsResourceRecord *rrsig,
140 bool with_rrsig) {
141 int r;
142
143 assert(reply);
144 assert(rr);
145
146 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
147 if (r < 0)
148 return r;
149
150 if (with_rrsig && rrsig) {
151 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
152 if (r < 0)
153 return r;
154 }
155
156 return 0;
157}
158
775ae354
LP
159static int dns_stub_collect_answer_by_question(
160 DnsAnswer **reply,
51027656 161 DnsAnswer *answer,
775ae354
LP
162 DnsQuestion *question,
163 bool with_rrsig) { /* Add RRSIG RR matching each RR */
b30bf55d 164
775ae354 165 DnsAnswerItem *item;
b30bf55d
LP
166 int r;
167
775ae354 168 assert(reply);
e8d23f92 169
915ba31c 170 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
4838dc4f 171
915ba31c 172 DNS_ANSWER_FOREACH_ITEM(item, answer) {
5bd7ebb3 173
915ba31c
LP
174 /* We have a question, let's see if this RR matches it */
175 r = dns_question_matches_rr(question, item->rr, NULL);
176 if (r < 0)
177 return r;
178 if (!r) {
179 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
180 r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
4838dc4f
LP
181 if (r < 0)
182 return r;
915ba31c
LP
183 if (!r)
184 continue;
4838dc4f 185 }
5bd7ebb3 186
915ba31c
LP
187 /* Mask the section info, we want the primary answers to always go without section
188 * info, so that it is added to the answer section when we synthesize a reply. */
5bd7ebb3 189
915ba31c
LP
190 r = reply_add_with_rrsig(
191 reply,
192 item->rr,
193 item->ifindex,
194 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
195 item->rrsig,
196 with_rrsig);
197 if (r < 0)
198 return r;
e8d23f92 199 }
b30bf55d 200
775ae354
LP
201 return 0;
202}
e8d23f92 203
775ae354
LP
204static int dns_stub_collect_answer_by_section(
205 DnsAnswer **reply,
206 DnsAnswer *answer,
207 DnsAnswerFlags section,
208 DnsAnswer *exclude1,
209 DnsAnswer *exclude2,
210 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
b30bf55d 211
775ae354 212 DnsAnswerItem *item;
775ae354 213 int r;
b30bf55d 214
775ae354
LP
215 assert(reply);
216
217 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
218 * avoid any RRs listed in 'exclude'. */
219
220 DNS_ANSWER_FOREACH_ITEM(item, answer) {
221
222 if (dns_answer_contains(exclude1, item->rr) ||
223 dns_answer_contains(exclude2, item->rr))
224 continue;
225
226 if (!with_dnssec &&
227 dns_type_is_dnssec(item->rr->key->type))
228 continue;
229
c4d98c3a 230 if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0)
775ae354
LP
231 continue;
232
5bd7ebb3
LP
233 r = reply_add_with_rrsig(
234 reply,
235 item->rr,
236 item->ifindex,
237 item->flags,
238 item->rrsig,
239 with_dnssec);
b30bf55d
LP
240 if (r < 0)
241 return r;
b30bf55d 242 }
e8d23f92 243
5bd7ebb3 244 return 0;
775ae354
LP
245}
246
247static int dns_stub_assign_sections(
248 DnsQuery *q,
249 DnsQuestion *question,
250 bool edns0_do) {
251
252 int r;
253
254 assert(q);
255 assert(question);
256
c6ebf89b
LP
257 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
258 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
259 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
260 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
261 * come with section information though (for example, because they were synthesized locally, and not
262 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
263 * 'answer' object that directly matches the original question is always put in the ANSWER section,
264 * regardless if it carries section info, or what that section info says. Then, anything from the
265 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
266 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
267 * the ADDITIONAL section. */
775ae354
LP
268
269 /* Include all RRs that directly answer the question in the answer section */
270 r = dns_stub_collect_answer_by_question(
271 &q->reply_answer,
272 q->answer,
273 question,
274 edns0_do);
275 if (r < 0)
276 return r;
277
d451f0e8 278 /* Include all RRs that originate from the authority sections, and aren't already listed in the
775ae354
LP
279 * answer section, in the authority section */
280 r = dns_stub_collect_answer_by_section(
281 &q->reply_authoritative,
282 q->answer,
d451f0e8 283 DNS_ANSWER_SECTION_AUTHORITY,
775ae354
LP
284 q->reply_answer, NULL,
285 edns0_do);
286 if (r < 0)
287 return r;
d451f0e8
LP
288
289 /* Include all RRs that originate from the answer or additional sections in the additional section
290 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
775ae354 291 r = dns_stub_collect_answer_by_section(
d451f0e8 292 &q->reply_additional,
775ae354 293 q->answer,
d451f0e8
LP
294 DNS_ANSWER_SECTION_ANSWER,
295 q->reply_answer, q->reply_authoritative,
775ae354
LP
296 edns0_do);
297 if (r < 0)
298 return r;
775ae354
LP
299 r = dns_stub_collect_answer_by_section(
300 &q->reply_additional,
301 q->answer,
302 DNS_ANSWER_SECTION_ADDITIONAL,
303 q->reply_answer, q->reply_authoritative,
304 edns0_do);
305 if (r < 0)
306 return r;
307 r = dns_stub_collect_answer_by_section(
308 &q->reply_additional,
309 q->answer,
310 0,
311 q->reply_answer, q->reply_authoritative,
312 edns0_do);
313 if (r < 0)
314 return r;
315
316 return 0;
317}
318
319static int dns_stub_make_reply_packet(
320 DnsPacket **ret,
321 size_t max_size,
322 DnsQuestion *q,
323 bool *ret_truncated) {
324
325 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
326 bool tc = false;
327 int r;
328
329 assert(ret);
330
331 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
332 if (r < 0)
333 return r;
334
335 r = dns_packet_append_question(p, q);
336 if (r == -EMSGSIZE)
337 tc = true;
338 else if (r < 0)
339 return r;
340
51027656 341 if (ret_truncated)
775ae354
LP
342 *ret_truncated = tc;
343 else if (tc)
51027656
LP
344 return -EMSGSIZE;
345
775ae354 346 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
e8d23f92 347
775ae354
LP
348 *ret = TAKE_PTR(p);
349 return 0;
350}
351
352static int dns_stub_add_reply_packet_body(
353 DnsPacket *p,
354 DnsAnswer *answer,
355 DnsAnswer *authoritative,
356 DnsAnswer *additional,
357 bool edns0_do, /* Client expects DNSSEC RRs? */
358 bool *truncated) {
359
360 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
361 bool tc = false;
362 int r;
363
364 assert(p);
365
366 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
367 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
368 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
369 * the dropped RRs should not be essential. */
370
371 r = dns_packet_append_answer(p, answer, &n_answer);
372 if (r == -EMSGSIZE)
373 tc = true;
374 else if (r < 0)
375 return r;
376 else {
377 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
378 if (r == -EMSGSIZE) {
379 if (edns0_do)
380 tc = true;
381 } else if (r < 0)
382 return r;
383 else {
384 r = dns_packet_append_answer(p, additional, &n_additional);
385 if (r < 0 && r != -EMSGSIZE)
386 return r;
387 }
388 }
389
390 if (tc) {
391 if (!truncated)
392 return -EMSGSIZE;
393
394 *truncated = true;
395 }
396
397 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
398 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
399 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
e8d23f92
LP
400 return 0;
401}
402
4a6eb824
LP
403static const char *nsid_string(void) {
404 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
405 sd_id128_t id;
406 int r;
407
408 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
409 * as systemd-resolved, and return a different string for each resolved instance without leaking host
410 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
411 * machine ID but from which the machine ID cannot be determined.
412 *
413 * Clients can use this to determine whether an answer is originating locally or is proxied from
414 * upstream. */
415
416 if (!isempty(buffer))
417 return buffer;
418
419 r = sd_id128_get_machine_app_specific(
420 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
421 &id);
422 if (r < 0) {
b480543c 423 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
4a6eb824
LP
424 return NULL;
425 }
426
427 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
428 return buffer;
429}
430
e8d23f92
LP
431static int dns_stub_finish_reply_packet(
432 DnsPacket *p,
433 uint16_t id,
434 int rcode,
51027656 435 bool tc, /* set the Truncated bit? */
4ad017cd 436 bool aa, /* set the Authoritative Answer bit? */
da846b30 437 bool rd, /* set the Recursion Desired bit? */
e8d23f92
LP
438 bool add_opt, /* add an OPT RR to this packet? */
439 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
b370adb5 440 bool ad, /* set the DNSSEC authenticated data bit? */
775ae354 441 bool cd, /* set the DNSSEC checking disabled bit? */
4a6eb824
LP
442 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
443 bool nsid) { /* whether to add NSID */
e8d23f92
LP
444
445 int r;
446
447 assert(p);
448
ff4caaae 449 if (add_opt) {
4a6eb824 450 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
ff4caaae
LP
451 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
452 tc = true;
453 else if (r < 0)
454 return r;
ff4caaae 455 } else {
941dd294
LP
456 /* If the client can't to EDNS0, don't do DO either */
457 edns0_do = false;
458
775ae354 459 /* If we don't do EDNS, clamp the rcode to 4 bit */
941dd294
LP
460 if (rcode > 0xF)
461 rcode = DNS_RCODE_SERVFAIL;
462 }
463
8c9c68b5
LP
464 /* Don't set the CD bit unless DO is on, too */
465 if (!edns0_do)
775ae354
LP
466 cd = false;
467
8c9c68b5
LP
468 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
469 * 5.7 */
e8d23f92
LP
470
471 DNS_PACKET_HEADER(p)->id = id;
472
473 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
51027656
LP
474 1 /* qr */,
475 0 /* opcode */,
4ad017cd 476 aa /* aa */,
51027656 477 tc /* tc */,
da846b30 478 rd /* rd */,
51027656 479 1 /* ra */,
e8d23f92 480 ad /* ad */,
775ae354 481 cd /* cd */,
e8d23f92 482 rcode));
b30bf55d 483
b30bf55d
LP
484 return 0;
485}
486
a8d09063
LP
487static bool address_is_proxy(int family, const union in_addr_union *a) {
488 assert(a);
489
490 /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
491
492 if (family != AF_INET)
493 return false;
494
495 return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB;
496}
497
498static int find_socket_fd(
499 Manager *m,
500 DnsStubListenerExtra *l,
501 int family,
502 const union in_addr_union *listen_address,
503 int type) {
504
505 assert(m);
506
507 /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the
508 * address to send from */
509 if (l)
510 return manager_dns_stub_fd_extra(m, l, type);
511
512 return manager_dns_stub_fd(m, family, listen_address, type);
513}
514
0354029b
LP
515static int dns_stub_send(
516 Manager *m,
517 DnsStubListenerExtra *l,
518 DnsStream *s,
519 DnsPacket *p,
520 DnsPacket *reply) {
521
b30bf55d
LP
522 int r;
523
524 assert(m);
525 assert(p);
526 assert(reply);
527
528 if (s)
529 r = dns_stream_write_packet(s, reply);
a8d09063 530 else {
dfa14e28 531 int fd, ifindex;
a8d09063 532
de777ffa 533 fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM);
a8d09063
LP
534 if (fd < 0)
535 return fd;
536
dfa14e28
BF
537 if (address_is_proxy(p->family, &p->destination))
538 /* Force loopback iface if this is the loopback proxy stub
539 * and ifindex was normalized to 0 by manager_recv(). */
540 ifindex = p->ifindex ?: LOOPBACK_IFINDEX;
541 else
542 /* Force loopback iface if this is the main listener stub. */
543 ifindex = l ? p->ifindex : LOOPBACK_IFINDEX;
544
a8d09063
LP
545 /* Note that it is essential here that we explicitly choose the source IP address for this
546 * packet. This is because otherwise the kernel will choose it automatically based on the
dfa14e28 547 * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */
0354029b 548 r = manager_send(m,
a8d09063 549 fd,
dfa14e28 550 ifindex,
0354029b
LP
551 p->family, &p->sender, p->sender_port, &p->destination,
552 reply);
a8d09063 553 }
b30bf55d
LP
554 if (r < 0)
555 return log_debug_errno(r, "Failed to send reply packet: %m");
556
557 return 0;
558}
559
39005e18
LP
560static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
561 assert(q);
562
563 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
564 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
565 * set cd */
566
567 return DNS_PACKET_DO(q->request_packet) &&
568 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
569 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
570 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
571}
572
5d7da51e
LP
573static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
574 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
575 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
576 * element). E.g. it could be that an RR that was included in the first reply's additional section
577 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
578 * remove everything in the "higher priority" sections from the "lower priority" sections.
579 *
580 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
581 * always end up in one section fully or not at all, but never be split among sections.
582 *
583 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
584 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
585
586 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
587 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
588 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
589}
590
775ae354
LP
591static int dns_stub_send_reply(
592 DnsQuery *q,
593 int rcode) {
594
595 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
596 bool truncated, edns0_do;
597 int r;
598
599 assert(q);
600
39005e18 601 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
775ae354 602
775ae354
LP
603 r = dns_stub_make_reply_packet(
604 &reply,
605 DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
606 q->request_packet->question,
607 &truncated);
608 if (r < 0)
609 return log_debug_errno(r, "Failed to build reply packet: %m");
610
5d7da51e
LP
611 dns_stub_suppress_duplicate_section_rrs(q);
612
775ae354
LP
613 r = dns_stub_add_reply_packet_body(
614 reply,
615 q->reply_answer,
616 q->reply_authoritative,
617 q->reply_additional,
618 edns0_do,
619 &truncated);
620 if (r < 0)
621 return log_debug_errno(r, "Failed to append reply packet body: %m");
622
623 r = dns_stub_finish_reply_packet(
624 reply,
625 DNS_PACKET_ID(q->request_packet),
626 rcode,
627 truncated,
9ddf099f 628 dns_query_fully_authoritative(q),
da846b30 629 DNS_PACKET_RD(q->request_packet),
775ae354
LP
630 !!q->request_packet->opt,
631 edns0_do,
b553abd8 632 (DNS_PACKET_AD(q->request_packet) || DNS_PACKET_DO(q->request_packet)) && dns_query_fully_authenticated(q),
775ae354 633 DNS_PACKET_CD(q->request_packet),
4a6eb824
LP
634 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
635 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
775ae354
LP
636 if (r < 0)
637 return log_debug_errno(r, "Failed to build failure packet: %m");
638
639 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
640}
641
0354029b
LP
642static int dns_stub_send_failure(
643 Manager *m,
644 DnsStubListenerExtra *l,
645 DnsStream *s,
646 DnsPacket *p,
647 int rcode,
648 bool authenticated) {
649
b30bf55d 650 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
775ae354 651 bool truncated;
b30bf55d
LP
652 int r;
653
654 assert(m);
655 assert(p);
656
775ae354
LP
657 r = dns_stub_make_reply_packet(
658 &reply,
659 DNS_PACKET_PAYLOAD_SIZE_MAX(p),
660 p->question,
661 &truncated);
e8d23f92
LP
662 if (r < 0)
663 return log_debug_errno(r, "Failed to make failure packet: %m");
664
b370adb5
LP
665 r = dns_stub_finish_reply_packet(
666 reply,
667 DNS_PACKET_ID(p),
668 rcode,
775ae354 669 truncated,
4ad017cd 670 false,
da846b30 671 DNS_PACKET_RD(p),
b370adb5
LP
672 !!p->opt,
673 DNS_PACKET_DO(p),
b553abd8 674 (DNS_PACKET_AD(p) || DNS_PACKET_DO(p)) && authenticated,
775ae354 675 DNS_PACKET_CD(p),
4a6eb824
LP
676 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
677 dns_packet_has_nsid_request(p) > 0 && !l);
b30bf55d
LP
678 if (r < 0)
679 return log_debug_errno(r, "Failed to build failure packet: %m");
680
0354029b 681 return dns_stub_send(m, l, s, p, reply);
b30bf55d
LP
682}
683
775ae354
LP
684static int dns_stub_patch_bypass_reply_packet(
685 DnsPacket **ret, /* Where to place the patched packet */
686 DnsPacket *original, /* The packet to patch */
687 DnsPacket *request) { /* The packet the patched packet shall look like a reply to */
688 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
689 int r;
690
691 assert(ret);
692 assert(original);
693 assert(request);
694
695 r = dns_packet_dup(&c, original);
696 if (r < 0)
697 return r;
698
699 /* Extract the packet, so that we know where the OPT field is */
700 r = dns_packet_extract(c);
701 if (r < 0)
702 return r;
703
704 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
705 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
706
707 /* Patch in our own maximum datagram size, if EDNS0 was on */
708 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
709 if (r < 0)
710 return r;
711
712 /* Lower all TTLs by the time passed since we received the datagram. */
713 if (timestamp_is_set(original->timestamp)) {
714 r = dns_packet_patch_ttls(c, original->timestamp);
715 if (r < 0)
716 return r;
717 }
718
719 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
720 * set the TC bit if our reply is larger than what the client supports, and truncate. */
721 if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) {
722 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
723 dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request));
724 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
725 }
726
727 *ret = TAKE_PTR(c);
728 return 0;
729}
730
c704288c
YW
731static void dns_stub_query_complete(DnsQuery *query) {
732 _cleanup_(dns_query_freep) DnsQuery *q = query;
b30bf55d
LP
733 int r;
734
735 assert(q);
775ae354 736 assert(q->request_packet);
b30bf55d 737
775ae354
LP
738 if (q->question_bypass) {
739 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
740 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
741 * packets are not 100% compatible.) */
b30bf55d 742
775ae354
LP
743 if (q->answer_full_packet &&
744 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
745 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
e8d23f92 746
775ae354
LP
747 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
748 if (r < 0)
749 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
750 else
751 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
752
775ae354 753 return;
e8d23f92 754 }
775ae354 755 }
b30bf55d 756
b97fc571
LP
757 /* Take all data from the current reply, and merge it into the three reply sections we are building
758 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
759 * and keep adding all RRs in the CNAME chain. */
760 r = dns_stub_assign_sections(
761 q,
a7c0291c 762 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
b97fc571 763 dns_stub_reply_with_edns0_do(q));
c704288c
YW
764 if (r < 0)
765 return (void) log_debug_errno(r, "Failed to assign sections: %m");
2f4d8e57 766
775ae354
LP
767 switch (q->state) {
768
915ba31c
LP
769 case DNS_TRANSACTION_SUCCESS: {
770 bool first = true;
771
772 for (;;) {
773 int cname_result;
774
775 cname_result = dns_query_process_cname_one(q);
776 if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */
777 log_debug_errno(r, "Detected CNAME loop, returning what we already have.");
778 (void) dns_stub_send_reply(q, q->answer_rcode);
779 break;
780 }
781 if (cname_result < 0) {
782 log_debug_errno(cname_result, "Failed to process CNAME: %m");
783 break;
784 }
785
786 if (cname_result == DNS_QUERY_NOMATCH) {
787 /* This answer doesn't contain any RR that would answer our question
788 * positively, i.e. neither directly nor via CNAME. */
789
790 if (first) /* We never followed a CNAME and the answer doesn't match our
791 * question at all? Then this is final, the empty answer is the
792 * answer. */
793 break;
794
795 /* Otherwise, we already followed a CNAME once within this packet, and the
796 * packet doesn't answer our question. In that case let's restart the query,
797 * now with the redirected question. We'll */
798 r = dns_query_go(q);
c704288c
YW
799 if (r < 0)
800 return (void) log_debug_errno(r, "Failed to restart query: %m");
915ba31c 801
c704288c 802 TAKE_PTR(q);
915ba31c
LP
803 return;
804 }
805
806 r = dns_stub_assign_sections(
807 q,
808 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
809 dns_stub_reply_with_edns0_do(q));
c704288c
YW
810 if (r < 0)
811 return (void) log_debug_errno(r, "Failed to assign sections: %m");
915ba31c
LP
812
813 if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
814 break;
815
816 /* We followed a CNAME. and collected the RRs that answer the redirected question
817 * successfully. Let's not try to do this again. */
818 assert(cname_result == DNS_QUERY_CNAME);
819 first = false;
b97fc571 820 }
b97fc571
LP
821
822 _fallthrough_;
915ba31c 823 }
b97fc571 824
b30bf55d 825 case DNS_TRANSACTION_RCODE_FAILURE:
775ae354 826 (void) dns_stub_send_reply(q, q->answer_rcode);
b30bf55d
LP
827 break;
828
829 case DNS_TRANSACTION_NOT_FOUND:
775ae354 830 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
b30bf55d
LP
831 break;
832
833 case DNS_TRANSACTION_TIMEOUT:
834 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
835 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
836 break;
837
838 case DNS_TRANSACTION_NO_SERVERS:
839 case DNS_TRANSACTION_INVALID_REPLY:
840 case DNS_TRANSACTION_ERRNO:
841 case DNS_TRANSACTION_ABORTED:
842 case DNS_TRANSACTION_DNSSEC_FAILED:
843 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
844 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
845 case DNS_TRANSACTION_NETWORK_DOWN:
775ae354 846 case DNS_TRANSACTION_NO_SOURCE:
49ef064c 847 case DNS_TRANSACTION_STUB_LOOP:
775ae354 848 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
b30bf55d
LP
849 break;
850
851 case DNS_TRANSACTION_NULL:
852 case DNS_TRANSACTION_PENDING:
853 case DNS_TRANSACTION_VALIDATING:
854 default:
04499a70 855 assert_not_reached();
b30bf55d 856 }
b30bf55d
LP
857}
858
859static int dns_stub_stream_complete(DnsStream *s, int error) {
860 assert(s);
861
b412af57
LP
862 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
863
864 for (;;) {
865 DnsQuery *q;
866
867 q = set_first(s->queries);
868 if (!q)
869 break;
b30bf55d 870
b412af57
LP
871 dns_query_free(q);
872 }
b30bf55d 873
b412af57
LP
874 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
875 * should be kept as long as the client wants to. */
876 dns_stream_unref(s);
b30bf55d
LP
877 return 0;
878}
879
0354029b 880static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
a8d09063 881 uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL;
ceb17827 882 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
bde69bbd
LP
883 Hashmap **queries_by_packet;
884 DnsQuery *existing;
a8d09063 885 bool bypass = false;
b30bf55d
LP
886 int r;
887
888 assert(m);
889 assert(p);
890 assert(p->protocol == DNS_PROTOCOL_DNS);
891
0354029b 892 if (!l && /* l == NULL if this is the main stub */
a8d09063 893 !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */
d1fb8cda
YW
894 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
895 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
565147b7 896 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
ceb17827 897 return;
b30bf55d
LP
898 }
899
a9fd8837
LP
900 if (manager_packet_from_our_transaction(m, p)) {
901 log_debug("Got our own packet looped back, ignoring.");
902 return;
903 }
904
bde69bbd
LP
905 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
906 existing = hashmap_get(*queries_by_packet, p);
907 if (existing && dns_packet_equal(existing->request_packet, p)) {
908 log_debug("Got repeat packet from client, ignoring.");
909 return;
910 }
911
b30bf55d
LP
912 r = dns_packet_extract(p);
913 if (r < 0) {
914 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
0354029b 915 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
ceb17827 916 return;
b30bf55d
LP
917 }
918
919 if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
920 log_debug("Got EDNS OPT field with unsupported version number.");
0354029b 921 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
ceb17827 922 return;
b30bf55d
LP
923 }
924
ab715ddb 925 if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) {
b30bf55d 926 log_debug("Got message with obsolete key type, refusing.");
30ee7071 927 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 928 return;
b30bf55d
LP
929 }
930
ab715ddb 931 if (dns_type_is_zone_transer(dns_question_first_key(p->question)->type)) {
b30bf55d 932 log_debug("Got request for zone transfer, refusing.");
30ee7071 933 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 934 return;
b30bf55d
LP
935 }
936
937 if (!DNS_PACKET_RD(p)) {
938 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
939 log_debug("Got request with recursion disabled, refusing.");
0354029b 940 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 941 return;
b30bf55d
LP
942 }
943
bde69bbd
LP
944 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
945 if (r < 0) {
946 log_oom();
947 return;
948 }
949
a8d09063
LP
950 if (address_is_proxy(p->family, &p->destination)) {
951 _cleanup_free_ char *dipa = NULL;
952
953 r = in_addr_to_string(p->family, &p->destination, &dipa);
e1158539
LP
954 if (r < 0)
955 return (void) log_error_errno(r, "Failed to format destination address: %m");
a8d09063
LP
956
957 log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
958 bypass = true;
959 protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */
960 } else if ((DNS_PACKET_DO(p) && DNS_PACKET_CD(p))) {
775ae354 961 log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
a8d09063
LP
962 bypass = true;
963 }
775ae354 964
a8d09063 965 if (bypass)
775ae354 966 r = dns_query_new(m, &q, NULL, NULL, p, 0,
a8d09063 967 protocol_flags|
775ae354
LP
968 SD_RESOLVED_NO_CNAME|
969 SD_RESOLVED_NO_SEARCH|
970 SD_RESOLVED_NO_VALIDATE|
971 SD_RESOLVED_REQUIRE_PRIMARY|
972 SD_RESOLVED_CLAMP_TTL);
a8d09063 973 else
775ae354 974 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
a8d09063 975 protocol_flags|
775ae354 976 SD_RESOLVED_NO_SEARCH|
2f4d8e57 977 (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
775ae354 978 SD_RESOLVED_CLAMP_TTL);
b30bf55d
LP
979 if (r < 0) {
980 log_error_errno(r, "Failed to generate query object: %m");
0354029b 981 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 982 return;
b30bf55d
LP
983 }
984
775ae354
LP
985 q->request_packet = dns_packet_ref(p);
986 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
0354029b 987 q->stub_listener_extra = l;
b30bf55d
LP
988 q->complete = dns_stub_query_complete;
989
990 if (s) {
b412af57
LP
991 /* Remember which queries belong to this stream, so that we can cancel them when the stream
992 * is disconnected early */
993
ceb17827 994 r = set_ensure_put(&s->queries, NULL, q);
b412af57
LP
995 if (r < 0) {
996 log_oom();
ceb17827 997 return;
b412af57 998 }
ceb17827 999 assert(r > 0);
b30bf55d
LP
1000 }
1001
bde69bbd
LP
1002 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
1003 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
1004 * isn't particularly bad. */
1005 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
1006
b30bf55d
LP
1007 r = dns_query_go(q);
1008 if (r < 0) {
1009 log_error_errno(r, "Failed to start query: %m");
0354029b 1010 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 1011 return;
b30bf55d
LP
1012 }
1013
52e63427 1014 log_debug("Processing query...");
ceb17827 1015 TAKE_PTR(q);
b30bf55d
LP
1016}
1017
0354029b 1018static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
b30bf55d 1019 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
b30bf55d
LP
1020 int r;
1021
1022 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
1023 if (r <= 0)
1024 return r;
1025
1026 if (dns_packet_validate_query(p) > 0) {
1027 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
1028
0354029b 1029 dns_stub_process_query(m, l, NULL, p);
b30bf55d
LP
1030 } else
1031 log_debug("Invalid DNS stub UDP packet, ignoring.");
1032
1033 return 0;
1034}
1035
d1fb8cda 1036static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b 1037 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
d1fb8cda
YW
1038}
1039
1040static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1041 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
0354029b
LP
1042
1043 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
d1fb8cda
YW
1044}
1045
624f907e 1046static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) {
e4bed40f 1047 assert(s);
624f907e 1048 assert(s->manager);
e4bed40f
ZJS
1049 assert(p);
1050
1051 if (dns_packet_validate_query(p) > 0) {
1052 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1053
1054 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1055 } else
1056 log_debug("Invalid DNS stub TCP packet, ignoring.");
1057
1058 return 0;
1059}
1060
1061static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1062 DnsStream *stream;
1063 int cfd, r;
1064
1065 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1066 if (cfd < 0) {
1067 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1068 return 0;
1069
1070 return -errno;
1071 }
1072
18230451
YW
1073 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL,
1074 on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC);
e4bed40f
ZJS
1075 if (r < 0) {
1076 safe_close(cfd);
1077 return r;
1078 }
1079
1080 stream->stub_listener_extra = l;
e4bed40f
ZJS
1081
1082 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1083
1084 return 0;
1085}
1086
1087static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1088 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1089}
1090
1091static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1092 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
e4bed40f 1093
e4bed40f
ZJS
1094 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1095}
1096
/* Applies the socket options every stub socket gets, regardless of transport: SO_REUSEADDR, plus the
 * settings made by socket_set_recvpktinfo() and socket_set_recvttl() for the given address family.
 *
 * Stops at the first failing call and returns its negative error; returns 0 if all three succeed. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r >= 0)
                r = socket_set_recvpktinfo(fd, family, true);
        if (r >= 0)
                r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1117
8624f128
LP
/* Applies the TCP-specific options used on all listening stub sockets. Both options are best-effort: a
 * failure is logged at debug level and otherwise ignored, so this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        int r = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (r < 0)
                log_debug_errno(r, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        r = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (r < 0)
                log_debug_errno(r, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1133
a8d09063
LP
1134static int manager_dns_stub_fd(
1135 Manager *m,
1136 int family,
1137 const union in_addr_union *listen_addr,
1138 int type) {
1139
1140 sd_event_source **event_source;
424e490b 1141 _cleanup_close_ int fd = -1;
a8d09063 1142 union sockaddr_union sa;
b30bf55d
LP
1143 int r;
1144
e1158539
LP
1145 assert(m);
1146 assert(listen_addr);
1147
a8d09063
LP
1148 if (type == SOCK_DGRAM)
1149 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source;
1150 else if (type == SOCK_STREAM)
1151 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source;
1152 else
1153 return -EPROTONOSUPPORT;
d491917c 1154
d491917c
ZJS
1155 if (*event_source)
1156 return sd_event_source_get_io_fd(*event_source);
b30bf55d 1157
a8d09063 1158 fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
424e490b 1159 if (fd < 0)
b30bf55d
LP
1160 return -errno;
1161
a8d09063 1162 r = set_dns_stub_common_socket_options(fd, family);
2ff48e98
LP
1163 if (r < 0)
1164 return r;
b30bf55d 1165
8624f128
LP
1166 if (type == SOCK_STREAM) {
1167 r = set_dns_stub_common_tcp_socket_options(fd);
1168 if (r < 0)
1169 return r;
1170 }
1171
a8d09063
LP
1172 /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
1173 * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
1174 * so to redirect external traffic to it. */
1175
1176 if (!address_is_proxy(family, listen_addr)) {
1177 /* Make sure no traffic from outside the local host can leak to onto this socket */
1178 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1179 if (r < 0)
1180 return r;
1181
1182 r = socket_set_ttl(fd, family, 1);
1183 if (r < 0)
1184 return r;
1185 } else if (type == SOCK_DGRAM) {
e1158539
LP
1186 /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
1187 * a longer discussion. (We only do this for sockets that are potentially externally
1188 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
1189 * see above, so that packets don't get routed at all.) */
a8d09063
LP
1190 r = socket_disable_pmtud(fd, family);
1191 if (r < 0)
1192 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1193
1194 r = socket_set_recvfragsize(fd, family, true);
1195 if (r < 0)
1196 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1197 }
b30bf55d 1198
a8d09063 1199 r = sockaddr_set_in_addr(&sa, family, listen_addr, 53);
d491917c
ZJS
1200 if (r < 0)
1201 return r;
1202
424e490b
ZJS
1203 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1204 return -errno;
b30bf55d 1205
d491917c
ZJS
1206 if (type == SOCK_STREAM &&
1207 listen(fd, SOMAXCONN) < 0)
1208 return -errno;
1209
1210 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1211 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1212 m);
b30bf55d 1213 if (r < 0)
424e490b 1214 return r;
b30bf55d 1215
d491917c 1216 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1217 if (r < 0)
1218 return r;
1219
d491917c
ZJS
1220 (void) sd_event_source_set_description(*event_source,
1221 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
b30bf55d 1222
7216a3b5 1223 return TAKE_FD(fd);
b30bf55d
LP
1224}
1225
b5febb3f 1226static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1f05101f
SS
1227 _cleanup_free_ char *pretty = NULL;
1228 _cleanup_close_ int fd = -1;
ca8b62b5 1229 union sockaddr_union sa;
1f05101f
SS
1230 int r;
1231
0354029b 1232 assert(m);
a8d09063 1233 assert(l);
b5febb3f 1234 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
0354029b 1235
b5febb3f
ZJS
1236 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1237 if (*event_source)
1238 return sd_event_source_get_io_fd(*event_source);
1f05101f 1239
ca8b62b5
YW
1240 if (l->family == AF_INET)
1241 sa = (union sockaddr_union) {
1242 .in.sin_family = l->family,
49ef064c 1243 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1244 .in.sin_addr = l->address.in,
1245 };
1246 else
1247 sa = (union sockaddr_union) {
1248 .in6.sin6_family = l->family,
49ef064c 1249 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1250 .in6.sin6_addr = l->address.in6,
1251 };
1252
b5febb3f 1253 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1f05101f
SS
1254 if (fd < 0) {
1255 r = -errno;
1256 goto fail;
1257 }
1258
af8b1384 1259 r = set_dns_stub_common_socket_options(fd, l->family);
1f05101f
SS
1260 if (r < 0)
1261 goto fail;
1262
8624f128
LP
1263 if (type == SOCK_STREAM) {
1264 r = set_dns_stub_common_tcp_socket_options(fd);
1265 if (r < 0)
1266 goto fail;
1267 }
1268
69e3234d 1269 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
b5febb3f
ZJS
1270 * people may want ttl > 1. */
1271
5d0fe423 1272 r = socket_set_freebind(fd, l->family, true);
b5febb3f
ZJS
1273 if (r < 0)
1274 goto fail;
1275
eb170e75
LP
1276 if (type == SOCK_DGRAM) {
1277 r = socket_disable_pmtud(fd, l->family);
1278 if (r < 0)
1279 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
20a001bd
LP
1280
1281 r = socket_set_recvfragsize(fd, l->family, true);
1282 if (r < 0)
1283 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
eb170e75
LP
1284 }
1285
ded15213
LP
1286 r = RET_NERRNO(bind(fd, &sa.sa, SOCKADDR_LEN(sa)));
1287 if (r < 0)
1f05101f 1288 goto fail;
1f05101f 1289
b5febb3f
ZJS
1290 if (type == SOCK_STREAM &&
1291 listen(fd, SOMAXCONN) < 0) {
1292 r = -errno;
1293 goto fail;
1294 }
1295
1296 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1297 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1298 l);
1f05101f
SS
1299 if (r < 0)
1300 goto fail;
1301
b5febb3f 1302 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1303 if (r < 0)
1304 goto fail;
1305
b5febb3f
ZJS
1306 (void) sd_event_source_set_description(*event_source,
1307 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1f05101f
SS
1308
1309 if (DEBUG_LOGGING) {
ca8b62b5 1310 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1311 log_debug("Listening on %s socket %s.",
1312 type == SOCK_DGRAM ? "UDP" : "TCP",
1313 strnull(pretty));
1f05101f
SS
1314 }
1315
7216a3b5 1316 return TAKE_FD(fd);
1f05101f 1317
b4b7ea1b 1318fail:
1c17bcb3 1319 assert(r < 0);
ca8b62b5 1320 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1321 return log_warning_errno(r,
1322 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1323 "Failed to listen on %s socket %s: %m",
1324 type == SOCK_DGRAM ? "UDP" : "TCP",
1325 strnull(pretty));
1f05101f
SS
1326}
1327
b30bf55d 1328int manager_dns_stub_start(Manager *m) {
a8d09063 1329 int r;
b30bf55d
LP
1330
1331 assert(m);
1332
d5da7707
ZJS
1333 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1334 log_debug("Not creating stub listener.");
a8d09063
LP
1335 else {
1336 static const struct {
1337 uint32_t addr;
1338 int socket_type;
1339 } stub_sockets[] = {
1340 { INADDR_DNS_STUB, SOCK_DGRAM },
1341 { INADDR_DNS_STUB, SOCK_STREAM },
1342 { INADDR_DNS_PROXY_STUB, SOCK_DGRAM },
1343 { INADDR_DNS_PROXY_STUB, SOCK_STREAM },
1344 };
1345
d5da7707
ZJS
1346 log_debug("Creating stub listener using %s.",
1347 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1348 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1349 "UDP/TCP");
1350
a8d09063
LP
1351 for (size_t i = 0; i < ELEMENTSOF(stub_sockets); i++) {
1352 union in_addr_union a = {
1353 .in.s_addr = htobe32(stub_sockets[i].addr),
1354 };
b30bf55d 1355
a8d09063
LP
1356 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && stub_sockets[i].socket_type == SOCK_STREAM)
1357 continue;
1358 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && stub_sockets[i].socket_type == SOCK_DGRAM)
1359 continue;
1360
1361 r = manager_dns_stub_fd(m, AF_INET, &a, stub_sockets[i].socket_type);
1362 if (r < 0) {
1363 _cleanup_free_ char *busy_socket = NULL;
1364
1365 if (asprintf(&busy_socket,
1366 "%s socket " IPV4_ADDRESS_FMT_STR ":53",
1367 stub_sockets[i].socket_type == SOCK_DGRAM ? "UDP" : "TCP",
1368 IPV4_ADDRESS_FMT_VAL(a.in)) < 0)
1369 return log_oom();
1370
1371 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
1372 log_warning_errno(r,
1373 r == -EADDRINUSE ? "Another process is already listening on %s.\n"
1374 "Turning off local DNS stub support." :
1375 "Failed to listen on %s: %m.\n"
1376 "Turning off local DNS stub support.",
1377 busy_socket);
1378 manager_dns_stub_stop(m);
1379 break;
1380 }
b30bf55d 1381
a8d09063
LP
1382 return log_error_errno(r, "Failed to listen on %s: %m", busy_socket);
1383 }
1384 }
1385 }
b30bf55d 1386
1f05101f 1387 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
36aaabc3 1388 DnsStubListenerExtra *l;
1f05101f 1389
dce65cd4 1390 log_debug("Creating extra stub listeners.");
1f05101f 1391
90e74a66 1392 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
7314b397 1393 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
b5febb3f 1394 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
7314b397 1395 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
b5febb3f 1396 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
7314b397 1397 }
1f05101f
SS
1398 }
1399
b30bf55d
LP
1400 return 0;
1401}
1402
1403void manager_dns_stub_stop(Manager *m) {
1404 assert(m);
1405
97935302
ZJS
1406 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1407 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
a8d09063
LP
1408 m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source);
1409 m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source);
b30bf55d 1410}
ae8f0ec3
LP
1411
1412static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
97935302 1413 [DNS_STUB_LISTENER_NO] = "no",
ae8f0ec3
LP
1414 [DNS_STUB_LISTENER_UDP] = "udp",
1415 [DNS_STUB_LISTENER_TCP] = "tcp",
1416 [DNS_STUB_LISTENER_YES] = "yes",
1417};
1418DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);