]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/resolve/resolved-dns-stub.c
Merge pull request #30284 from YHNdnzj/fstab-wantedby-defaultdeps
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b30bf55d 2
ca8b62b5 3#include <net/if_arp.h>
8624f128 4#include <netinet/tcp.h>
ca8b62b5 5
0398c084 6#include "capability-util.h"
4ff9bc2e 7#include "errno-util.h"
b30bf55d 8#include "fd-util.h"
ef118d00 9#include "missing_network.h"
af8b1384 10#include "missing_socket.h"
b30bf55d 11#include "resolved-dns-stub.h"
1f05101f 12#include "socket-netlink.h"
b30bf55d 13#include "socket-util.h"
4a6eb824 14#include "stdio-util.h"
ae8f0ec3 15#include "string-table.h"
b30bf55d
LP
16
17/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
18 * IP and UDP header sizes */
19#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
20
b370adb5
LP
21/* On the extra stubs, use a more conservative choice */
22#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
23
b5febb3f 24static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
a8d09063 25static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type);
0354029b 26
ae8f0ec3
LP
27static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
28 assert(a);
29
30 siphash24_compress(&a->mode, sizeof(a->mode), state);
31 siphash24_compress(&a->family, sizeof(a->family), state);
32 siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
33 siphash24_compress(&a->port, sizeof(a->port), state);
34}
35
36static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
37 int r;
38
39 assert(a);
40 assert(b);
41
42 r = CMP(a->mode, b->mode);
43 if (r != 0)
44 return r;
45
46 r = CMP(a->family, b->family);
47 if (r != 0)
48 return r;
49
50 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
51 if (r != 0)
52 return r;
53
54 return CMP(a->port, b->port);
55}
56
/* Hash ops for containers keyed by DnsStubListenerExtra objects; dns_stub_listener_extra_free() is
 * registered as the key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
                dns_stub_listener_extra_hash_ops,
                DnsStubListenerExtra,
                dns_stub_listener_extra_hash_func,
                dns_stub_listener_extra_compare_func,
                dns_stub_listener_extra_free);
63
0354029b
LP
64int dns_stub_listener_extra_new(
65 Manager *m,
66 DnsStubListenerExtra **ret) {
ae8f0ec3 67
36aaabc3 68 DnsStubListenerExtra *l;
1f05101f 69
0354029b 70 l = new(DnsStubListenerExtra, 1);
1f05101f
SS
71 if (!l)
72 return -ENOMEM;
73
0354029b
LP
74 *l = (DnsStubListenerExtra) {
75 .manager = m,
76 };
1f05101f 77
0354029b 78 *ret = TAKE_PTR(l);
1f05101f
SS
79 return 0;
80}
81
36aaabc3 82DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
bf22f231
YW
83 if (!p)
84 return NULL;
85
97935302
ZJS
86 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
87 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
bf22f231 88
bde69bbd
LP
89 hashmap_free(p->queries_by_packet);
90
bf22f231
YW
91 return mfree(p);
92}
93
bde69bbd
LP
94static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
95 assert(p);
96
97 siphash24_compress(&p->protocol, sizeof(p->protocol), state);
98 siphash24_compress(&p->family, sizeof(p->family), state);
99 siphash24_compress(&p->sender, sizeof(p->sender), state);
100 siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
101 siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
102 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
103
104 /* We don't bother hashing the full packet here, just the header */
105}
106
107static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
108 int r;
109
110 r = CMP(x->protocol, y->protocol);
111 if (r != 0)
112 return r;
113
114 r = CMP(x->family, y->family);
115 if (r != 0)
116 return r;
117
118 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
119 if (r != 0)
120 return r;
121
122 r = CMP(x->ipproto, y->ipproto);
123 if (r != 0)
124 return r;
125
126 r = CMP(x->sender_port, y->sender_port);
127 if (r != 0)
128 return r;
129
130 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
131}
132
/* Hash ops for hashmaps keyed by DnsPacket, built from the two callbacks above. */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
134
5bd7ebb3
LP
135static int reply_add_with_rrsig(
136 DnsAnswer **reply,
137 DnsResourceRecord *rr,
138 int ifindex,
139 DnsAnswerFlags flags,
140 DnsResourceRecord *rrsig,
141 bool with_rrsig) {
142 int r;
143
144 assert(reply);
145 assert(rr);
146
147 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
148 if (r < 0)
149 return r;
150
151 if (with_rrsig && rrsig) {
152 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
153 if (r < 0)
154 return r;
155 }
156
157 return 0;
158}
159
775ae354
LP
160static int dns_stub_collect_answer_by_question(
161 DnsAnswer **reply,
51027656 162 DnsAnswer *answer,
775ae354
LP
163 DnsQuestion *question,
164 bool with_rrsig) { /* Add RRSIG RR matching each RR */
b30bf55d 165
775ae354 166 DnsAnswerItem *item;
b30bf55d
LP
167 int r;
168
775ae354 169 assert(reply);
e8d23f92 170
915ba31c 171 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
4838dc4f 172
915ba31c 173 DNS_ANSWER_FOREACH_ITEM(item, answer) {
5bd7ebb3 174
915ba31c
LP
175 /* We have a question, let's see if this RR matches it */
176 r = dns_question_matches_rr(question, item->rr, NULL);
177 if (r < 0)
178 return r;
179 if (!r) {
180 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
181 r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
4838dc4f
LP
182 if (r < 0)
183 return r;
915ba31c
LP
184 if (!r)
185 continue;
4838dc4f 186 }
5bd7ebb3 187
915ba31c
LP
188 /* Mask the section info, we want the primary answers to always go without section
189 * info, so that it is added to the answer section when we synthesize a reply. */
5bd7ebb3 190
915ba31c
LP
191 r = reply_add_with_rrsig(
192 reply,
193 item->rr,
194 item->ifindex,
195 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
196 item->rrsig,
197 with_rrsig);
198 if (r < 0)
199 return r;
e8d23f92 200 }
b30bf55d 201
775ae354
LP
202 return 0;
203}
e8d23f92 204
775ae354
LP
205static int dns_stub_collect_answer_by_section(
206 DnsAnswer **reply,
207 DnsAnswer *answer,
208 DnsAnswerFlags section,
209 DnsAnswer *exclude1,
210 DnsAnswer *exclude2,
211 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
b30bf55d 212
775ae354 213 DnsAnswerItem *item;
775ae354 214 int r;
b30bf55d 215
775ae354
LP
216 assert(reply);
217
218 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
219 * avoid any RRs listed in 'exclude'. */
220
221 DNS_ANSWER_FOREACH_ITEM(item, answer) {
222
223 if (dns_answer_contains(exclude1, item->rr) ||
224 dns_answer_contains(exclude2, item->rr))
225 continue;
226
227 if (!with_dnssec &&
228 dns_type_is_dnssec(item->rr->key->type))
229 continue;
230
c4d98c3a 231 if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0)
775ae354
LP
232 continue;
233
5bd7ebb3
LP
234 r = reply_add_with_rrsig(
235 reply,
236 item->rr,
237 item->ifindex,
238 item->flags,
239 item->rrsig,
240 with_dnssec);
b30bf55d
LP
241 if (r < 0)
242 return r;
b30bf55d 243 }
e8d23f92 244
5bd7ebb3 245 return 0;
775ae354
LP
246}
247
248static int dns_stub_assign_sections(
249 DnsQuery *q,
250 DnsQuestion *question,
251 bool edns0_do) {
252
253 int r;
254
255 assert(q);
256 assert(question);
257
c6ebf89b
LP
258 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
259 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
260 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
261 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
262 * come with section information though (for example, because they were synthesized locally, and not
263 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
264 * 'answer' object that directly matches the original question is always put in the ANSWER section,
265 * regardless if it carries section info, or what that section info says. Then, anything from the
266 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
267 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
268 * the ADDITIONAL section. */
775ae354
LP
269
270 /* Include all RRs that directly answer the question in the answer section */
271 r = dns_stub_collect_answer_by_question(
272 &q->reply_answer,
273 q->answer,
274 question,
275 edns0_do);
276 if (r < 0)
277 return r;
278
d451f0e8 279 /* Include all RRs that originate from the authority sections, and aren't already listed in the
775ae354
LP
280 * answer section, in the authority section */
281 r = dns_stub_collect_answer_by_section(
282 &q->reply_authoritative,
283 q->answer,
d451f0e8 284 DNS_ANSWER_SECTION_AUTHORITY,
775ae354
LP
285 q->reply_answer, NULL,
286 edns0_do);
287 if (r < 0)
288 return r;
d451f0e8
LP
289
290 /* Include all RRs that originate from the answer or additional sections in the additional section
291 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
775ae354 292 r = dns_stub_collect_answer_by_section(
d451f0e8 293 &q->reply_additional,
775ae354 294 q->answer,
d451f0e8
LP
295 DNS_ANSWER_SECTION_ANSWER,
296 q->reply_answer, q->reply_authoritative,
775ae354
LP
297 edns0_do);
298 if (r < 0)
299 return r;
775ae354
LP
300 r = dns_stub_collect_answer_by_section(
301 &q->reply_additional,
302 q->answer,
303 DNS_ANSWER_SECTION_ADDITIONAL,
304 q->reply_answer, q->reply_authoritative,
305 edns0_do);
306 if (r < 0)
307 return r;
308 r = dns_stub_collect_answer_by_section(
309 &q->reply_additional,
310 q->answer,
311 0,
312 q->reply_answer, q->reply_authoritative,
313 edns0_do);
314 if (r < 0)
315 return r;
316
317 return 0;
318}
319
320static int dns_stub_make_reply_packet(
321 DnsPacket **ret,
322 size_t max_size,
323 DnsQuestion *q,
324 bool *ret_truncated) {
325
326 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
327 bool tc = false;
328 int r;
329
330 assert(ret);
331
332 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
333 if (r < 0)
334 return r;
335
336 r = dns_packet_append_question(p, q);
337 if (r == -EMSGSIZE)
338 tc = true;
339 else if (r < 0)
340 return r;
341
51027656 342 if (ret_truncated)
775ae354
LP
343 *ret_truncated = tc;
344 else if (tc)
51027656
LP
345 return -EMSGSIZE;
346
775ae354 347 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
e8d23f92 348
775ae354
LP
349 *ret = TAKE_PTR(p);
350 return 0;
351}
352
353static int dns_stub_add_reply_packet_body(
354 DnsPacket *p,
355 DnsAnswer *answer,
356 DnsAnswer *authoritative,
357 DnsAnswer *additional,
358 bool edns0_do, /* Client expects DNSSEC RRs? */
359 bool *truncated) {
360
361 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
362 bool tc = false;
363 int r;
364
365 assert(p);
366
367 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
368 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
369 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
370 * the dropped RRs should not be essential. */
371
372 r = dns_packet_append_answer(p, answer, &n_answer);
373 if (r == -EMSGSIZE)
374 tc = true;
375 else if (r < 0)
376 return r;
377 else {
378 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
379 if (r == -EMSGSIZE) {
380 if (edns0_do)
381 tc = true;
382 } else if (r < 0)
383 return r;
384 else {
385 r = dns_packet_append_answer(p, additional, &n_additional);
386 if (r < 0 && r != -EMSGSIZE)
387 return r;
388 }
389 }
390
391 if (tc) {
392 if (!truncated)
393 return -EMSGSIZE;
394
395 *truncated = true;
396 }
397
398 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
399 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
400 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
e8d23f92
LP
401 return 0;
402}
403
4a6eb824
LP
404static const char *nsid_string(void) {
405 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
406 sd_id128_t id;
407 int r;
408
409 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
410 * as systemd-resolved, and return a different string for each resolved instance without leaking host
411 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
412 * machine ID but from which the machine ID cannot be determined.
413 *
414 * Clients can use this to determine whether an answer is originating locally or is proxied from
415 * upstream. */
416
417 if (!isempty(buffer))
418 return buffer;
419
420 r = sd_id128_get_machine_app_specific(
421 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
422 &id);
423 if (r < 0) {
b480543c 424 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
4a6eb824
LP
425 return NULL;
426 }
427
428 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
429 return buffer;
430}
431
e8d23f92
LP
432static int dns_stub_finish_reply_packet(
433 DnsPacket *p,
434 uint16_t id,
435 int rcode,
51027656 436 bool tc, /* set the Truncated bit? */
4ad017cd 437 bool aa, /* set the Authoritative Answer bit? */
da846b30 438 bool rd, /* set the Recursion Desired bit? */
e8d23f92
LP
439 bool add_opt, /* add an OPT RR to this packet? */
440 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
b370adb5 441 bool ad, /* set the DNSSEC authenticated data bit? */
775ae354 442 bool cd, /* set the DNSSEC checking disabled bit? */
4a6eb824
LP
443 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
444 bool nsid) { /* whether to add NSID */
e8d23f92
LP
445
446 int r;
447
448 assert(p);
449
ff4caaae 450 if (add_opt) {
4a6eb824 451 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
ff4caaae
LP
452 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
453 tc = true;
454 else if (r < 0)
455 return r;
ff4caaae 456 } else {
941dd294
LP
457 /* If the client can't to EDNS0, don't do DO either */
458 edns0_do = false;
459
775ae354 460 /* If we don't do EDNS, clamp the rcode to 4 bit */
941dd294
LP
461 if (rcode > 0xF)
462 rcode = DNS_RCODE_SERVFAIL;
463 }
464
8c9c68b5
LP
465 /* Don't set the CD bit unless DO is on, too */
466 if (!edns0_do)
775ae354
LP
467 cd = false;
468
8c9c68b5
LP
469 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
470 * 5.7 */
e8d23f92
LP
471
472 DNS_PACKET_HEADER(p)->id = id;
473
474 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
51027656
LP
475 1 /* qr */,
476 0 /* opcode */,
4ad017cd 477 aa /* aa */,
51027656 478 tc /* tc */,
da846b30 479 rd /* rd */,
51027656 480 1 /* ra */,
e8d23f92 481 ad /* ad */,
775ae354 482 cd /* cd */,
e8d23f92 483 rcode));
b30bf55d 484
b30bf55d
LP
485 return 0;
486}
487
a8d09063
LP
488static bool address_is_proxy(int family, const union in_addr_union *a) {
489 assert(a);
490
491 /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
492
493 if (family != AF_INET)
494 return false;
495
496 return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB;
497}
498
499static int find_socket_fd(
500 Manager *m,
501 DnsStubListenerExtra *l,
502 int family,
503 const union in_addr_union *listen_address,
504 int type) {
505
506 assert(m);
507
508 /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the
509 * address to send from */
510 if (l)
511 return manager_dns_stub_fd_extra(m, l, type);
512
513 return manager_dns_stub_fd(m, family, listen_address, type);
514}
515
0354029b
LP
516static int dns_stub_send(
517 Manager *m,
518 DnsStubListenerExtra *l,
519 DnsStream *s,
520 DnsPacket *p,
521 DnsPacket *reply) {
522
b30bf55d
LP
523 int r;
524
525 assert(m);
526 assert(p);
527 assert(reply);
528
529 if (s)
530 r = dns_stream_write_packet(s, reply);
a8d09063 531 else {
dfa14e28 532 int fd, ifindex;
a8d09063 533
de777ffa 534 fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM);
a8d09063
LP
535 if (fd < 0)
536 return fd;
537
dfa14e28
BF
538 if (address_is_proxy(p->family, &p->destination))
539 /* Force loopback iface if this is the loopback proxy stub
540 * and ifindex was normalized to 0 by manager_recv(). */
541 ifindex = p->ifindex ?: LOOPBACK_IFINDEX;
542 else
543 /* Force loopback iface if this is the main listener stub. */
544 ifindex = l ? p->ifindex : LOOPBACK_IFINDEX;
545
a8d09063
LP
546 /* Note that it is essential here that we explicitly choose the source IP address for this
547 * packet. This is because otherwise the kernel will choose it automatically based on the
dfa14e28 548 * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */
0354029b 549 r = manager_send(m,
a8d09063 550 fd,
dfa14e28 551 ifindex,
0354029b
LP
552 p->family, &p->sender, p->sender_port, &p->destination,
553 reply);
a8d09063 554 }
b30bf55d
LP
555 if (r < 0)
556 return log_debug_errno(r, "Failed to send reply packet: %m");
557
558 return 0;
559}
560
39005e18
LP
561static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
562 assert(q);
563
564 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
565 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
566 * set cd */
567
568 return DNS_PACKET_DO(q->request_packet) &&
569 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
570 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
571 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
572}
573
5d7da51e
LP
574static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
575 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
576 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
577 * element). E.g. it could be that an RR that was included in the first reply's additional section
578 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
579 * remove everything in the "higher priority" sections from the "lower priority" sections.
580 *
581 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
582 * always end up in one section fully or not at all, but never be split among sections.
583 *
584 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
585 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
586
587 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
588 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
589 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
590}
591
775ae354
LP
592static int dns_stub_send_reply(
593 DnsQuery *q,
594 int rcode) {
595
596 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
597 bool truncated, edns0_do;
598 int r;
599
600 assert(q);
601
39005e18 602 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
775ae354 603
775ae354
LP
604 r = dns_stub_make_reply_packet(
605 &reply,
606 DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
607 q->request_packet->question,
608 &truncated);
609 if (r < 0)
610 return log_debug_errno(r, "Failed to build reply packet: %m");
611
5d7da51e
LP
612 dns_stub_suppress_duplicate_section_rrs(q);
613
775ae354
LP
614 r = dns_stub_add_reply_packet_body(
615 reply,
616 q->reply_answer,
617 q->reply_authoritative,
618 q->reply_additional,
619 edns0_do,
620 &truncated);
621 if (r < 0)
622 return log_debug_errno(r, "Failed to append reply packet body: %m");
623
624 r = dns_stub_finish_reply_packet(
625 reply,
626 DNS_PACKET_ID(q->request_packet),
627 rcode,
628 truncated,
9ddf099f 629 dns_query_fully_authoritative(q),
da846b30 630 DNS_PACKET_RD(q->request_packet),
775ae354
LP
631 !!q->request_packet->opt,
632 edns0_do,
b553abd8 633 (DNS_PACKET_AD(q->request_packet) || DNS_PACKET_DO(q->request_packet)) && dns_query_fully_authenticated(q),
775ae354 634 DNS_PACKET_CD(q->request_packet),
4a6eb824
LP
635 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
636 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
775ae354
LP
637 if (r < 0)
638 return log_debug_errno(r, "Failed to build failure packet: %m");
639
640 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
641}
642
0354029b
LP
643static int dns_stub_send_failure(
644 Manager *m,
645 DnsStubListenerExtra *l,
646 DnsStream *s,
647 DnsPacket *p,
648 int rcode,
649 bool authenticated) {
650
b30bf55d 651 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
775ae354 652 bool truncated;
b30bf55d
LP
653 int r;
654
655 assert(m);
656 assert(p);
657
775ae354
LP
658 r = dns_stub_make_reply_packet(
659 &reply,
660 DNS_PACKET_PAYLOAD_SIZE_MAX(p),
661 p->question,
662 &truncated);
e8d23f92
LP
663 if (r < 0)
664 return log_debug_errno(r, "Failed to make failure packet: %m");
665
b370adb5
LP
666 r = dns_stub_finish_reply_packet(
667 reply,
668 DNS_PACKET_ID(p),
669 rcode,
775ae354 670 truncated,
4ad017cd 671 false,
da846b30 672 DNS_PACKET_RD(p),
b370adb5
LP
673 !!p->opt,
674 DNS_PACKET_DO(p),
b553abd8 675 (DNS_PACKET_AD(p) || DNS_PACKET_DO(p)) && authenticated,
775ae354 676 DNS_PACKET_CD(p),
4a6eb824
LP
677 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
678 dns_packet_has_nsid_request(p) > 0 && !l);
b30bf55d
LP
679 if (r < 0)
680 return log_debug_errno(r, "Failed to build failure packet: %m");
681
0354029b 682 return dns_stub_send(m, l, s, p, reply);
b30bf55d
LP
683}
684
775ae354
LP
685static int dns_stub_patch_bypass_reply_packet(
686 DnsPacket **ret, /* Where to place the patched packet */
687 DnsPacket *original, /* The packet to patch */
688 DnsPacket *request) { /* The packet the patched packet shall look like a reply to */
689 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
690 int r;
691
692 assert(ret);
693 assert(original);
694 assert(request);
695
696 r = dns_packet_dup(&c, original);
697 if (r < 0)
698 return r;
699
700 /* Extract the packet, so that we know where the OPT field is */
701 r = dns_packet_extract(c);
702 if (r < 0)
703 return r;
704
705 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
706 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
707
708 /* Patch in our own maximum datagram size, if EDNS0 was on */
709 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
710 if (r < 0)
711 return r;
712
713 /* Lower all TTLs by the time passed since we received the datagram. */
714 if (timestamp_is_set(original->timestamp)) {
715 r = dns_packet_patch_ttls(c, original->timestamp);
716 if (r < 0)
717 return r;
718 }
719
720 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
721 * set the TC bit if our reply is larger than what the client supports, and truncate. */
722 if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) {
723 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
724 dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request));
725 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
726 }
727
728 *ret = TAKE_PTR(c);
729 return 0;
730}
731
c704288c
YW
732static void dns_stub_query_complete(DnsQuery *query) {
733 _cleanup_(dns_query_freep) DnsQuery *q = query;
b30bf55d
LP
734 int r;
735
736 assert(q);
775ae354 737 assert(q->request_packet);
b30bf55d 738
775ae354
LP
739 if (q->question_bypass) {
740 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
741 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
742 * packets are not 100% compatible.) */
b30bf55d 743
775ae354
LP
744 if (q->answer_full_packet &&
745 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
746 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
e8d23f92 747
775ae354
LP
748 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
749 if (r < 0)
750 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
751 else
752 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
753
775ae354 754 return;
e8d23f92 755 }
775ae354 756 }
b30bf55d 757
b97fc571
LP
758 /* Take all data from the current reply, and merge it into the three reply sections we are building
759 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
760 * and keep adding all RRs in the CNAME chain. */
761 r = dns_stub_assign_sections(
762 q,
a7c0291c 763 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
b97fc571 764 dns_stub_reply_with_edns0_do(q));
c704288c
YW
765 if (r < 0)
766 return (void) log_debug_errno(r, "Failed to assign sections: %m");
2f4d8e57 767
775ae354
LP
768 switch (q->state) {
769
915ba31c
LP
770 case DNS_TRANSACTION_SUCCESS: {
771 bool first = true;
772
773 for (;;) {
774 int cname_result;
775
776 cname_result = dns_query_process_cname_one(q);
777 if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */
fca212b0 778 log_debug("Detected CNAME loop, returning what we already have.");
915ba31c
LP
779 (void) dns_stub_send_reply(q, q->answer_rcode);
780 break;
781 }
782 if (cname_result < 0) {
783 log_debug_errno(cname_result, "Failed to process CNAME: %m");
784 break;
785 }
786
787 if (cname_result == DNS_QUERY_NOMATCH) {
788 /* This answer doesn't contain any RR that would answer our question
789 * positively, i.e. neither directly nor via CNAME. */
790
791 if (first) /* We never followed a CNAME and the answer doesn't match our
792 * question at all? Then this is final, the empty answer is the
793 * answer. */
794 break;
795
796 /* Otherwise, we already followed a CNAME once within this packet, and the
797 * packet doesn't answer our question. In that case let's restart the query,
798 * now with the redirected question. We'll */
799 r = dns_query_go(q);
c704288c
YW
800 if (r < 0)
801 return (void) log_debug_errno(r, "Failed to restart query: %m");
915ba31c 802
c704288c 803 TAKE_PTR(q);
915ba31c
LP
804 return;
805 }
806
807 r = dns_stub_assign_sections(
808 q,
809 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
810 dns_stub_reply_with_edns0_do(q));
c704288c
YW
811 if (r < 0)
812 return (void) log_debug_errno(r, "Failed to assign sections: %m");
915ba31c
LP
813
814 if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
815 break;
816
817 /* We followed a CNAME. and collected the RRs that answer the redirected question
818 * successfully. Let's not try to do this again. */
819 assert(cname_result == DNS_QUERY_CNAME);
820 first = false;
b97fc571 821 }
b97fc571
LP
822
823 _fallthrough_;
915ba31c 824 }
b97fc571 825
b30bf55d 826 case DNS_TRANSACTION_RCODE_FAILURE:
775ae354 827 (void) dns_stub_send_reply(q, q->answer_rcode);
b30bf55d
LP
828 break;
829
830 case DNS_TRANSACTION_NOT_FOUND:
775ae354 831 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
b30bf55d
LP
832 break;
833
834 case DNS_TRANSACTION_TIMEOUT:
835 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
836 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
837 break;
838
839 case DNS_TRANSACTION_NO_SERVERS:
840 case DNS_TRANSACTION_INVALID_REPLY:
841 case DNS_TRANSACTION_ERRNO:
842 case DNS_TRANSACTION_ABORTED:
843 case DNS_TRANSACTION_DNSSEC_FAILED:
844 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
845 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
846 case DNS_TRANSACTION_NETWORK_DOWN:
775ae354 847 case DNS_TRANSACTION_NO_SOURCE:
49ef064c 848 case DNS_TRANSACTION_STUB_LOOP:
775ae354 849 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
b30bf55d
LP
850 break;
851
852 case DNS_TRANSACTION_NULL:
853 case DNS_TRANSACTION_PENDING:
854 case DNS_TRANSACTION_VALIDATING:
855 default:
04499a70 856 assert_not_reached();
b30bf55d 857 }
b30bf55d
LP
858}
859
860static int dns_stub_stream_complete(DnsStream *s, int error) {
861 assert(s);
862
b412af57
LP
863 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
864
865 for (;;) {
866 DnsQuery *q;
867
868 q = set_first(s->queries);
869 if (!q)
870 break;
b30bf55d 871
b412af57
LP
872 dns_query_free(q);
873 }
b30bf55d 874
b412af57
LP
875 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
876 * should be kept as long as the client wants to. */
877 dns_stream_unref(s);
b30bf55d
LP
878 return 0;
879}
880
0354029b 881static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
a8d09063 882 uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL;
ceb17827 883 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
bde69bbd
LP
884 Hashmap **queries_by_packet;
885 DnsQuery *existing;
a8d09063 886 bool bypass = false;
b30bf55d
LP
887 int r;
888
889 assert(m);
890 assert(p);
891 assert(p->protocol == DNS_PROTOCOL_DNS);
892
0354029b 893 if (!l && /* l == NULL if this is the main stub */
a8d09063 894 !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */
d1fb8cda
YW
895 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
896 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
565147b7 897 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
ceb17827 898 return;
b30bf55d
LP
899 }
900
a9fd8837
LP
901 if (manager_packet_from_our_transaction(m, p)) {
902 log_debug("Got our own packet looped back, ignoring.");
903 return;
904 }
905
bde69bbd
LP
906 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
907 existing = hashmap_get(*queries_by_packet, p);
908 if (existing && dns_packet_equal(existing->request_packet, p)) {
909 log_debug("Got repeat packet from client, ignoring.");
910 return;
911 }
912
b30bf55d
LP
913 r = dns_packet_extract(p);
914 if (r < 0) {
915 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
0354029b 916 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
ceb17827 917 return;
b30bf55d
LP
918 }
919
920 if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
921 log_debug("Got EDNS OPT field with unsupported version number.");
0354029b 922 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
ceb17827 923 return;
b30bf55d
LP
924 }
925
ab715ddb 926 if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) {
b30bf55d 927 log_debug("Got message with obsolete key type, refusing.");
30ee7071 928 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 929 return;
b30bf55d
LP
930 }
931
d4fd7fb5 932 if (dns_type_is_zone_transfer(dns_question_first_key(p->question)->type)) {
b30bf55d 933 log_debug("Got request for zone transfer, refusing.");
30ee7071 934 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 935 return;
b30bf55d
LP
936 }
937
938 if (!DNS_PACKET_RD(p)) {
939 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
940 log_debug("Got request with recursion disabled, refusing.");
0354029b 941 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 942 return;
b30bf55d
LP
943 }
944
bde69bbd
LP
945 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
946 if (r < 0) {
947 log_oom();
948 return;
949 }
950
a8d09063
LP
951 if (address_is_proxy(p->family, &p->destination)) {
952 _cleanup_free_ char *dipa = NULL;
953
954 r = in_addr_to_string(p->family, &p->destination, &dipa);
e1158539
LP
955 if (r < 0)
956 return (void) log_error_errno(r, "Failed to format destination address: %m");
a8d09063
LP
957
958 log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
959 bypass = true;
960 protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */
961 } else if ((DNS_PACKET_DO(p) && DNS_PACKET_CD(p))) {
775ae354 962 log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
a8d09063
LP
963 bypass = true;
964 }
775ae354 965
a8d09063 966 if (bypass)
775ae354 967 r = dns_query_new(m, &q, NULL, NULL, p, 0,
a8d09063 968 protocol_flags|
775ae354
LP
969 SD_RESOLVED_NO_CNAME|
970 SD_RESOLVED_NO_SEARCH|
971 SD_RESOLVED_NO_VALIDATE|
972 SD_RESOLVED_REQUIRE_PRIMARY|
973 SD_RESOLVED_CLAMP_TTL);
a8d09063 974 else
775ae354 975 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
a8d09063 976 protocol_flags|
775ae354 977 SD_RESOLVED_NO_SEARCH|
2f4d8e57 978 (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
775ae354 979 SD_RESOLVED_CLAMP_TTL);
b30bf55d
LP
980 if (r < 0) {
981 log_error_errno(r, "Failed to generate query object: %m");
0354029b 982 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 983 return;
b30bf55d
LP
984 }
985
775ae354
LP
986 q->request_packet = dns_packet_ref(p);
987 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
0354029b 988 q->stub_listener_extra = l;
b30bf55d
LP
989 q->complete = dns_stub_query_complete;
990
991 if (s) {
b412af57
LP
992 /* Remember which queries belong to this stream, so that we can cancel them when the stream
993 * is disconnected early */
994
ceb17827 995 r = set_ensure_put(&s->queries, NULL, q);
b412af57
LP
996 if (r < 0) {
997 log_oom();
ceb17827 998 return;
b412af57 999 }
ceb17827 1000 assert(r > 0);
b30bf55d
LP
1001 }
1002
bde69bbd
LP
1003 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
1004 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
1005 * isn't particularly bad. */
1006 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
1007
b30bf55d
LP
1008 r = dns_query_go(q);
1009 if (r < 0) {
1010 log_error_errno(r, "Failed to start query: %m");
0354029b 1011 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 1012 return;
b30bf55d
LP
1013 }
1014
52e63427 1015 log_debug("Processing query...");
ceb17827 1016 TAKE_PTR(q);
b30bf55d
LP
1017}
1018
0354029b 1019static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
b30bf55d 1020 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
b30bf55d
LP
1021 int r;
1022
1023 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
1024 if (r <= 0)
1025 return r;
1026
1027 if (dns_packet_validate_query(p) > 0) {
1028 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
1029
0354029b 1030 dns_stub_process_query(m, l, NULL, p);
b30bf55d
LP
1031 } else
1032 log_debug("Invalid DNS stub UDP packet, ignoring.");
1033
1034 return 0;
1035}
1036
d1fb8cda 1037static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b 1038 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
d1fb8cda
YW
1039}
1040
1041static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1042 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
0354029b
LP
1043
1044 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
d1fb8cda
YW
1045}
1046
624f907e 1047static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) {
e4bed40f 1048 assert(s);
624f907e 1049 assert(s->manager);
e4bed40f
ZJS
1050 assert(p);
1051
1052 if (dns_packet_validate_query(p) > 0) {
1053 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1054
1055 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1056 } else
1057 log_debug("Invalid DNS stub TCP packet, ignoring.");
1058
1059 return 0;
1060}
1061
1062static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1063 DnsStream *stream;
1064 int cfd, r;
1065
1066 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1067 if (cfd < 0) {
1068 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1069 return 0;
1070
1071 return -errno;
1072 }
1073
18230451
YW
1074 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL,
1075 on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC);
e4bed40f
ZJS
1076 if (r < 0) {
1077 safe_close(cfd);
1078 return r;
1079 }
1080
1081 stream->stub_listener_extra = l;
e4bed40f
ZJS
1082
1083 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1084
1085 return 0;
1086}
1087
1088static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1089 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1090}
1091
1092static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1093 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
e4bed40f 1094
e4bed40f
ZJS
1095 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1096}
1097
/* Applies the socket options shared by all stub sockets (UDP and TCP, built-in and extra): SO_REUSEADDR,
 * plus reception of packet info and TTL data for the given address family. Returns 0 on success, or the
 * negative error of the first option that could not be set. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int k;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        k = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (k < 0)
                return k;

        k = socket_set_recvpktinfo(fd, family, true);
        if (k < 0)
                return k;

        k = socket_set_recvttl(fd, family, true);

        /* Positive results are not passed on, callers only get 0 or an error. */
        return k < 0 ? k : 0;
}
1118
8624f128
LP
/* Applies the TCP-specific socket options shared by all stub TCP listening sockets. Both options are
 * best-effort: failures are logged at debug level and otherwise ignored, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int k;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        k = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        k = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1134
a8d09063
LP
1135static int manager_dns_stub_fd(
1136 Manager *m,
1137 int family,
1138 const union in_addr_union *listen_addr,
1139 int type) {
1140
1141 sd_event_source **event_source;
254d1313 1142 _cleanup_close_ int fd = -EBADF;
a8d09063 1143 union sockaddr_union sa;
b30bf55d
LP
1144 int r;
1145
e1158539
LP
1146 assert(m);
1147 assert(listen_addr);
1148
a8d09063
LP
1149 if (type == SOCK_DGRAM)
1150 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source;
1151 else if (type == SOCK_STREAM)
1152 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source;
1153 else
1154 return -EPROTONOSUPPORT;
d491917c 1155
d491917c
ZJS
1156 if (*event_source)
1157 return sd_event_source_get_io_fd(*event_source);
b30bf55d 1158
a8d09063 1159 fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
424e490b 1160 if (fd < 0)
b30bf55d
LP
1161 return -errno;
1162
a8d09063 1163 r = set_dns_stub_common_socket_options(fd, family);
2ff48e98
LP
1164 if (r < 0)
1165 return r;
b30bf55d 1166
8624f128
LP
1167 if (type == SOCK_STREAM) {
1168 r = set_dns_stub_common_tcp_socket_options(fd);
1169 if (r < 0)
1170 return r;
1171 }
1172
a8d09063
LP
1173 /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
1174 * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
1175 * so to redirect external traffic to it. */
1176
1177 if (!address_is_proxy(family, listen_addr)) {
1178 /* Make sure no traffic from outside the local host can leak to onto this socket */
1179 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1180 if (r < 0)
1181 return r;
1182
1183 r = socket_set_ttl(fd, family, 1);
1184 if (r < 0)
1185 return r;
1186 } else if (type == SOCK_DGRAM) {
e1158539
LP
1187 /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
1188 * a longer discussion. (We only do this for sockets that are potentially externally
1189 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
1190 * see above, so that packets don't get routed at all.) */
a8d09063
LP
1191 r = socket_disable_pmtud(fd, family);
1192 if (r < 0)
1193 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1194
1195 r = socket_set_recvfragsize(fd, family, true);
1196 if (r < 0)
1197 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1198 }
b30bf55d 1199
a8d09063 1200 r = sockaddr_set_in_addr(&sa, family, listen_addr, 53);
d491917c
ZJS
1201 if (r < 0)
1202 return r;
1203
424e490b
ZJS
1204 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1205 return -errno;
b30bf55d 1206
d491917c 1207 if (type == SOCK_STREAM &&
768fcd77 1208 listen(fd, SOMAXCONN_DELUXE) < 0)
d491917c
ZJS
1209 return -errno;
1210
1211 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1212 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1213 m);
b30bf55d 1214 if (r < 0)
424e490b 1215 return r;
b30bf55d 1216
d491917c 1217 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1218 if (r < 0)
1219 return r;
1220
d491917c
ZJS
1221 (void) sd_event_source_set_description(*event_source,
1222 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
b30bf55d 1223
7216a3b5 1224 return TAKE_FD(fd);
b30bf55d
LP
1225}
1226
b5febb3f 1227static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1f05101f 1228 _cleanup_free_ char *pretty = NULL;
254d1313 1229 _cleanup_close_ int fd = -EBADF;
ca8b62b5 1230 union sockaddr_union sa;
1f05101f
SS
1231 int r;
1232
0354029b 1233 assert(m);
a8d09063 1234 assert(l);
b5febb3f 1235 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
0354029b 1236
b5febb3f
ZJS
1237 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1238 if (*event_source)
1239 return sd_event_source_get_io_fd(*event_source);
1f05101f 1240
0398c084
DDM
1241 if (!have_effective_cap(CAP_NET_BIND_SERVICE) && dns_stub_listener_extra_port(l) < 1024) {
1242 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating extra stub listener on port %hu.",
1243 dns_stub_listener_extra_port(l));
1244 return 0;
1245 }
1246
ca8b62b5
YW
1247 if (l->family == AF_INET)
1248 sa = (union sockaddr_union) {
1249 .in.sin_family = l->family,
49ef064c 1250 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1251 .in.sin_addr = l->address.in,
1252 };
1253 else
1254 sa = (union sockaddr_union) {
1255 .in6.sin6_family = l->family,
49ef064c 1256 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1257 .in6.sin6_addr = l->address.in6,
1258 };
1259
b5febb3f 1260 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1f05101f
SS
1261 if (fd < 0) {
1262 r = -errno;
1263 goto fail;
1264 }
1265
af8b1384 1266 r = set_dns_stub_common_socket_options(fd, l->family);
1f05101f
SS
1267 if (r < 0)
1268 goto fail;
1269
8624f128
LP
1270 if (type == SOCK_STREAM) {
1271 r = set_dns_stub_common_tcp_socket_options(fd);
1272 if (r < 0)
1273 goto fail;
1274 }
1275
69e3234d 1276 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
b5febb3f
ZJS
1277 * people may want ttl > 1. */
1278
5d0fe423 1279 r = socket_set_freebind(fd, l->family, true);
b5febb3f
ZJS
1280 if (r < 0)
1281 goto fail;
1282
eb170e75
LP
1283 if (type == SOCK_DGRAM) {
1284 r = socket_disable_pmtud(fd, l->family);
1285 if (r < 0)
1286 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
20a001bd
LP
1287
1288 r = socket_set_recvfragsize(fd, l->family, true);
1289 if (r < 0)
1290 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
eb170e75
LP
1291 }
1292
ded15213
LP
1293 r = RET_NERRNO(bind(fd, &sa.sa, SOCKADDR_LEN(sa)));
1294 if (r < 0)
1f05101f 1295 goto fail;
1f05101f 1296
b5febb3f 1297 if (type == SOCK_STREAM &&
768fcd77 1298 listen(fd, SOMAXCONN_DELUXE) < 0) {
b5febb3f
ZJS
1299 r = -errno;
1300 goto fail;
1301 }
1302
1303 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1304 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1305 l);
1f05101f
SS
1306 if (r < 0)
1307 goto fail;
1308
b5febb3f 1309 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1310 if (r < 0)
1311 goto fail;
1312
b5febb3f
ZJS
1313 (void) sd_event_source_set_description(*event_source,
1314 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1f05101f
SS
1315
1316 if (DEBUG_LOGGING) {
ca8b62b5 1317 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1318 log_debug("Listening on %s socket %s.",
1319 type == SOCK_DGRAM ? "UDP" : "TCP",
1320 strnull(pretty));
1f05101f
SS
1321 }
1322
7216a3b5 1323 return TAKE_FD(fd);
1f05101f 1324
b4b7ea1b 1325fail:
1c17bcb3 1326 assert(r < 0);
ca8b62b5 1327 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1328 return log_warning_errno(r,
1329 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1330 "Failed to listen on %s socket %s: %m",
1331 type == SOCK_DGRAM ? "UDP" : "TCP",
1332 strnull(pretty));
1f05101f
SS
1333}
1334
b30bf55d 1335int manager_dns_stub_start(Manager *m) {
a8d09063 1336 int r;
b30bf55d
LP
1337
1338 assert(m);
1339
d5da7707
ZJS
1340 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1341 log_debug("Not creating stub listener.");
0398c084
DDM
1342 else if (!have_effective_cap(CAP_NET_BIND_SERVICE))
1343 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating stub listener on port 53.");
a8d09063
LP
1344 else {
1345 static const struct {
1346 uint32_t addr;
1347 int socket_type;
1348 } stub_sockets[] = {
1349 { INADDR_DNS_STUB, SOCK_DGRAM },
1350 { INADDR_DNS_STUB, SOCK_STREAM },
1351 { INADDR_DNS_PROXY_STUB, SOCK_DGRAM },
1352 { INADDR_DNS_PROXY_STUB, SOCK_STREAM },
1353 };
1354
d5da7707
ZJS
1355 log_debug("Creating stub listener using %s.",
1356 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1357 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1358 "UDP/TCP");
1359
a8d09063
LP
1360 for (size_t i = 0; i < ELEMENTSOF(stub_sockets); i++) {
1361 union in_addr_union a = {
1362 .in.s_addr = htobe32(stub_sockets[i].addr),
1363 };
b30bf55d 1364
a8d09063
LP
1365 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && stub_sockets[i].socket_type == SOCK_STREAM)
1366 continue;
1367 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && stub_sockets[i].socket_type == SOCK_DGRAM)
1368 continue;
1369
1370 r = manager_dns_stub_fd(m, AF_INET, &a, stub_sockets[i].socket_type);
1371 if (r < 0) {
1372 _cleanup_free_ char *busy_socket = NULL;
1373
1374 if (asprintf(&busy_socket,
1375 "%s socket " IPV4_ADDRESS_FMT_STR ":53",
1376 stub_sockets[i].socket_type == SOCK_DGRAM ? "UDP" : "TCP",
1377 IPV4_ADDRESS_FMT_VAL(a.in)) < 0)
1378 return log_oom();
1379
1380 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
1381 log_warning_errno(r,
1382 r == -EADDRINUSE ? "Another process is already listening on %s.\n"
1383 "Turning off local DNS stub support." :
1384 "Failed to listen on %s: %m.\n"
1385 "Turning off local DNS stub support.",
1386 busy_socket);
1387 manager_dns_stub_stop(m);
1388 break;
1389 }
b30bf55d 1390
a8d09063
LP
1391 return log_error_errno(r, "Failed to listen on %s: %m", busy_socket);
1392 }
1393 }
1394 }
b30bf55d 1395
1f05101f 1396 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
36aaabc3 1397 DnsStubListenerExtra *l;
1f05101f 1398
dce65cd4 1399 log_debug("Creating extra stub listeners.");
1f05101f 1400
90e74a66 1401 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
7314b397 1402 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
b5febb3f 1403 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
7314b397 1404 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
b5febb3f 1405 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
7314b397 1406 }
1f05101f
SS
1407 }
1408
b30bf55d
LP
1409 return 0;
1410}
1411
1412void manager_dns_stub_stop(Manager *m) {
1413 assert(m);
1414
97935302
ZJS
1415 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1416 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
a8d09063
LP
1417 m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source);
1418 m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source);
b30bf55d 1419}
ae8f0ec3
LP
1420
1421static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
97935302 1422 [DNS_STUB_LISTENER_NO] = "no",
ae8f0ec3
LP
1423 [DNS_STUB_LISTENER_UDP] = "udp",
1424 [DNS_STUB_LISTENER_TCP] = "tcp",
1425 [DNS_STUB_LISTENER_YES] = "yes",
1426};
1427DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);