]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/resolve/resolved-dns-stub.c
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options
[thirdparty/systemd.git] / src / resolve / resolved-dns-stub.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b30bf55d 2
8624f128 3#include <netinet/tcp.h>
ca8b62b5 4
284d7641
DDM
5#include "sd-event.h"
6#include "sd-id128.h"
7
b78d73fa 8#include "alloc-util.h"
0398c084 9#include "capability-util.h"
68527d30 10#include "dns-type.h"
4ff9bc2e 11#include "errno-util.h"
b30bf55d 12#include "fd-util.h"
68527d30 13#include "log.h"
ef118d00 14#include "missing_network.h"
68527d30
DDM
15#include "resolve-util.h"
16#include "resolved-dns-answer.h"
17#include "resolved-dns-packet.h"
18#include "resolved-dns-query.h"
19#include "resolved-dns-question.h"
20#include "resolved-dns-rr.h"
21#include "resolved-dns-stream.h"
b30bf55d 22#include "resolved-dns-stub.h"
68527d30
DDM
23#include "resolved-dns-transaction.h"
24#include "resolved-manager.h"
284d7641
DDM
25#include "set.h"
26#include "siphash24.h"
b30bf55d 27#include "socket-util.h"
4a6eb824 28#include "stdio-util.h"
ae8f0ec3 29#include "string-table.h"
284d7641
DDM
30#include "string-util.h"
31#include "time-util.h"
b30bf55d
LP
32
33/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
34 * IP and UDP header sizes */
/* (65536 - 14 - 20 - 8 = 65494 bytes.) */
35#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
36
b370adb5
LP
37/* On the extra stubs, use a more conservative choice */
38#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
39
/* Forward declarations: find_socket_fd() below calls both of these before their definitions appear. They
 * take the manager, a description of the listener (an extra-listener object, or family + listen address
 * for the built-in stubs) and a socket type, and return an int that callers treat as a file descriptor
 * when non-negative and as an error when negative. */
b5febb3f 40static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
a8d09063 41static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type);
0354029b 42
ae8f0ec3
LP
43static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
44 assert(a);
45
c01a5c05
YW
46 siphash24_compress_typesafe(a->mode, state);
47 siphash24_compress_typesafe(a->family, state);
48 in_addr_hash_func(&a->address, a->family, state);
49 siphash24_compress_typesafe(a->port, state);
ae8f0ec3
LP
50}
51
52static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
53 int r;
54
55 assert(a);
56 assert(b);
57
58 r = CMP(a->mode, b->mode);
59 if (r != 0)
60 return r;
61
62 r = CMP(a->family, b->family);
63 if (r != 0)
64 return r;
65
66 r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
67 if (r != 0)
68 return r;
69
70 return CMP(a->port, b->port);
71}
72
/* Hash operations for containers keyed by DnsStubListenerExtra objects: keys are hashed and compared by
 * (mode, family, address, port) via the two functions above, and dns_stub_listener_extra_free() is
 * registered as the key destructor. */
73DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
74 dns_stub_listener_extra_hash_ops,
75 DnsStubListenerExtra,
76 dns_stub_listener_extra_hash_func,
77 dns_stub_listener_extra_compare_func,
78 dns_stub_listener_extra_free);
79
0354029b
LP
80int dns_stub_listener_extra_new(
81 Manager *m,
82 DnsStubListenerExtra **ret) {
ae8f0ec3 83
36aaabc3 84 DnsStubListenerExtra *l;
1f05101f 85
0354029b 86 l = new(DnsStubListenerExtra, 1);
1f05101f
SS
87 if (!l)
88 return -ENOMEM;
89
0354029b
LP
90 *l = (DnsStubListenerExtra) {
91 .manager = m,
92 };
1f05101f 93
0354029b 94 *ret = TAKE_PTR(l);
1f05101f
SS
95 return 0;
96}
97
36aaabc3 98DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
bf22f231
YW
99 if (!p)
100 return NULL;
101
97935302
ZJS
102 p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
103 p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);
bf22f231 104
bde69bbd
LP
105 hashmap_free(p->queries_by_packet);
106
bf22f231
YW
107 return mfree(p);
108}
109
bde69bbd
LP
110static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
111 assert(p);
112
c01a5c05
YW
113 siphash24_compress_typesafe(p->protocol, state);
114 siphash24_compress_typesafe(p->family, state);
115 siphash24_compress_typesafe(p->sender, state);
116 siphash24_compress_typesafe(p->ipproto, state);
117 siphash24_compress_typesafe(p->sender_port, state);
bde69bbd
LP
118 siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
119
120 /* We don't bother hashing the full packet here, just the header */
121}
122
123static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
124 int r;
125
126 r = CMP(x->protocol, y->protocol);
127 if (r != 0)
128 return r;
129
130 r = CMP(x->family, y->family);
131 if (r != 0)
132 return r;
133
134 r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
135 if (r != 0)
136 return r;
137
138 r = CMP(x->ipproto, y->ipproto);
139 if (r != 0)
140 return r;
141
142 r = CMP(x->sender_port, y->sender_port);
143 if (r != 0)
144 return r;
145
146 return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
147}
148
/* Hash operations for hashmaps keyed by DnsPacket (sender identity + DNS header, see the two functions
 * above). dns_stub_process_query() passes these when allocating its queries_by_packet maps. */
149DEFINE_HASH_OPS(stub_packet_hash_ops, DnsPacket, stub_packet_hash_func, stub_packet_compare_func);
150
5bd7ebb3
LP
151static int reply_add_with_rrsig(
152 DnsAnswer **reply,
153 DnsResourceRecord *rr,
154 int ifindex,
155 DnsAnswerFlags flags,
156 DnsResourceRecord *rrsig,
157 bool with_rrsig) {
158 int r;
159
160 assert(reply);
161 assert(rr);
162
163 r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
164 if (r < 0)
165 return r;
166
167 if (with_rrsig && rrsig) {
168 r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
169 if (r < 0)
170 return r;
171 }
172
173 return 0;
174}
175
775ae354
LP
176static int dns_stub_collect_answer_by_question(
177 DnsAnswer **reply,
51027656 178 DnsAnswer *answer,
775ae354
LP
179 DnsQuestion *question,
180 bool with_rrsig) { /* Add RRSIG RR matching each RR */
b30bf55d 181
775ae354 182 DnsAnswerItem *item;
b30bf55d
LP
183 int r;
184
775ae354 185 assert(reply);
e8d23f92 186
915ba31c 187 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
4838dc4f 188
915ba31c 189 DNS_ANSWER_FOREACH_ITEM(item, answer) {
5bd7ebb3 190
915ba31c
LP
191 /* We have a question, let's see if this RR matches it */
192 r = dns_question_matches_rr(question, item->rr, NULL);
193 if (r < 0)
194 return r;
195 if (!r) {
196 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
197 r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
4838dc4f
LP
198 if (r < 0)
199 return r;
915ba31c
LP
200 if (!r)
201 continue;
4838dc4f 202 }
5bd7ebb3 203
915ba31c
LP
204 /* Mask the section info, we want the primary answers to always go without section
205 * info, so that it is added to the answer section when we synthesize a reply. */
5bd7ebb3 206
915ba31c
LP
207 r = reply_add_with_rrsig(
208 reply,
209 item->rr,
210 item->ifindex,
211 item->flags & ~DNS_ANSWER_MASK_SECTIONS,
212 item->rrsig,
213 with_rrsig);
214 if (r < 0)
215 return r;
e8d23f92 216 }
b30bf55d 217
775ae354
LP
218 return 0;
219}
e8d23f92 220
775ae354
LP
221static int dns_stub_collect_answer_by_section(
222 DnsAnswer **reply,
223 DnsAnswer *answer,
224 DnsAnswerFlags section,
225 DnsAnswer *exclude1,
226 DnsAnswer *exclude2,
227 bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
b30bf55d 228
775ae354 229 DnsAnswerItem *item;
775ae354 230 int r;
b30bf55d 231
775ae354
LP
232 assert(reply);
233
234 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
235 * avoid any RRs listed in 'exclude'. */
236
237 DNS_ANSWER_FOREACH_ITEM(item, answer) {
238
239 if (dns_answer_contains(exclude1, item->rr) ||
240 dns_answer_contains(exclude2, item->rr))
241 continue;
242
243 if (!with_dnssec &&
244 dns_type_is_dnssec(item->rr->key->type))
245 continue;
246
c4d98c3a 247 if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0)
775ae354
LP
248 continue;
249
5bd7ebb3
LP
250 r = reply_add_with_rrsig(
251 reply,
252 item->rr,
253 item->ifindex,
254 item->flags,
255 item->rrsig,
256 with_dnssec);
b30bf55d
LP
257 if (r < 0)
258 return r;
b30bf55d 259 }
e8d23f92 260
5bd7ebb3 261 return 0;
775ae354
LP
262}
263
264static int dns_stub_assign_sections(
265 DnsQuery *q,
266 DnsQuestion *question,
267 bool edns0_do) {
268
269 int r;
270
271 assert(q);
272 assert(question);
273
c6ebf89b
LP
274 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
275 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
276 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
277 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
278 * come with section information though (for example, because they were synthesized locally, and not
279 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
280 * 'answer' object that directly matches the original question is always put in the ANSWER section,
281 * regardless if it carries section info, or what that section info says. Then, anything from the
282 * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the
283 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
284 * the ADDITIONAL section. */
775ae354
LP
285
286 /* Include all RRs that directly answer the question in the answer section */
287 r = dns_stub_collect_answer_by_question(
288 &q->reply_answer,
289 q->answer,
290 question,
291 edns0_do);
292 if (r < 0)
293 return r;
294
d451f0e8 295 /* Include all RRs that originate from the authority sections, and aren't already listed in the
775ae354
LP
296 * answer section, in the authority section */
297 r = dns_stub_collect_answer_by_section(
298 &q->reply_authoritative,
299 q->answer,
d451f0e8 300 DNS_ANSWER_SECTION_AUTHORITY,
775ae354
LP
301 q->reply_answer, NULL,
302 edns0_do);
303 if (r < 0)
304 return r;
d451f0e8
LP
305
306 /* Include all RRs that originate from the answer or additional sections in the additional section
307 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
775ae354 308 r = dns_stub_collect_answer_by_section(
d451f0e8 309 &q->reply_additional,
775ae354 310 q->answer,
d451f0e8
LP
311 DNS_ANSWER_SECTION_ANSWER,
312 q->reply_answer, q->reply_authoritative,
775ae354
LP
313 edns0_do);
314 if (r < 0)
315 return r;
775ae354
LP
316 r = dns_stub_collect_answer_by_section(
317 &q->reply_additional,
318 q->answer,
319 DNS_ANSWER_SECTION_ADDITIONAL,
320 q->reply_answer, q->reply_authoritative,
321 edns0_do);
322 if (r < 0)
323 return r;
324 r = dns_stub_collect_answer_by_section(
325 &q->reply_additional,
326 q->answer,
327 0,
328 q->reply_answer, q->reply_authoritative,
329 edns0_do);
330 if (r < 0)
331 return r;
332
333 return 0;
334}
335
336static int dns_stub_make_reply_packet(
337 DnsPacket **ret,
338 size_t max_size,
339 DnsQuestion *q,
340 bool *ret_truncated) {
341
342 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
343 bool tc = false;
344 int r;
345
346 assert(ret);
347
348 r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size);
349 if (r < 0)
350 return r;
351
352 r = dns_packet_append_question(p, q);
353 if (r == -EMSGSIZE)
354 tc = true;
355 else if (r < 0)
356 return r;
357
51027656 358 if (ret_truncated)
775ae354
LP
359 *ret_truncated = tc;
360 else if (tc)
51027656
LP
361 return -EMSGSIZE;
362
775ae354 363 DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
e8d23f92 364
775ae354
LP
365 *ret = TAKE_PTR(p);
366 return 0;
367}
368
369static int dns_stub_add_reply_packet_body(
370 DnsPacket *p,
371 DnsAnswer *answer,
372 DnsAnswer *authoritative,
373 DnsAnswer *additional,
374 bool edns0_do, /* Client expects DNSSEC RRs? */
375 bool *truncated) {
376
377 unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
378 bool tc = false;
379 int r;
380
381 assert(p);
382
383 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
384 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
385 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
386 * the dropped RRs should not be essential. */
387
388 r = dns_packet_append_answer(p, answer, &n_answer);
389 if (r == -EMSGSIZE)
390 tc = true;
391 else if (r < 0)
392 return r;
393 else {
394 r = dns_packet_append_answer(p, authoritative, &n_authoritative);
395 if (r == -EMSGSIZE) {
396 if (edns0_do)
397 tc = true;
398 } else if (r < 0)
399 return r;
400 else {
401 r = dns_packet_append_answer(p, additional, &n_additional);
402 if (r < 0 && r != -EMSGSIZE)
403 return r;
404 }
405 }
406
407 if (tc) {
408 if (!truncated)
409 return -EMSGSIZE;
410
411 *truncated = true;
412 }
413
414 DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
415 DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
416 DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
e8d23f92
LP
417 return 0;
418}
419
4a6eb824
LP
420static const char *nsid_string(void) {
421 static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
422 sd_id128_t id;
423 int r;
424
425 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
426 * as systemd-resolved, and return a different string for each resolved instance without leaking host
427 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
428 * machine ID but from which the machine ID cannot be determined.
429 *
430 * Clients can use this to determine whether an answer is originating locally or is proxied from
431 * upstream. */
432
433 if (!isempty(buffer))
434 return buffer;
435
436 r = sd_id128_get_machine_app_specific(
437 SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
438 &id);
439 if (r < 0) {
b480543c 440 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
4a6eb824
LP
441 return NULL;
442 }
443
444 xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
445 return buffer;
446}
447
e8d23f92
LP
448static int dns_stub_finish_reply_packet(
449 DnsPacket *p,
450 uint16_t id,
451 int rcode,
51027656 452 bool tc, /* set the Truncated bit? */
4ad017cd 453 bool aa, /* set the Authoritative Answer bit? */
da846b30 454 bool rd, /* set the Recursion Desired bit? */
e8d23f92
LP
455 bool add_opt, /* add an OPT RR to this packet? */
456 bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
b370adb5 457 bool ad, /* set the DNSSEC authenticated data bit? */
775ae354 458 bool cd, /* set the DNSSEC checking disabled bit? */
4a6eb824
LP
459 uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
460 bool nsid) { /* whether to add NSID */
e8d23f92
LP
461
462 int r;
463
464 assert(p);
465
ff4caaae 466 if (add_opt) {
4a6eb824 467 r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
ff4caaae
LP
468 if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
469 tc = true;
470 else if (r < 0)
471 return r;
ff4caaae 472 } else {
941dd294
LP
473 /* If the client can't to EDNS0, don't do DO either */
474 edns0_do = false;
475
775ae354 476 /* If we don't do EDNS, clamp the rcode to 4 bit */
941dd294
LP
477 if (rcode > 0xF)
478 rcode = DNS_RCODE_SERVFAIL;
479 }
480
8c9c68b5
LP
481 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
482 * 5.7 */
e8d23f92
LP
483
484 DNS_PACKET_HEADER(p)->id = id;
485
486 DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
51027656
LP
487 1 /* qr */,
488 0 /* opcode */,
4ad017cd 489 aa /* aa */,
51027656 490 tc /* tc */,
da846b30 491 rd /* rd */,
51027656 492 1 /* ra */,
e8d23f92 493 ad /* ad */,
775ae354 494 cd /* cd */,
e8d23f92 495 rcode));
b30bf55d 496
b30bf55d
LP
497 return 0;
498}
499
a8d09063
LP
500static bool address_is_proxy(int family, const union in_addr_union *a) {
501 assert(a);
502
503 /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
504
505 if (family != AF_INET)
506 return false;
507
508 return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB;
509}
510
511static int find_socket_fd(
512 Manager *m,
513 DnsStubListenerExtra *l,
514 int family,
515 const union in_addr_union *listen_address,
516 int type) {
517
518 assert(m);
519
520 /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the
521 * address to send from */
522 if (l)
523 return manager_dns_stub_fd_extra(m, l, type);
524
525 return manager_dns_stub_fd(m, family, listen_address, type);
526}
527
0354029b
LP
528static int dns_stub_send(
529 Manager *m,
530 DnsStubListenerExtra *l,
531 DnsStream *s,
532 DnsPacket *p,
533 DnsPacket *reply) {
534
b30bf55d
LP
535 int r;
536
537 assert(m);
538 assert(p);
539 assert(reply);
540
541 if (s)
542 r = dns_stream_write_packet(s, reply);
a8d09063 543 else {
dfa14e28 544 int fd, ifindex;
a8d09063 545
de777ffa 546 fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM);
a8d09063
LP
547 if (fd < 0)
548 return fd;
549
dfa14e28
BF
550 if (address_is_proxy(p->family, &p->destination))
551 /* Force loopback iface if this is the loopback proxy stub
552 * and ifindex was normalized to 0 by manager_recv(). */
553 ifindex = p->ifindex ?: LOOPBACK_IFINDEX;
554 else
555 /* Force loopback iface if this is the main listener stub. */
556 ifindex = l ? p->ifindex : LOOPBACK_IFINDEX;
557
a8d09063
LP
558 /* Note that it is essential here that we explicitly choose the source IP address for this
559 * packet. This is because otherwise the kernel will choose it automatically based on the
dfa14e28 560 * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */
0354029b 561 r = manager_send(m,
a8d09063 562 fd,
dfa14e28 563 ifindex,
0354029b
LP
564 p->family, &p->sender, p->sender_port, &p->destination,
565 reply);
a8d09063 566 }
b30bf55d
LP
567 if (r < 0)
568 return log_debug_errno(r, "Failed to send reply packet: %m");
569
570 return 0;
571}
572
39005e18
LP
573static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
574 assert(q);
575
576 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
577 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
578 * set cd */
579
834dc383 580 return dns_packet_do(q->request_packet) &&
39005e18
LP
581 (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */
582 dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */
583 DNS_PACKET_CD(q->request_packet)); /* … or client set CD */
584}
585
5d7da51e
LP
586static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
587 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
588 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
589 * element). E.g. it could be that an RR that was included in the first reply's additional section
590 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
591 * remove everything in the "higher priority" sections from the "lower priority" sections.
592 *
593 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
594 * always end up in one section fully or not at all, but never be split among sections.
595 *
596 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
597 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
598
599 dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);
600 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);
601 dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
602}
603
775ae354
LP
604static int dns_stub_send_reply(
605 DnsQuery *q,
606 int rcode) {
607
608 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
609 bool truncated, edns0_do;
610 int r;
611
612 assert(q);
613
39005e18 614 edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */
775ae354 615
775ae354
LP
616 r = dns_stub_make_reply_packet(
617 &reply,
834dc383 618 dns_packet_payload_size_max(q->request_packet),
775ae354
LP
619 q->request_packet->question,
620 &truncated);
621 if (r < 0)
622 return log_debug_errno(r, "Failed to build reply packet: %m");
623
5d7da51e
LP
624 dns_stub_suppress_duplicate_section_rrs(q);
625
775ae354
LP
626 r = dns_stub_add_reply_packet_body(
627 reply,
628 q->reply_answer,
629 q->reply_authoritative,
630 q->reply_additional,
631 edns0_do,
632 &truncated);
633 if (r < 0)
634 return log_debug_errno(r, "Failed to append reply packet body: %m");
635
636 r = dns_stub_finish_reply_packet(
637 reply,
638 DNS_PACKET_ID(q->request_packet),
639 rcode,
640 truncated,
9ddf099f 641 dns_query_fully_authoritative(q),
da846b30 642 DNS_PACKET_RD(q->request_packet),
775ae354
LP
643 !!q->request_packet->opt,
644 edns0_do,
834dc383 645 (DNS_PACKET_AD(q->request_packet) || dns_packet_do(q->request_packet)) && dns_query_fully_authenticated(q),
36074e01 646 FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE),
4a6eb824
LP
647 q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
648 dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
775ae354
LP
649 if (r < 0)
650 return log_debug_errno(r, "Failed to build failure packet: %m");
651
652 return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
653}
654
0354029b
LP
655static int dns_stub_send_failure(
656 Manager *m,
657 DnsStubListenerExtra *l,
658 DnsStream *s,
659 DnsPacket *p,
660 int rcode,
661 bool authenticated) {
662
b30bf55d 663 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
775ae354 664 bool truncated;
b30bf55d
LP
665 int r;
666
667 assert(m);
668 assert(p);
669
775ae354
LP
670 r = dns_stub_make_reply_packet(
671 &reply,
834dc383 672 dns_packet_payload_size_max(p),
775ae354
LP
673 p->question,
674 &truncated);
e8d23f92
LP
675 if (r < 0)
676 return log_debug_errno(r, "Failed to make failure packet: %m");
677
b370adb5
LP
678 r = dns_stub_finish_reply_packet(
679 reply,
680 DNS_PACKET_ID(p),
681 rcode,
775ae354 682 truncated,
4ad017cd 683 false,
da846b30 684 DNS_PACKET_RD(p),
b370adb5 685 !!p->opt,
834dc383
DDM
686 dns_packet_do(p),
687 (DNS_PACKET_AD(p) || dns_packet_do(p)) && authenticated,
775ae354 688 DNS_PACKET_CD(p),
4a6eb824
LP
689 l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
690 dns_packet_has_nsid_request(p) > 0 && !l);
b30bf55d
LP
691 if (r < 0)
692 return log_debug_errno(r, "Failed to build failure packet: %m");
693
0354029b 694 return dns_stub_send(m, l, s, p, reply);
b30bf55d
LP
695}
696
775ae354
LP
697static int dns_stub_patch_bypass_reply_packet(
698 DnsPacket **ret, /* Where to place the patched packet */
699 DnsPacket *original, /* The packet to patch */
13e15dae 700 DnsPacket *request, /* The packet the patched packet shall look like a reply to */
008f23b7 701 bool validated,
13e15dae 702 bool authenticated) {
775ae354
LP
703 _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL;
704 int r;
705
706 assert(ret);
707 assert(original);
708 assert(request);
709
710 r = dns_packet_dup(&c, original);
711 if (r < 0)
712 return r;
713
714 /* Extract the packet, so that we know where the OPT field is */
715 r = dns_packet_extract(c);
716 if (r < 0)
717 return r;
718
719 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
720 DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id;
721
722 /* Patch in our own maximum datagram size, if EDNS0 was on */
723 r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX);
724 if (r < 0)
725 return r;
726
727 /* Lower all TTLs by the time passed since we received the datagram. */
728 if (timestamp_is_set(original->timestamp)) {
729 r = dns_packet_patch_ttls(c, original->timestamp);
730 if (r < 0)
731 return r;
732 }
733
734 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
735 * set the TC bit if our reply is larger than what the client supports, and truncate. */
834dc383 736 if (c->size > dns_packet_payload_size_max(request)) {
775ae354 737 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
834dc383 738 dns_packet_truncate(c, dns_packet_payload_size_max(request));
775ae354
LP
739 DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC);
740 }
741
008f23b7
RP
742 /* Patch the cd bit to reflect the state of validation: set when both we and the upstream
743 * resolver have checking disabled. */
744 DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags),
745 DNS_PACKET_FLAG_CD, DNS_PACKET_CD(original) && !validated));
746
13e15dae 747 /* Ensure we don't pass along an untrusted ad flag for bypass packets */
008f23b7
RP
748 DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags),
749 DNS_PACKET_FLAG_AD, authenticated));
13e15dae 750
775ae354
LP
751 *ret = TAKE_PTR(c);
752 return 0;
753}
754
c704288c
YW
755static void dns_stub_query_complete(DnsQuery *query) {
756 _cleanup_(dns_query_freep) DnsQuery *q = query;
b30bf55d
LP
757 int r;
758
759 assert(q);
775ae354 760 assert(q->request_packet);
b30bf55d 761
775ae354
LP
762 if (q->question_bypass) {
763 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
764 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
765 * packets are not 100% compatible.) */
b30bf55d 766
775ae354
LP
767 if (q->answer_full_packet &&
768 q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
769 _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
e8d23f92 770
13e15dae 771 r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet,
008f23b7 772 /* validated = */ !FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE),
13e15dae 773 FLAGS_SET(q->answer_query_flags, SD_RESOLVED_AUTHENTICATED));
775ae354
LP
774 if (r < 0)
775 log_debug_errno(r, "Failed to patch bypass reply packet: %m");
776 else
777 (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
778
775ae354 779 return;
e8d23f92 780 }
775ae354 781 }
b30bf55d 782
b97fc571
LP
783 /* Take all data from the current reply, and merge it into the three reply sections we are building
784 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
785 * and keep adding all RRs in the CNAME chain. */
786 r = dns_stub_assign_sections(
787 q,
a7c0291c 788 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
b97fc571 789 dns_stub_reply_with_edns0_do(q));
c704288c
YW
790 if (r < 0)
791 return (void) log_debug_errno(r, "Failed to assign sections: %m");
2f4d8e57 792
775ae354
LP
793 switch (q->state) {
794
915ba31c
LP
795 case DNS_TRANSACTION_SUCCESS: {
796 bool first = true;
797
798 for (;;) {
799 int cname_result;
800
801 cname_result = dns_query_process_cname_one(q);
802 if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */
fca212b0 803 log_debug("Detected CNAME loop, returning what we already have.");
915ba31c
LP
804 (void) dns_stub_send_reply(q, q->answer_rcode);
805 break;
806 }
807 if (cname_result < 0) {
808 log_debug_errno(cname_result, "Failed to process CNAME: %m");
809 break;
810 }
811
812 if (cname_result == DNS_QUERY_NOMATCH) {
813 /* This answer doesn't contain any RR that would answer our question
814 * positively, i.e. neither directly nor via CNAME. */
815
816 if (first) /* We never followed a CNAME and the answer doesn't match our
817 * question at all? Then this is final, the empty answer is the
818 * answer. */
819 break;
820
821 /* Otherwise, we already followed a CNAME once within this packet, and the
822 * packet doesn't answer our question. In that case let's restart the query,
823 * now with the redirected question. We'll */
824 r = dns_query_go(q);
c704288c
YW
825 if (r < 0)
826 return (void) log_debug_errno(r, "Failed to restart query: %m");
915ba31c 827
c704288c 828 TAKE_PTR(q);
915ba31c
LP
829 return;
830 }
831
832 r = dns_stub_assign_sections(
833 q,
834 dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
835 dns_stub_reply_with_edns0_do(q));
c704288c
YW
836 if (r < 0)
837 return (void) log_debug_errno(r, "Failed to assign sections: %m");
915ba31c
LP
838
839 if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
840 break;
841
842 /* We followed a CNAME. and collected the RRs that answer the redirected question
843 * successfully. Let's not try to do this again. */
844 assert(cname_result == DNS_QUERY_CNAME);
845 first = false;
b97fc571 846 }
b97fc571
LP
847
848 _fallthrough_;
915ba31c 849 }
b97fc571 850
b30bf55d 851 case DNS_TRANSACTION_RCODE_FAILURE:
775ae354 852 (void) dns_stub_send_reply(q, q->answer_rcode);
b30bf55d
LP
853 break;
854
855 case DNS_TRANSACTION_NOT_FOUND:
775ae354 856 (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
b30bf55d
LP
857 break;
858
859 case DNS_TRANSACTION_TIMEOUT:
860 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
861 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
862 break;
863
864 case DNS_TRANSACTION_NO_SERVERS:
4f2da49f
RP
865 /* We're not configured to give answers for this question. Refuse it. */
866 (void) dns_stub_send_reply(q, DNS_RCODE_REFUSED);
867 break;
868
591810c0
RP
869 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
870 /* This RR Type is not implemented */
871 (void) dns_stub_send_reply(q, DNS_RCODE_NOTIMP);
872 break;
873
b30bf55d
LP
874 case DNS_TRANSACTION_INVALID_REPLY:
875 case DNS_TRANSACTION_ERRNO:
876 case DNS_TRANSACTION_ABORTED:
877 case DNS_TRANSACTION_DNSSEC_FAILED:
878 case DNS_TRANSACTION_NO_TRUST_ANCHOR:
b30bf55d 879 case DNS_TRANSACTION_NETWORK_DOWN:
775ae354 880 case DNS_TRANSACTION_NO_SOURCE:
49ef064c 881 case DNS_TRANSACTION_STUB_LOOP:
775ae354 882 (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
b30bf55d
LP
883 break;
884
885 case DNS_TRANSACTION_NULL:
886 case DNS_TRANSACTION_PENDING:
887 case DNS_TRANSACTION_VALIDATING:
888 default:
04499a70 889 assert_not_reached();
b30bf55d 890 }
b30bf55d
LP
891}
892
893static int dns_stub_stream_complete(DnsStream *s, int error) {
894 assert(s);
895
b412af57
LP
896 log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
897
898 for (;;) {
899 DnsQuery *q;
900
901 q = set_first(s->queries);
902 if (!q)
903 break;
b30bf55d 904
b412af57
LP
905 dns_query_free(q);
906 }
b30bf55d 907
b412af57
LP
908 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
909 * should be kept as long as the client wants to. */
910 dns_stream_unref(s);
b30bf55d
LP
911 return 0;
912}
913
0354029b 914static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
a8d09063 915 uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL;
ceb17827 916 _cleanup_(dns_query_freep) DnsQuery *q = NULL;
bde69bbd
LP
917 Hashmap **queries_by_packet;
918 DnsQuery *existing;
a8d09063 919 bool bypass = false;
b30bf55d
LP
920 int r;
921
922 assert(m);
923 assert(p);
924 assert(p->protocol == DNS_PROTOCOL_DNS);
925
0354029b 926 if (!l && /* l == NULL if this is the main stub */
a8d09063 927 !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */
d1fb8cda
YW
928 (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
929 in_addr_is_localhost(p->family, &p->destination) <= 0)) {
565147b7 930 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
ceb17827 931 return;
b30bf55d
LP
932 }
933
a9fd8837
LP
934 if (manager_packet_from_our_transaction(m, p)) {
935 log_debug("Got our own packet looped back, ignoring.");
936 return;
937 }
938
bde69bbd
LP
939 queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
940 existing = hashmap_get(*queries_by_packet, p);
941 if (existing && dns_packet_equal(existing->request_packet, p)) {
942 log_debug("Got repeat packet from client, ignoring.");
943 return;
944 }
945
b30bf55d
LP
946 r = dns_packet_extract(p);
947 if (r < 0) {
948 log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
0354029b 949 dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
ceb17827 950 return;
b30bf55d
LP
951 }
952
834dc383 953 if (!dns_packet_version_supported(p)) {
b30bf55d 954 log_debug("Got EDNS OPT field with unsupported version number.");
0354029b 955 dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
ceb17827 956 return;
b30bf55d
LP
957 }
958
ab715ddb 959 if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) {
b30bf55d 960 log_debug("Got message with obsolete key type, refusing.");
30ee7071 961 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 962 return;
b30bf55d
LP
963 }
964
d4fd7fb5 965 if (dns_type_is_zone_transfer(dns_question_first_key(p->question)->type)) {
b30bf55d 966 log_debug("Got request for zone transfer, refusing.");
30ee7071 967 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 968 return;
b30bf55d
LP
969 }
970
971 if (!DNS_PACKET_RD(p)) {
972 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
973 log_debug("Got request with recursion disabled, refusing.");
0354029b 974 dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
ceb17827 975 return;
b30bf55d
LP
976 }
977
bde69bbd
LP
978 r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
979 if (r < 0) {
980 log_oom();
981 return;
982 }
983
a8d09063
LP
984 if (address_is_proxy(p->family, &p->destination)) {
985 _cleanup_free_ char *dipa = NULL;
986
987 r = in_addr_to_string(p->family, &p->destination, &dipa);
e1158539
LP
988 if (r < 0)
989 return (void) log_error_errno(r, "Failed to format destination address: %m");
a8d09063
LP
990
991 log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
992 bypass = true;
993 protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */
834dc383 994 } else if (dns_packet_do(p)) {
9c47b334 995 log_debug("Got request with DNSSEC enabled, enabling bypass logic.");
a8d09063
LP
996 bypass = true;
997 }
775ae354 998
a8d09063 999 if (bypass)
775ae354 1000 r = dns_query_new(m, &q, NULL, NULL, p, 0,
a8d09063 1001 protocol_flags|
775ae354
LP
1002 SD_RESOLVED_NO_CNAME|
1003 SD_RESOLVED_NO_SEARCH|
008f23b7 1004 (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)|
775ae354 1005 SD_RESOLVED_REQUIRE_PRIMARY|
718324c5
LP
1006 SD_RESOLVED_CLAMP_TTL|
1007 SD_RESOLVED_RELAX_SINGLE_LABEL);
a8d09063 1008 else
775ae354 1009 r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
a8d09063 1010 protocol_flags|
775ae354 1011 SD_RESOLVED_NO_SEARCH|
36074e01 1012 (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)|
834dc383 1013 (dns_packet_do(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
775ae354 1014 SD_RESOLVED_CLAMP_TTL);
81ae2237
MNBKL
1015 if (r == -ENOANO) /* Refuse query if there is -ENOANO */
1016 return (void) dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
b30bf55d
LP
1017 if (r < 0) {
1018 log_error_errno(r, "Failed to generate query object: %m");
0354029b 1019 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 1020 return;
b30bf55d
LP
1021 }
1022
775ae354
LP
1023 q->request_packet = dns_packet_ref(p);
1024 q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
0354029b 1025 q->stub_listener_extra = l;
b30bf55d
LP
1026 q->complete = dns_stub_query_complete;
1027
1028 if (s) {
b412af57
LP
1029 /* Remember which queries belong to this stream, so that we can cancel them when the stream
1030 * is disconnected early */
1031
ceb17827 1032 r = set_ensure_put(&s->queries, NULL, q);
b412af57
LP
1033 if (r < 0) {
1034 log_oom();
ceb17827 1035 return;
b412af57 1036 }
ceb17827 1037 assert(r > 0);
b30bf55d
LP
1038 }
1039
bde69bbd
LP
1040 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
1041 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
1042 * isn't particularly bad. */
1043 (void) hashmap_put(*queries_by_packet, q->request_packet, q);
1044
b30bf55d
LP
1045 r = dns_query_go(q);
1046 if (r < 0) {
1047 log_error_errno(r, "Failed to start query: %m");
0354029b 1048 dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
ceb17827 1049 return;
b30bf55d
LP
1050 }
1051
52e63427 1052 log_debug("Processing query...");
ceb17827 1053 TAKE_PTR(q);
b30bf55d
LP
1054}
1055
0354029b 1056static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
b30bf55d 1057 _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
b30bf55d
LP
1058 int r;
1059
1060 r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
1061 if (r <= 0)
1062 return r;
1063
1064 if (dns_packet_validate_query(p) > 0) {
1065 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
1066
0354029b 1067 dns_stub_process_query(m, l, NULL, p);
b30bf55d
LP
1068 } else
1069 log_debug("Invalid DNS stub UDP packet, ignoring.");
1070
1071 return 0;
1072}
1073
d1fb8cda 1074static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
0354029b 1075 return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
d1fb8cda
YW
1076}
1077
1078static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1079 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
0354029b
LP
1080
1081 return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
d1fb8cda
YW
1082}
1083
624f907e 1084static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) {
e4bed40f 1085 assert(s);
624f907e 1086 assert(s->manager);
e4bed40f
ZJS
1087 assert(p);
1088
1089 if (dns_packet_validate_query(p) > 0) {
1090 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
1091
1092 dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
1093 } else
1094 log_debug("Invalid DNS stub TCP packet, ignoring.");
1095
1096 return 0;
1097}
1098
1099static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
1100 DnsStream *stream;
1101 int cfd, r;
1102
1103 cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1104 if (cfd < 0) {
1105 if (ERRNO_IS_ACCEPT_AGAIN(errno))
1106 return 0;
1107
1108 return -errno;
1109 }
1110
18230451
YW
1111 r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL,
1112 on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC);
e4bed40f
ZJS
1113 if (r < 0) {
1114 safe_close(cfd);
1115 return r;
1116 }
1117
1118 stream->stub_listener_extra = l;
e4bed40f
ZJS
1119
1120 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
1121
1122 return 0;
1123}
1124
1125static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1126 return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
1127}
1128
1129static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1130 DnsStubListenerExtra *l = ASSERT_PTR(userdata);
e4bed40f 1131
e4bed40f
ZJS
1132 return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
1133}
1134
/* Applies the socket options all stub sockets (UDP and TCP, main and extra) have in common:
 * SO_REUSEADDR, plus reception of packet info and TTL for the given address family.
 *
 * Returns 0 on success, or the negative error of the first option that could not be set. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        /* Each step only runs if all previous ones succeeded. */
        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r >= 0)
                r = socket_set_recvpktinfo(fd, family, true);
        if (r >= 0)
                r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1155
8624f128
LP
/* Applies the TCP-specific options shared by all stub listening sockets. Both options are best-effort:
 * failures are logged at debug level and otherwise ignored, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int fastopen_result, nodelay_result;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        fastopen_result = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (fastopen_result < 0)
                log_debug_errno(fastopen_result, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        nodelay_result = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (nodelay_result < 0)
                log_debug_errno(nodelay_result, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1171
a8d09063
LP
1172static int manager_dns_stub_fd(
1173 Manager *m,
1174 int family,
1175 const union in_addr_union *listen_addr,
1176 int type) {
1177
1178 sd_event_source **event_source;
254d1313 1179 _cleanup_close_ int fd = -EBADF;
a8d09063 1180 union sockaddr_union sa;
b30bf55d
LP
1181 int r;
1182
e1158539
LP
1183 assert(m);
1184 assert(listen_addr);
1185
a8d09063
LP
1186 if (type == SOCK_DGRAM)
1187 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source;
1188 else if (type == SOCK_STREAM)
1189 event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source;
1190 else
1191 return -EPROTONOSUPPORT;
d491917c 1192
d491917c
ZJS
1193 if (*event_source)
1194 return sd_event_source_get_io_fd(*event_source);
b30bf55d 1195
a8d09063 1196 fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
424e490b 1197 if (fd < 0)
b30bf55d
LP
1198 return -errno;
1199
a8d09063 1200 r = set_dns_stub_common_socket_options(fd, family);
2ff48e98
LP
1201 if (r < 0)
1202 return r;
b30bf55d 1203
8624f128
LP
1204 if (type == SOCK_STREAM) {
1205 r = set_dns_stub_common_tcp_socket_options(fd);
1206 if (r < 0)
1207 return r;
1208 }
1209
a8d09063
LP
1210 /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
1211 * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
1212 * so to redirect external traffic to it. */
1213
1214 if (!address_is_proxy(family, listen_addr)) {
1215 /* Make sure no traffic from outside the local host can leak to onto this socket */
1216 r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
1217 if (r < 0)
1218 return r;
1219
1220 r = socket_set_ttl(fd, family, 1);
1221 if (r < 0)
1222 return r;
1223 } else if (type == SOCK_DGRAM) {
e1158539
LP
1224 /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
1225 * a longer discussion. (We only do this for sockets that are potentially externally
1226 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
1227 * see above, so that packets don't get routed at all.) */
a8d09063
LP
1228 r = socket_disable_pmtud(fd, family);
1229 if (r < 0)
1230 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
1231
1232 r = socket_set_recvfragsize(fd, family, true);
1233 if (r < 0)
1234 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
1235 }
b30bf55d 1236
a8d09063 1237 r = sockaddr_set_in_addr(&sa, family, listen_addr, 53);
d491917c
ZJS
1238 if (r < 0)
1239 return r;
1240
424e490b
ZJS
1241 if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
1242 return -errno;
b30bf55d 1243
d491917c 1244 if (type == SOCK_STREAM &&
768fcd77 1245 listen(fd, SOMAXCONN_DELUXE) < 0)
d491917c
ZJS
1246 return -errno;
1247
1248 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1249 type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
1250 m);
b30bf55d 1251 if (r < 0)
424e490b 1252 return r;
b30bf55d 1253
d491917c 1254 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1255 if (r < 0)
1256 return r;
1257
d491917c
ZJS
1258 (void) sd_event_source_set_description(*event_source,
1259 type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
b30bf55d 1260
7216a3b5 1261 return TAKE_FD(fd);
b30bf55d
LP
1262}
1263
b5febb3f 1264static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
1f05101f 1265 _cleanup_free_ char *pretty = NULL;
254d1313 1266 _cleanup_close_ int fd = -EBADF;
ca8b62b5 1267 union sockaddr_union sa;
1f05101f
SS
1268 int r;
1269
0354029b 1270 assert(m);
a8d09063 1271 assert(l);
b5febb3f 1272 assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
0354029b 1273
b5febb3f
ZJS
1274 sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
1275 if (*event_source)
1276 return sd_event_source_get_io_fd(*event_source);
1f05101f 1277
0398c084
DDM
1278 if (!have_effective_cap(CAP_NET_BIND_SERVICE) && dns_stub_listener_extra_port(l) < 1024) {
1279 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating extra stub listener on port %hu.",
1280 dns_stub_listener_extra_port(l));
1281 return 0;
1282 }
1283
ca8b62b5
YW
1284 if (l->family == AF_INET)
1285 sa = (union sockaddr_union) {
1286 .in.sin_family = l->family,
49ef064c 1287 .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1288 .in.sin_addr = l->address.in,
1289 };
1290 else
1291 sa = (union sockaddr_union) {
1292 .in6.sin6_family = l->family,
49ef064c 1293 .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
ca8b62b5
YW
1294 .in6.sin6_addr = l->address.in6,
1295 };
1296
b5febb3f 1297 fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
1f05101f
SS
1298 if (fd < 0) {
1299 r = -errno;
1300 goto fail;
1301 }
1302
af8b1384 1303 r = set_dns_stub_common_socket_options(fd, l->family);
1f05101f
SS
1304 if (r < 0)
1305 goto fail;
1306
8624f128
LP
1307 if (type == SOCK_STREAM) {
1308 r = set_dns_stub_common_tcp_socket_options(fd);
1309 if (r < 0)
1310 goto fail;
1311 }
1312
69e3234d 1313 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
b5febb3f
ZJS
1314 * people may want ttl > 1. */
1315
5d0fe423 1316 r = socket_set_freebind(fd, l->family, true);
b5febb3f
ZJS
1317 if (r < 0)
1318 goto fail;
1319
eb170e75
LP
1320 if (type == SOCK_DGRAM) {
1321 r = socket_disable_pmtud(fd, l->family);
1322 if (r < 0)
1323 log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");
20a001bd
LP
1324
1325 r = socket_set_recvfragsize(fd, l->family, true);
1326 if (r < 0)
1327 log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
eb170e75
LP
1328 }
1329
a0233fcd 1330 r = RET_NERRNO(bind(fd, &sa.sa, sockaddr_len(&sa)));
ded15213 1331 if (r < 0)
1f05101f 1332 goto fail;
1f05101f 1333
b5febb3f 1334 if (type == SOCK_STREAM &&
768fcd77 1335 listen(fd, SOMAXCONN_DELUXE) < 0) {
b5febb3f
ZJS
1336 r = -errno;
1337 goto fail;
1338 }
1339
1340 r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
1341 type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
1342 l);
1f05101f
SS
1343 if (r < 0)
1344 goto fail;
1345
b5febb3f 1346 r = sd_event_source_set_io_fd_own(*event_source, true);
7216a3b5
YW
1347 if (r < 0)
1348 goto fail;
1349
b5febb3f
ZJS
1350 (void) sd_event_source_set_description(*event_source,
1351 type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
1f05101f
SS
1352
1353 if (DEBUG_LOGGING) {
ca8b62b5 1354 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1355 log_debug("Listening on %s socket %s.",
1356 type == SOCK_DGRAM ? "UDP" : "TCP",
1357 strnull(pretty));
1f05101f
SS
1358 }
1359
7216a3b5 1360 return TAKE_FD(fd);
1f05101f 1361
b4b7ea1b 1362fail:
1c17bcb3 1363 assert(r < 0);
ca8b62b5 1364 (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
b5febb3f
ZJS
1365 return log_warning_errno(r,
1366 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
1367 "Failed to listen on %s socket %s: %m",
1368 type == SOCK_DGRAM ? "UDP" : "TCP",
1369 strnull(pretty));
1f05101f
SS
1370}
1371
b30bf55d 1372int manager_dns_stub_start(Manager *m) {
a8d09063 1373 int r;
b30bf55d
LP
1374
1375 assert(m);
1376
d5da7707
ZJS
1377 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
1378 log_debug("Not creating stub listener.");
0398c084
DDM
1379 else if (!have_effective_cap(CAP_NET_BIND_SERVICE))
1380 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating stub listener on port 53.");
a8d09063
LP
1381 else {
1382 static const struct {
1383 uint32_t addr;
1384 int socket_type;
1385 } stub_sockets[] = {
1386 { INADDR_DNS_STUB, SOCK_DGRAM },
1387 { INADDR_DNS_STUB, SOCK_STREAM },
1388 { INADDR_DNS_PROXY_STUB, SOCK_DGRAM },
1389 { INADDR_DNS_PROXY_STUB, SOCK_STREAM },
1390 };
1391
d5da7707
ZJS
1392 log_debug("Creating stub listener using %s.",
1393 m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
1394 m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
1395 "UDP/TCP");
1396
ddb8a639 1397 FOREACH_ELEMENT(s, stub_sockets) {
a8d09063 1398 union in_addr_union a = {
ddb8a639 1399 .in.s_addr = htobe32(s->addr),
a8d09063 1400 };
b30bf55d 1401
ddb8a639 1402 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && s->socket_type == SOCK_STREAM)
a8d09063 1403 continue;
ddb8a639 1404 if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && s->socket_type == SOCK_DGRAM)
a8d09063
LP
1405 continue;
1406
ddb8a639 1407 r = manager_dns_stub_fd(m, AF_INET, &a, s->socket_type);
a8d09063
LP
1408 if (r < 0) {
1409 _cleanup_free_ char *busy_socket = NULL;
1410
1411 if (asprintf(&busy_socket,
1412 "%s socket " IPV4_ADDRESS_FMT_STR ":53",
ddb8a639 1413 s->socket_type == SOCK_DGRAM ? "UDP" : "TCP",
a8d09063
LP
1414 IPV4_ADDRESS_FMT_VAL(a.in)) < 0)
1415 return log_oom();
1416
1417 if (IN_SET(r, -EADDRINUSE, -EPERM)) {
1418 log_warning_errno(r,
1419 r == -EADDRINUSE ? "Another process is already listening on %s.\n"
1420 "Turning off local DNS stub support." :
1421 "Failed to listen on %s: %m.\n"
5d5edcd3 1422 "Turning off local DNS stub support.",
a8d09063
LP
1423 busy_socket);
1424 manager_dns_stub_stop(m);
1425 break;
1426 }
b30bf55d 1427
a8d09063
LP
1428 return log_error_errno(r, "Failed to listen on %s: %m", busy_socket);
1429 }
1430 }
1431 }
b30bf55d 1432
1f05101f 1433 if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
36aaabc3 1434 DnsStubListenerExtra *l;
1f05101f 1435
dce65cd4 1436 log_debug("Creating extra stub listeners.");
1f05101f 1437
90e74a66 1438 ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
7314b397 1439 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
b5febb3f 1440 (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
7314b397 1441 if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
b5febb3f 1442 (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
7314b397 1443 }
1f05101f
SS
1444 }
1445
b30bf55d
LP
1446 return 0;
1447}
1448
1449void manager_dns_stub_stop(Manager *m) {
1450 assert(m);
1451
97935302
ZJS
1452 m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
1453 m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
a8d09063
LP
1454 m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source);
1455 m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source);
b30bf55d 1456}
ae8f0ec3
LP
1457
1458static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
97935302 1459 [DNS_STUB_LISTENER_NO] = "no",
ae8f0ec3
LP
1460 [DNS_STUB_LISTENER_UDP] = "udp",
1461 [DNS_STUB_LISTENER_TCP] = "tcp",
1462 [DNS_STUB_LISTENER_YES] = "yes",
1463};
1464DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);