]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | ||
3 | #include <netinet/tcp.h> | |
4 | ||
5 | #include "sd-event.h" | |
6 | #include "sd-id128.h" | |
7 | ||
8 | #include "alloc-util.h" | |
9 | #include "capability-util.h" | |
10 | #include "dns-type.h" | |
11 | #include "errno-util.h" | |
12 | #include "fd-util.h" | |
13 | #include "log.h" | |
14 | #include "missing_network.h" | |
15 | #include "resolve-util.h" | |
16 | #include "resolved-dns-answer.h" | |
17 | #include "resolved-dns-packet.h" | |
18 | #include "resolved-dns-query.h" | |
19 | #include "resolved-dns-question.h" | |
20 | #include "resolved-dns-rr.h" | |
21 | #include "resolved-dns-stream.h" | |
22 | #include "resolved-dns-stub.h" | |
23 | #include "resolved-dns-transaction.h" | |
24 | #include "resolved-manager.h" | |
25 | #include "set.h" | |
26 | #include "siphash24.h" | |
27 | #include "socket-util.h" | |
28 | #include "stdio-util.h" | |
29 | #include "string-table.h" | |
30 | #include "string-util.h" | |
31 | #include "time-util.h" | |
32 | ||
33 | /* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet, | |
34 | * IP and UDP header sizes */ | |
35 | #define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U) | |
36 | ||
37 | /* On the extra stubs, use a more conservative choice */ | |
38 | #define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX | |
39 | ||
40 | static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type); | |
41 | static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type); | |
42 | ||
43 | static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) { | |
44 | assert(a); | |
45 | ||
46 | siphash24_compress_typesafe(a->mode, state); | |
47 | siphash24_compress_typesafe(a->family, state); | |
48 | in_addr_hash_func(&a->address, a->family, state); | |
49 | siphash24_compress_typesafe(a->port, state); | |
50 | } | |
51 | ||
52 | static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) { | |
53 | int r; | |
54 | ||
55 | assert(a); | |
56 | assert(b); | |
57 | ||
58 | r = CMP(a->mode, b->mode); | |
59 | if (r != 0) | |
60 | return r; | |
61 | ||
62 | r = CMP(a->family, b->family); | |
63 | if (r != 0) | |
64 | return r; | |
65 | ||
66 | r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family)); | |
67 | if (r != 0) | |
68 | return r; | |
69 | ||
70 | return CMP(a->port, b->port); | |
71 | } | |
72 | ||
/* Hash operations for containers keyed by DnsStubListenerExtra objects, built from the hash and compare
 * functions above, with dns_stub_listener_extra_free() passed as the key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
79 | ||
80 | int dns_stub_listener_extra_new( | |
81 | Manager *m, | |
82 | DnsStubListenerExtra **ret) { | |
83 | ||
84 | DnsStubListenerExtra *l; | |
85 | ||
86 | l = new(DnsStubListenerExtra, 1); | |
87 | if (!l) | |
88 | return -ENOMEM; | |
89 | ||
90 | *l = (DnsStubListenerExtra) { | |
91 | .manager = m, | |
92 | }; | |
93 | ||
94 | *ret = TAKE_PTR(l); | |
95 | return 0; | |
96 | } | |
97 | ||
98 | DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) { | |
99 | if (!p) | |
100 | return NULL; | |
101 | ||
102 | p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source); | |
103 | p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source); | |
104 | ||
105 | hashmap_free(p->queries_by_packet); | |
106 | ||
107 | return mfree(p); | |
108 | } | |
109 | ||
110 | static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) { | |
111 | assert(p); | |
112 | ||
113 | siphash24_compress_typesafe(p->protocol, state); | |
114 | siphash24_compress_typesafe(p->family, state); | |
115 | siphash24_compress_typesafe(p->sender, state); | |
116 | siphash24_compress_typesafe(p->ipproto, state); | |
117 | siphash24_compress_typesafe(p->sender_port, state); | |
118 | siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state); | |
119 | ||
120 | /* We don't bother hashing the full packet here, just the header */ | |
121 | } | |
122 | ||
123 | static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) { | |
124 | int r; | |
125 | ||
126 | r = CMP(x->protocol, y->protocol); | |
127 | if (r != 0) | |
128 | return r; | |
129 | ||
130 | r = CMP(x->family, y->family); | |
131 | if (r != 0) | |
132 | return r; | |
133 | ||
134 | r = memcmp(&x->sender, &y->sender, sizeof(x->sender)); | |
135 | if (r != 0) | |
136 | return r; | |
137 | ||
138 | r = CMP(x->ipproto, y->ipproto); | |
139 | if (r != 0) | |
140 | return r; | |
141 | ||
142 | r = CMP(x->sender_port, y->sender_port); | |
143 | if (r != 0) | |
144 | return r; | |
145 | ||
146 | return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader)); | |
147 | } | |
148 | ||
/* Hash operations for containers keyed by request packets (see the two functions above). */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
150 | ||
151 | static int reply_add_with_rrsig( | |
152 | DnsAnswer **reply, | |
153 | DnsResourceRecord *rr, | |
154 | int ifindex, | |
155 | DnsAnswerFlags flags, | |
156 | DnsResourceRecord *rrsig, | |
157 | bool with_rrsig) { | |
158 | int r; | |
159 | ||
160 | assert(reply); | |
161 | assert(rr); | |
162 | ||
163 | r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig); | |
164 | if (r < 0) | |
165 | return r; | |
166 | ||
167 | if (with_rrsig && rrsig) { | |
168 | r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL); | |
169 | if (r < 0) | |
170 | return r; | |
171 | } | |
172 | ||
173 | return 0; | |
174 | } | |
175 | ||
176 | static int dns_stub_collect_answer_by_question( | |
177 | DnsAnswer **reply, | |
178 | DnsAnswer *answer, | |
179 | DnsQuestion *question, | |
180 | bool with_rrsig) { /* Add RRSIG RR matching each RR */ | |
181 | ||
182 | DnsAnswerItem *item; | |
183 | int r; | |
184 | ||
185 | assert(reply); | |
186 | ||
187 | /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */ | |
188 | ||
189 | DNS_ANSWER_FOREACH_ITEM(item, answer) { | |
190 | ||
191 | /* We have a question, let's see if this RR matches it */ | |
192 | r = dns_question_matches_rr(question, item->rr, NULL); | |
193 | if (r < 0) | |
194 | return r; | |
195 | if (!r) { | |
196 | /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */ | |
197 | r = dns_question_matches_cname_or_dname(question, item->rr, NULL); | |
198 | if (r < 0) | |
199 | return r; | |
200 | if (!r) | |
201 | continue; | |
202 | } | |
203 | ||
204 | /* Mask the section info, we want the primary answers to always go without section | |
205 | * info, so that it is added to the answer section when we synthesize a reply. */ | |
206 | ||
207 | r = reply_add_with_rrsig( | |
208 | reply, | |
209 | item->rr, | |
210 | item->ifindex, | |
211 | item->flags & ~DNS_ANSWER_MASK_SECTIONS, | |
212 | item->rrsig, | |
213 | with_rrsig); | |
214 | if (r < 0) | |
215 | return r; | |
216 | } | |
217 | ||
218 | return 0; | |
219 | } | |
220 | ||
221 | static int dns_stub_collect_answer_by_section( | |
222 | DnsAnswer **reply, | |
223 | DnsAnswer *answer, | |
224 | DnsAnswerFlags section, | |
225 | DnsAnswer *exclude1, | |
226 | DnsAnswer *exclude2, | |
227 | bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */ | |
228 | ||
229 | DnsAnswerItem *item; | |
230 | int r; | |
231 | ||
232 | assert(reply); | |
233 | ||
234 | /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also, | |
235 | * avoid any RRs listed in 'exclude'. */ | |
236 | ||
237 | DNS_ANSWER_FOREACH_ITEM(item, answer) { | |
238 | ||
239 | if (dns_answer_contains(exclude1, item->rr) || | |
240 | dns_answer_contains(exclude2, item->rr)) | |
241 | continue; | |
242 | ||
243 | if (!with_dnssec && | |
244 | dns_type_is_dnssec(item->rr->key->type)) | |
245 | continue; | |
246 | ||
247 | if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0) | |
248 | continue; | |
249 | ||
250 | r = reply_add_with_rrsig( | |
251 | reply, | |
252 | item->rr, | |
253 | item->ifindex, | |
254 | item->flags, | |
255 | item->rrsig, | |
256 | with_dnssec); | |
257 | if (r < 0) | |
258 | return r; | |
259 | } | |
260 | ||
261 | return 0; | |
262 | } | |
263 | ||
264 | static int dns_stub_assign_sections( | |
265 | DnsQuery *q, | |
266 | DnsQuestion *question, | |
267 | bool edns0_do) { | |
268 | ||
269 | int r; | |
270 | ||
271 | assert(q); | |
272 | assert(question); | |
273 | ||
274 | /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We | |
275 | * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We | |
276 | * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's | |
277 | * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects | |
278 | * come with section information though (for example, because they were synthesized locally, and not | |
279 | * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the | |
280 | * 'answer' object that directly matches the original question is always put in the ANSWER section, | |
281 | * regardless if it carries section info, or what that section info says. Then, anything from the | |
282 | * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the | |
283 | * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to | |
284 | * the ADDITIONAL section. */ | |
285 | ||
286 | /* Include all RRs that directly answer the question in the answer section */ | |
287 | r = dns_stub_collect_answer_by_question( | |
288 | &q->reply_answer, | |
289 | q->answer, | |
290 | question, | |
291 | edns0_do); | |
292 | if (r < 0) | |
293 | return r; | |
294 | ||
295 | /* Include all RRs that originate from the authority sections, and aren't already listed in the | |
296 | * answer section, in the authority section */ | |
297 | r = dns_stub_collect_answer_by_section( | |
298 | &q->reply_authoritative, | |
299 | q->answer, | |
300 | DNS_ANSWER_SECTION_AUTHORITY, | |
301 | q->reply_answer, NULL, | |
302 | edns0_do); | |
303 | if (r < 0) | |
304 | return r; | |
305 | ||
306 | /* Include all RRs that originate from the answer or additional sections in the additional section | |
307 | * (except if already listed in the other two sections). Also add all RRs with no section marking. */ | |
308 | r = dns_stub_collect_answer_by_section( | |
309 | &q->reply_additional, | |
310 | q->answer, | |
311 | DNS_ANSWER_SECTION_ANSWER, | |
312 | q->reply_answer, q->reply_authoritative, | |
313 | edns0_do); | |
314 | if (r < 0) | |
315 | return r; | |
316 | r = dns_stub_collect_answer_by_section( | |
317 | &q->reply_additional, | |
318 | q->answer, | |
319 | DNS_ANSWER_SECTION_ADDITIONAL, | |
320 | q->reply_answer, q->reply_authoritative, | |
321 | edns0_do); | |
322 | if (r < 0) | |
323 | return r; | |
324 | r = dns_stub_collect_answer_by_section( | |
325 | &q->reply_additional, | |
326 | q->answer, | |
327 | 0, | |
328 | q->reply_answer, q->reply_authoritative, | |
329 | edns0_do); | |
330 | if (r < 0) | |
331 | return r; | |
332 | ||
333 | return 0; | |
334 | } | |
335 | ||
336 | static int dns_stub_make_reply_packet( | |
337 | DnsPacket **ret, | |
338 | size_t max_size, | |
339 | DnsQuestion *q, | |
340 | bool *ret_truncated) { | |
341 | ||
342 | _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL; | |
343 | bool tc = false; | |
344 | int r; | |
345 | ||
346 | assert(ret); | |
347 | ||
348 | r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size); | |
349 | if (r < 0) | |
350 | return r; | |
351 | ||
352 | r = dns_packet_append_question(p, q); | |
353 | if (r == -EMSGSIZE) | |
354 | tc = true; | |
355 | else if (r < 0) | |
356 | return r; | |
357 | ||
358 | if (ret_truncated) | |
359 | *ret_truncated = tc; | |
360 | else if (tc) | |
361 | return -EMSGSIZE; | |
362 | ||
363 | DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q)); | |
364 | ||
365 | *ret = TAKE_PTR(p); | |
366 | return 0; | |
367 | } | |
368 | ||
369 | static int dns_stub_add_reply_packet_body( | |
370 | DnsPacket *p, | |
371 | DnsAnswer *answer, | |
372 | DnsAnswer *authoritative, | |
373 | DnsAnswer *additional, | |
374 | bool edns0_do, /* Client expects DNSSEC RRs? */ | |
375 | bool *truncated) { | |
376 | ||
377 | unsigned n_answer = 0, n_authoritative = 0, n_additional = 0; | |
378 | bool tc = false; | |
379 | int r; | |
380 | ||
381 | assert(p); | |
382 | ||
383 | /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as | |
384 | * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal | |
385 | * truncation. In all other cases where things don't fit don't signal truncation, as for those cases | |
386 | * the dropped RRs should not be essential. */ | |
387 | ||
388 | r = dns_packet_append_answer(p, answer, &n_answer); | |
389 | if (r == -EMSGSIZE) | |
390 | tc = true; | |
391 | else if (r < 0) | |
392 | return r; | |
393 | else { | |
394 | r = dns_packet_append_answer(p, authoritative, &n_authoritative); | |
395 | if (r == -EMSGSIZE) { | |
396 | if (edns0_do) | |
397 | tc = true; | |
398 | } else if (r < 0) | |
399 | return r; | |
400 | else { | |
401 | r = dns_packet_append_answer(p, additional, &n_additional); | |
402 | if (r < 0 && r != -EMSGSIZE) | |
403 | return r; | |
404 | } | |
405 | } | |
406 | ||
407 | if (tc) { | |
408 | if (!truncated) | |
409 | return -EMSGSIZE; | |
410 | ||
411 | *truncated = true; | |
412 | } | |
413 | ||
414 | DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer); | |
415 | DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative); | |
416 | DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional); | |
417 | return 0; | |
418 | } | |
419 | ||
420 | static const char *nsid_string(void) { | |
421 | static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = ""; | |
422 | sd_id128_t id; | |
423 | int r; | |
424 | ||
425 | /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us | |
426 | * as systemd-resolved, and return a different string for each resolved instance without leaking host | |
427 | * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the | |
428 | * machine ID but from which the machine ID cannot be determined. | |
429 | * | |
430 | * Clients can use this to determine whether an answer is originating locally or is proxied from | |
431 | * upstream. */ | |
432 | ||
433 | if (!isempty(buffer)) | |
434 | return buffer; | |
435 | ||
436 | r = sd_id128_get_machine_app_specific( | |
437 | SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27), | |
438 | &id); | |
439 | if (r < 0) { | |
440 | log_debug_errno(r, "Failed to determine machine ID, ignoring: %m"); | |
441 | return NULL; | |
442 | } | |
443 | ||
444 | xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id)); | |
445 | return buffer; | |
446 | } | |
447 | ||
448 | static int dns_stub_finish_reply_packet( | |
449 | DnsPacket *p, | |
450 | uint16_t id, | |
451 | int rcode, | |
452 | bool tc, /* set the Truncated bit? */ | |
453 | bool aa, /* set the Authoritative Answer bit? */ | |
454 | bool rd, /* set the Recursion Desired bit? */ | |
455 | bool add_opt, /* add an OPT RR to this packet? */ | |
456 | bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */ | |
457 | bool ad, /* set the DNSSEC authenticated data bit? */ | |
458 | bool cd, /* set the DNSSEC checking disabled bit? */ | |
459 | uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */ | |
460 | bool nsid) { /* whether to add NSID */ | |
461 | ||
462 | int r; | |
463 | ||
464 | assert(p); | |
465 | ||
466 | if (add_opt) { | |
467 | r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL); | |
468 | if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */ | |
469 | tc = true; | |
470 | else if (r < 0) | |
471 | return r; | |
472 | } else { | |
473 | /* If the client can't to EDNS0, don't do DO either */ | |
474 | edns0_do = false; | |
475 | ||
476 | /* If we don't do EDNS, clamp the rcode to 4 bit */ | |
477 | if (rcode > 0xF) | |
478 | rcode = DNS_RCODE_SERVFAIL; | |
479 | } | |
480 | ||
481 | /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section | |
482 | * 5.7 */ | |
483 | ||
484 | DNS_PACKET_HEADER(p)->id = id; | |
485 | ||
486 | DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS( | |
487 | 1 /* qr */, | |
488 | 0 /* opcode */, | |
489 | aa /* aa */, | |
490 | tc /* tc */, | |
491 | rd /* rd */, | |
492 | 1 /* ra */, | |
493 | ad /* ad */, | |
494 | cd /* cd */, | |
495 | rcode)); | |
496 | ||
497 | return 0; | |
498 | } | |
499 | ||
500 | static bool address_is_proxy(int family, const union in_addr_union *a) { | |
501 | assert(a); | |
502 | ||
503 | /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */ | |
504 | ||
505 | if (family != AF_INET) | |
506 | return false; | |
507 | ||
508 | return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB; | |
509 | } | |
510 | ||
511 | static int find_socket_fd( | |
512 | Manager *m, | |
513 | DnsStubListenerExtra *l, | |
514 | int family, | |
515 | const union in_addr_union *listen_address, | |
516 | int type) { | |
517 | ||
518 | assert(m); | |
519 | ||
520 | /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the | |
521 | * address to send from */ | |
522 | if (l) | |
523 | return manager_dns_stub_fd_extra(m, l, type); | |
524 | ||
525 | return manager_dns_stub_fd(m, family, listen_address, type); | |
526 | } | |
527 | ||
528 | static int dns_stub_send( | |
529 | Manager *m, | |
530 | DnsStubListenerExtra *l, | |
531 | DnsStream *s, | |
532 | DnsPacket *p, | |
533 | DnsPacket *reply) { | |
534 | ||
535 | int r; | |
536 | ||
537 | assert(m); | |
538 | assert(p); | |
539 | assert(reply); | |
540 | ||
541 | if (s) | |
542 | r = dns_stream_write_packet(s, reply); | |
543 | else { | |
544 | int fd, ifindex; | |
545 | ||
546 | fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM); | |
547 | if (fd < 0) | |
548 | return fd; | |
549 | ||
550 | if (address_is_proxy(p->family, &p->destination)) | |
551 | /* Force loopback iface if this is the loopback proxy stub | |
552 | * and ifindex was normalized to 0 by manager_recv(). */ | |
553 | ifindex = p->ifindex ?: LOOPBACK_IFINDEX; | |
554 | else | |
555 | /* Force loopback iface if this is the main listener stub. */ | |
556 | ifindex = l ? p->ifindex : LOOPBACK_IFINDEX; | |
557 | ||
558 | /* Note that it is essential here that we explicitly choose the source IP address for this | |
559 | * packet. This is because otherwise the kernel will choose it automatically based on the | |
560 | * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */ | |
561 | r = manager_send(m, | |
562 | fd, | |
563 | ifindex, | |
564 | p->family, &p->sender, p->sender_port, &p->destination, | |
565 | reply); | |
566 | } | |
567 | if (r < 0) | |
568 | return log_debug_errno(r, "Failed to send reply packet: %m"); | |
569 | ||
570 | return 0; | |
571 | } | |
572 | ||
573 | static int dns_stub_reply_with_edns0_do(DnsQuery *q) { | |
574 | assert(q); | |
575 | ||
576 | /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification | |
577 | * ourselves, or consider the data fully authenticated because we generated it locally, or the client | |
578 | * set cd */ | |
579 | ||
580 | return dns_packet_do(q->request_packet) && | |
581 | (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */ | |
582 | dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */ | |
583 | DNS_PACKET_CD(q->request_packet)); /* … or client set CD */ | |
584 | } | |
585 | ||
586 | static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) { | |
587 | /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs | |
588 | * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain | |
589 | * element). E.g. it could be that an RR that was included in the first reply's additional section | |
590 | * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and | |
591 | * remove everything in the "higher priority" sections from the "lower priority" sections. | |
592 | * | |
593 | * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should | |
594 | * always end up in one section fully or not at all, but never be split among sections. | |
595 | * | |
596 | * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well | |
597 | * as AUTHORITATIVE section RRs from the ADDITIONAL section. */ | |
598 | ||
599 | dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer); | |
600 | dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer); | |
601 | dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative); | |
602 | } | |
603 | ||
604 | static int dns_stub_send_reply( | |
605 | DnsQuery *q, | |
606 | int rcode) { | |
607 | ||
608 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; | |
609 | bool truncated, edns0_do; | |
610 | int r; | |
611 | ||
612 | assert(q); | |
613 | ||
614 | edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */ | |
615 | ||
616 | r = dns_stub_make_reply_packet( | |
617 | &reply, | |
618 | dns_packet_payload_size_max(q->request_packet), | |
619 | q->request_packet->question, | |
620 | &truncated); | |
621 | if (r < 0) | |
622 | return log_debug_errno(r, "Failed to build reply packet: %m"); | |
623 | ||
624 | dns_stub_suppress_duplicate_section_rrs(q); | |
625 | ||
626 | r = dns_stub_add_reply_packet_body( | |
627 | reply, | |
628 | q->reply_answer, | |
629 | q->reply_authoritative, | |
630 | q->reply_additional, | |
631 | edns0_do, | |
632 | &truncated); | |
633 | if (r < 0) | |
634 | return log_debug_errno(r, "Failed to append reply packet body: %m"); | |
635 | ||
636 | r = dns_stub_finish_reply_packet( | |
637 | reply, | |
638 | DNS_PACKET_ID(q->request_packet), | |
639 | rcode, | |
640 | truncated, | |
641 | dns_query_fully_authoritative(q), | |
642 | DNS_PACKET_RD(q->request_packet), | |
643 | !!q->request_packet->opt, | |
644 | edns0_do, | |
645 | (DNS_PACKET_AD(q->request_packet) || dns_packet_do(q->request_packet)) && dns_query_fully_authenticated(q), | |
646 | FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE), | |
647 | q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX, | |
648 | dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra); | |
649 | if (r < 0) | |
650 | return log_debug_errno(r, "Failed to build failure packet: %m"); | |
651 | ||
652 | return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply); | |
653 | } | |
654 | ||
655 | static int dns_stub_send_failure( | |
656 | Manager *m, | |
657 | DnsStubListenerExtra *l, | |
658 | DnsStream *s, | |
659 | DnsPacket *p, | |
660 | int rcode, | |
661 | bool authenticated) { | |
662 | ||
663 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; | |
664 | bool truncated; | |
665 | int r; | |
666 | ||
667 | assert(m); | |
668 | assert(p); | |
669 | ||
670 | r = dns_stub_make_reply_packet( | |
671 | &reply, | |
672 | dns_packet_payload_size_max(p), | |
673 | p->question, | |
674 | &truncated); | |
675 | if (r < 0) | |
676 | return log_debug_errno(r, "Failed to make failure packet: %m"); | |
677 | ||
678 | r = dns_stub_finish_reply_packet( | |
679 | reply, | |
680 | DNS_PACKET_ID(p), | |
681 | rcode, | |
682 | truncated, | |
683 | false, | |
684 | DNS_PACKET_RD(p), | |
685 | !!p->opt, | |
686 | dns_packet_do(p), | |
687 | (DNS_PACKET_AD(p) || dns_packet_do(p)) && authenticated, | |
688 | DNS_PACKET_CD(p), | |
689 | l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX, | |
690 | dns_packet_has_nsid_request(p) > 0 && !l); | |
691 | if (r < 0) | |
692 | return log_debug_errno(r, "Failed to build failure packet: %m"); | |
693 | ||
694 | return dns_stub_send(m, l, s, p, reply); | |
695 | } | |
696 | ||
697 | static int dns_stub_patch_bypass_reply_packet( | |
698 | DnsPacket **ret, /* Where to place the patched packet */ | |
699 | DnsPacket *original, /* The packet to patch */ | |
700 | DnsPacket *request, /* The packet the patched packet shall look like a reply to */ | |
701 | bool validated, | |
702 | bool authenticated) { | |
703 | _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL; | |
704 | int r; | |
705 | ||
706 | assert(ret); | |
707 | assert(original); | |
708 | assert(request); | |
709 | ||
710 | r = dns_packet_dup(&c, original); | |
711 | if (r < 0) | |
712 | return r; | |
713 | ||
714 | /* Extract the packet, so that we know where the OPT field is */ | |
715 | r = dns_packet_extract(c); | |
716 | if (r < 0) | |
717 | return r; | |
718 | ||
719 | /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */ | |
720 | DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id; | |
721 | ||
722 | /* Patch in our own maximum datagram size, if EDNS0 was on */ | |
723 | r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX); | |
724 | if (r < 0) | |
725 | return r; | |
726 | ||
727 | /* Lower all TTLs by the time passed since we received the datagram. */ | |
728 | if (timestamp_is_set(original->timestamp)) { | |
729 | r = dns_packet_patch_ttls(c, original->timestamp); | |
730 | if (r < 0) | |
731 | return r; | |
732 | } | |
733 | ||
734 | /* Our upstream connection might have supported larger DNS requests than our downstream one, hence | |
735 | * set the TC bit if our reply is larger than what the client supports, and truncate. */ | |
736 | if (c->size > dns_packet_payload_size_max(request)) { | |
737 | log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one."); | |
738 | dns_packet_truncate(c, dns_packet_payload_size_max(request)); | |
739 | DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC); | |
740 | } | |
741 | ||
742 | /* Patch the cd bit to reflect the state of validation: set when both we and the upstream | |
743 | * resolver have checking disabled. */ | |
744 | DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags), | |
745 | DNS_PACKET_FLAG_CD, DNS_PACKET_CD(original) && !validated)); | |
746 | ||
747 | /* Ensure we don't pass along an untrusted ad flag for bypass packets */ | |
748 | DNS_PACKET_HEADER(c)->flags = htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c)->flags), | |
749 | DNS_PACKET_FLAG_AD, authenticated)); | |
750 | ||
751 | *ret = TAKE_PTR(c); | |
752 | return 0; | |
753 | } | |
754 | ||
755 | static void dns_stub_query_complete(DnsQuery *query) { | |
756 | _cleanup_(dns_query_freep) DnsQuery *q = query; | |
757 | int r; | |
758 | ||
759 | assert(q); | |
760 | assert(q->request_packet); | |
761 | ||
762 | if (q->question_bypass) { | |
763 | /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it | |
764 | * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the | |
765 | * packets are not 100% compatible.) */ | |
766 | ||
767 | if (q->answer_full_packet && | |
768 | q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) { | |
769 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; | |
770 | ||
771 | r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet, | |
772 | /* validated = */ !FLAGS_SET(q->flags, SD_RESOLVED_NO_VALIDATE), | |
773 | FLAGS_SET(q->answer_query_flags, SD_RESOLVED_AUTHENTICATED)); | |
774 | if (r < 0) | |
775 | log_debug_errno(r, "Failed to patch bypass reply packet: %m"); | |
776 | else | |
777 | (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply); | |
778 | ||
779 | return; | |
780 | } | |
781 | } | |
782 | ||
783 | /* Take all data from the current reply, and merge it into the three reply sections we are building | |
784 | * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and | |
785 | * and keep adding all RRs in the CNAME chain. */ | |
786 | r = dns_stub_assign_sections( | |
787 | q, | |
788 | dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS), | |
789 | dns_stub_reply_with_edns0_do(q)); | |
790 | if (r < 0) | |
791 | return (void) log_debug_errno(r, "Failed to assign sections: %m"); | |
792 | ||
793 | switch (q->state) { | |
794 | ||
795 | case DNS_TRANSACTION_SUCCESS: { | |
796 | bool first = true; | |
797 | ||
798 | for (;;) { | |
799 | int cname_result; | |
800 | ||
801 | cname_result = dns_query_process_cname_one(q); | |
802 | if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */ | |
803 | log_debug("Detected CNAME loop, returning what we already have."); | |
804 | (void) dns_stub_send_reply(q, q->answer_rcode); | |
805 | break; | |
806 | } | |
807 | if (cname_result < 0) { | |
808 | log_debug_errno(cname_result, "Failed to process CNAME: %m"); | |
809 | break; | |
810 | } | |
811 | ||
812 | if (cname_result == DNS_QUERY_NOMATCH) { | |
813 | /* This answer doesn't contain any RR that would answer our question | |
814 | * positively, i.e. neither directly nor via CNAME. */ | |
815 | ||
816 | if (first) /* We never followed a CNAME and the answer doesn't match our | |
817 | * question at all? Then this is final, the empty answer is the | |
818 | * answer. */ | |
819 | break; | |
820 | ||
821 | /* Otherwise, we already followed a CNAME once within this packet, and the | |
822 | * packet doesn't answer our question. In that case let's restart the query, | |
823 | * now with the redirected question. We'll */ | |
824 | r = dns_query_go(q); | |
825 | if (r < 0) | |
826 | return (void) log_debug_errno(r, "Failed to restart query: %m"); | |
827 | ||
828 | TAKE_PTR(q); | |
829 | return; | |
830 | } | |
831 | ||
832 | r = dns_stub_assign_sections( | |
833 | q, | |
834 | dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS), | |
835 | dns_stub_reply_with_edns0_do(q)); | |
836 | if (r < 0) | |
837 | return (void) log_debug_errno(r, "Failed to assign sections: %m"); | |
838 | ||
839 | if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */ | |
840 | break; | |
841 | ||
842 | /* We followed a CNAME. and collected the RRs that answer the redirected question | |
843 | * successfully. Let's not try to do this again. */ | |
844 | assert(cname_result == DNS_QUERY_CNAME); | |
845 | first = false; | |
846 | } | |
847 | ||
848 | _fallthrough_; | |
849 | } | |
850 | ||
851 | case DNS_TRANSACTION_RCODE_FAILURE: | |
852 | (void) dns_stub_send_reply(q, q->answer_rcode); | |
853 | break; | |
854 | ||
855 | case DNS_TRANSACTION_NOT_FOUND: | |
856 | (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN); | |
857 | break; | |
858 | ||
859 | case DNS_TRANSACTION_TIMEOUT: | |
860 | case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED: | |
861 | /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */ | |
862 | break; | |
863 | ||
864 | case DNS_TRANSACTION_NO_SERVERS: | |
865 | /* We're not configured to give answers for this question. Refuse it. */ | |
866 | (void) dns_stub_send_reply(q, DNS_RCODE_REFUSED); | |
867 | break; | |
868 | ||
869 | case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED: | |
870 | /* This RR Type is not implemented */ | |
871 | (void) dns_stub_send_reply(q, DNS_RCODE_NOTIMP); | |
872 | break; | |
873 | ||
874 | case DNS_TRANSACTION_INVALID_REPLY: | |
875 | case DNS_TRANSACTION_ERRNO: | |
876 | case DNS_TRANSACTION_ABORTED: | |
877 | case DNS_TRANSACTION_DNSSEC_FAILED: | |
878 | case DNS_TRANSACTION_NO_TRUST_ANCHOR: | |
879 | case DNS_TRANSACTION_NETWORK_DOWN: | |
880 | case DNS_TRANSACTION_NO_SOURCE: | |
881 | case DNS_TRANSACTION_STUB_LOOP: | |
882 | (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL); | |
883 | break; | |
884 | ||
885 | case DNS_TRANSACTION_NULL: | |
886 | case DNS_TRANSACTION_PENDING: | |
887 | case DNS_TRANSACTION_VALIDATING: | |
888 | default: | |
889 | assert_not_reached(); | |
890 | } | |
891 | } | |
892 | ||
893 | static int dns_stub_stream_complete(DnsStream *s, int error) { | |
894 | assert(s); | |
895 | ||
896 | log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m"); | |
897 | ||
898 | for (;;) { | |
899 | DnsQuery *q; | |
900 | ||
901 | q = set_first(s->queries); | |
902 | if (!q) | |
903 | break; | |
904 | ||
905 | dns_query_free(q); | |
906 | } | |
907 | ||
908 | /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections | |
909 | * should be kept as long as the client wants to. */ | |
910 | dns_stream_unref(s); | |
911 | return 0; | |
912 | } | |
913 | ||
914 | static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) { | |
915 | uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL; | |
916 | _cleanup_(dns_query_freep) DnsQuery *q = NULL; | |
917 | Hashmap **queries_by_packet; | |
918 | DnsQuery *existing; | |
919 | bool bypass = false; | |
920 | int r; | |
921 | ||
922 | assert(m); | |
923 | assert(p); | |
924 | assert(p->protocol == DNS_PROTOCOL_DNS); | |
925 | ||
926 | if (!l && /* l == NULL if this is the main stub */ | |
927 | !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */ | |
928 | (in_addr_is_localhost(p->family, &p->sender) <= 0 || | |
929 | in_addr_is_localhost(p->family, &p->destination) <= 0)) { | |
930 | log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring."); | |
931 | return; | |
932 | } | |
933 | ||
934 | if (manager_packet_from_our_transaction(m, p)) { | |
935 | log_debug("Got our own packet looped back, ignoring."); | |
936 | return; | |
937 | } | |
938 | ||
939 | queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet; | |
940 | existing = hashmap_get(*queries_by_packet, p); | |
941 | if (existing && dns_packet_equal(existing->request_packet, p)) { | |
942 | log_debug("Got repeat packet from client, ignoring."); | |
943 | return; | |
944 | } | |
945 | ||
946 | r = dns_packet_extract(p); | |
947 | if (r < 0) { | |
948 | log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m"); | |
949 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false); | |
950 | return; | |
951 | } | |
952 | ||
953 | if (!dns_packet_version_supported(p)) { | |
954 | log_debug("Got EDNS OPT field with unsupported version number."); | |
955 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false); | |
956 | return; | |
957 | } | |
958 | ||
959 | if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) { | |
960 | log_debug("Got message with obsolete key type, refusing."); | |
961 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); | |
962 | return; | |
963 | } | |
964 | ||
965 | if (dns_type_is_zone_transfer(dns_question_first_key(p->question)->type)) { | |
966 | log_debug("Got request for zone transfer, refusing."); | |
967 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); | |
968 | return; | |
969 | } | |
970 | ||
971 | if (!DNS_PACKET_RD(p)) { | |
972 | /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */ | |
973 | log_debug("Got request with recursion disabled, refusing."); | |
974 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); | |
975 | return; | |
976 | } | |
977 | ||
978 | r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops); | |
979 | if (r < 0) { | |
980 | log_oom(); | |
981 | return; | |
982 | } | |
983 | ||
984 | if (address_is_proxy(p->family, &p->destination)) { | |
985 | _cleanup_free_ char *dipa = NULL; | |
986 | ||
987 | r = in_addr_to_string(p->family, &p->destination, &dipa); | |
988 | if (r < 0) | |
989 | return (void) log_error_errno(r, "Failed to format destination address: %m"); | |
990 | ||
991 | log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic."); | |
992 | bypass = true; | |
993 | protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */ | |
994 | } else if (dns_packet_do(p)) { | |
995 | log_debug("Got request with DNSSEC enabled, enabling bypass logic."); | |
996 | bypass = true; | |
997 | } | |
998 | ||
999 | if (bypass) | |
1000 | r = dns_query_new(m, &q, NULL, NULL, p, 0, | |
1001 | protocol_flags| | |
1002 | SD_RESOLVED_NO_CNAME| | |
1003 | SD_RESOLVED_NO_SEARCH| | |
1004 | (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)| | |
1005 | SD_RESOLVED_REQUIRE_PRIMARY| | |
1006 | SD_RESOLVED_CLAMP_TTL| | |
1007 | SD_RESOLVED_RELAX_SINGLE_LABEL); | |
1008 | else | |
1009 | r = dns_query_new(m, &q, p->question, p->question, NULL, 0, | |
1010 | protocol_flags| | |
1011 | SD_RESOLVED_NO_SEARCH| | |
1012 | (DNS_PACKET_CD(p) ? SD_RESOLVED_NO_VALIDATE | SD_RESOLVED_NO_CACHE : 0)| | |
1013 | (dns_packet_do(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)| | |
1014 | SD_RESOLVED_CLAMP_TTL); | |
1015 | if (r == -ENOANO) /* Refuse query if there is -ENOANO */ | |
1016 | return (void) dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); | |
1017 | if (r < 0) { | |
1018 | log_error_errno(r, "Failed to generate query object: %m"); | |
1019 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false); | |
1020 | return; | |
1021 | } | |
1022 | ||
1023 | q->request_packet = dns_packet_ref(p); | |
1024 | q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */ | |
1025 | q->stub_listener_extra = l; | |
1026 | q->complete = dns_stub_query_complete; | |
1027 | ||
1028 | if (s) { | |
1029 | /* Remember which queries belong to this stream, so that we can cancel them when the stream | |
1030 | * is disconnected early */ | |
1031 | ||
1032 | r = set_ensure_put(&s->queries, NULL, q); | |
1033 | if (r < 0) { | |
1034 | log_oom(); | |
1035 | return; | |
1036 | } | |
1037 | assert(r > 0); | |
1038 | } | |
1039 | ||
1040 | /* Add the query to the hash table we use to determine repeat packets now. We don't care about | |
1041 | * failures here, since in the worst case we'll not recognize duplicate incoming requests, which | |
1042 | * isn't particularly bad. */ | |
1043 | (void) hashmap_put(*queries_by_packet, q->request_packet, q); | |
1044 | ||
1045 | r = dns_query_go(q); | |
1046 | if (r < 0) { | |
1047 | log_error_errno(r, "Failed to start query: %m"); | |
1048 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false); | |
1049 | return; | |
1050 | } | |
1051 | ||
1052 | log_debug("Processing query..."); | |
1053 | TAKE_PTR(q); | |
1054 | } | |
1055 | ||
1056 | static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) { | |
1057 | _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL; | |
1058 | int r; | |
1059 | ||
1060 | r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p); | |
1061 | if (r <= 0) | |
1062 | return r; | |
1063 | ||
1064 | if (dns_packet_validate_query(p) > 0) { | |
1065 | log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p)); | |
1066 | ||
1067 | dns_stub_process_query(m, l, NULL, p); | |
1068 | } else | |
1069 | log_debug("Invalid DNS stub UDP packet, ignoring."); | |
1070 | ||
1071 | return 0; | |
1072 | } | |
1073 | ||
1074 | static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
1075 | return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL); | |
1076 | } | |
1077 | ||
1078 | static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
1079 | DnsStubListenerExtra *l = ASSERT_PTR(userdata); | |
1080 | ||
1081 | return on_dns_stub_packet_internal(s, fd, revents, l->manager, l); | |
1082 | } | |
1083 | ||
1084 | static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) { | |
1085 | assert(s); | |
1086 | assert(s->manager); | |
1087 | assert(p); | |
1088 | ||
1089 | if (dns_packet_validate_query(p) > 0) { | |
1090 | log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p)); | |
1091 | ||
1092 | dns_stub_process_query(s->manager, s->stub_listener_extra, s, p); | |
1093 | } else | |
1094 | log_debug("Invalid DNS stub TCP packet, ignoring."); | |
1095 | ||
1096 | return 0; | |
1097 | } | |
1098 | ||
1099 | static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) { | |
1100 | DnsStream *stream; | |
1101 | int cfd, r; | |
1102 | ||
1103 | cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); | |
1104 | if (cfd < 0) { | |
1105 | if (ERRNO_IS_ACCEPT_AGAIN(errno)) | |
1106 | return 0; | |
1107 | ||
1108 | return -errno; | |
1109 | } | |
1110 | ||
1111 | r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL, | |
1112 | on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC); | |
1113 | if (r < 0) { | |
1114 | safe_close(cfd); | |
1115 | return r; | |
1116 | } | |
1117 | ||
1118 | stream->stub_listener_extra = l; | |
1119 | ||
1120 | /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */ | |
1121 | ||
1122 | return 0; | |
1123 | } | |
1124 | ||
1125 | static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
1126 | return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL); | |
1127 | } | |
1128 | ||
1129 | static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
1130 | DnsStubListenerExtra *l = ASSERT_PTR(userdata); | |
1131 | ||
1132 | return on_dns_stub_stream_internal(s, fd, revents, l->manager, l); | |
1133 | } | |
1134 | ||
/* Applies the socket options every stub socket needs, regardless of transport: SO_REUSEADDR plus
 * reception of packet info and TTL. Stops at the first failure and returns its negative error code;
 * returns 0 if all options could be set. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r >= 0)
                r = socket_set_recvpktinfo(fd, family, true);
        if (r >= 0)
                r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1155 | ||
/* Applies the TCP-specific options of stub listening sockets. Both options are best-effort: failures
 * are logged at debug level and otherwise ignored, hence this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        int fastopen, nodelay;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        fastopen = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (fastopen < 0)
                log_debug_errno(fastopen, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        nodelay = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (nodelay < 0)
                log_debug_errno(nodelay, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1171 | ||
1172 | static int manager_dns_stub_fd( | |
1173 | Manager *m, | |
1174 | int family, | |
1175 | const union in_addr_union *listen_addr, | |
1176 | int type) { | |
1177 | ||
1178 | sd_event_source **event_source; | |
1179 | _cleanup_close_ int fd = -EBADF; | |
1180 | union sockaddr_union sa; | |
1181 | int r; | |
1182 | ||
1183 | assert(m); | |
1184 | assert(listen_addr); | |
1185 | ||
1186 | if (type == SOCK_DGRAM) | |
1187 | event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source; | |
1188 | else if (type == SOCK_STREAM) | |
1189 | event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source; | |
1190 | else | |
1191 | return -EPROTONOSUPPORT; | |
1192 | ||
1193 | if (*event_source) | |
1194 | return sd_event_source_get_io_fd(*event_source); | |
1195 | ||
1196 | fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); | |
1197 | if (fd < 0) | |
1198 | return -errno; | |
1199 | ||
1200 | r = set_dns_stub_common_socket_options(fd, family); | |
1201 | if (r < 0) | |
1202 | return r; | |
1203 | ||
1204 | if (type == SOCK_STREAM) { | |
1205 | r = set_dns_stub_common_tcp_socket_options(fd); | |
1206 | if (r < 0) | |
1207 | return r; | |
1208 | } | |
1209 | ||
1210 | /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want | |
1211 | * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or | |
1212 | * so to redirect external traffic to it. */ | |
1213 | ||
1214 | if (!address_is_proxy(family, listen_addr)) { | |
1215 | /* Make sure no traffic from outside the local host can leak to onto this socket */ | |
1216 | r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX); | |
1217 | if (r < 0) | |
1218 | return r; | |
1219 | ||
1220 | r = socket_set_ttl(fd, family, 1); | |
1221 | if (r < 0) | |
1222 | return r; | |
1223 | } else if (type == SOCK_DGRAM) { | |
1224 | /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for | |
1225 | * a longer discussion. (We only do this for sockets that are potentially externally | |
1226 | * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1, | |
1227 | * see above, so that packets don't get routed at all.) */ | |
1228 | r = socket_disable_pmtud(fd, family); | |
1229 | if (r < 0) | |
1230 | log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m"); | |
1231 | ||
1232 | r = socket_set_recvfragsize(fd, family, true); | |
1233 | if (r < 0) | |
1234 | log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m"); | |
1235 | } | |
1236 | ||
1237 | r = sockaddr_set_in_addr(&sa, family, listen_addr, 53); | |
1238 | if (r < 0) | |
1239 | return r; | |
1240 | ||
1241 | if (bind(fd, &sa.sa, sizeof(sa.in)) < 0) | |
1242 | return -errno; | |
1243 | ||
1244 | if (type == SOCK_STREAM && | |
1245 | listen(fd, SOMAXCONN_DELUXE) < 0) | |
1246 | return -errno; | |
1247 | ||
1248 | r = sd_event_add_io(m->event, event_source, fd, EPOLLIN, | |
1249 | type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream, | |
1250 | m); | |
1251 | if (r < 0) | |
1252 | return r; | |
1253 | ||
1254 | r = sd_event_source_set_io_fd_own(*event_source, true); | |
1255 | if (r < 0) | |
1256 | return r; | |
1257 | ||
1258 | (void) sd_event_source_set_description(*event_source, | |
1259 | type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp"); | |
1260 | ||
1261 | return TAKE_FD(fd); | |
1262 | } | |
1263 | ||
1264 | static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) { | |
1265 | _cleanup_free_ char *pretty = NULL; | |
1266 | _cleanup_close_ int fd = -EBADF; | |
1267 | union sockaddr_union sa; | |
1268 | int r; | |
1269 | ||
1270 | assert(m); | |
1271 | assert(l); | |
1272 | assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM)); | |
1273 | ||
1274 | sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source; | |
1275 | if (*event_source) | |
1276 | return sd_event_source_get_io_fd(*event_source); | |
1277 | ||
1278 | if (!have_effective_cap(CAP_NET_BIND_SERVICE) && dns_stub_listener_extra_port(l) < 1024) { | |
1279 | log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating extra stub listener on port %hu.", | |
1280 | dns_stub_listener_extra_port(l)); | |
1281 | return 0; | |
1282 | } | |
1283 | ||
1284 | if (l->family == AF_INET) | |
1285 | sa = (union sockaddr_union) { | |
1286 | .in.sin_family = l->family, | |
1287 | .in.sin_port = htobe16(dns_stub_listener_extra_port(l)), | |
1288 | .in.sin_addr = l->address.in, | |
1289 | }; | |
1290 | else | |
1291 | sa = (union sockaddr_union) { | |
1292 | .in6.sin6_family = l->family, | |
1293 | .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)), | |
1294 | .in6.sin6_addr = l->address.in6, | |
1295 | }; | |
1296 | ||
1297 | fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); | |
1298 | if (fd < 0) { | |
1299 | r = -errno; | |
1300 | goto fail; | |
1301 | } | |
1302 | ||
1303 | r = set_dns_stub_common_socket_options(fd, l->family); | |
1304 | if (r < 0) | |
1305 | goto fail; | |
1306 | ||
1307 | if (type == SOCK_STREAM) { | |
1308 | r = set_dns_stub_common_tcp_socket_options(fd); | |
1309 | if (r < 0) | |
1310 | goto fail; | |
1311 | } | |
1312 | ||
1313 | /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case | |
1314 | * people may want ttl > 1. */ | |
1315 | ||
1316 | r = socket_set_freebind(fd, l->family, true); | |
1317 | if (r < 0) | |
1318 | goto fail; | |
1319 | ||
1320 | if (type == SOCK_DGRAM) { | |
1321 | r = socket_disable_pmtud(fd, l->family); | |
1322 | if (r < 0) | |
1323 | log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m"); | |
1324 | ||
1325 | r = socket_set_recvfragsize(fd, l->family, true); | |
1326 | if (r < 0) | |
1327 | log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m"); | |
1328 | } | |
1329 | ||
1330 | r = RET_NERRNO(bind(fd, &sa.sa, sockaddr_len(&sa))); | |
1331 | if (r < 0) | |
1332 | goto fail; | |
1333 | ||
1334 | if (type == SOCK_STREAM && | |
1335 | listen(fd, SOMAXCONN_DELUXE) < 0) { | |
1336 | r = -errno; | |
1337 | goto fail; | |
1338 | } | |
1339 | ||
1340 | r = sd_event_add_io(m->event, event_source, fd, EPOLLIN, | |
1341 | type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra, | |
1342 | l); | |
1343 | if (r < 0) | |
1344 | goto fail; | |
1345 | ||
1346 | r = sd_event_source_set_io_fd_own(*event_source, true); | |
1347 | if (r < 0) | |
1348 | goto fail; | |
1349 | ||
1350 | (void) sd_event_source_set_description(*event_source, | |
1351 | type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra"); | |
1352 | ||
1353 | if (DEBUG_LOGGING) { | |
1354 | (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty); | |
1355 | log_debug("Listening on %s socket %s.", | |
1356 | type == SOCK_DGRAM ? "UDP" : "TCP", | |
1357 | strnull(pretty)); | |
1358 | } | |
1359 | ||
1360 | return TAKE_FD(fd); | |
1361 | ||
1362 | fail: | |
1363 | assert(r < 0); | |
1364 | (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty); | |
1365 | return log_warning_errno(r, | |
1366 | r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" : | |
1367 | "Failed to listen on %s socket %s: %m", | |
1368 | type == SOCK_DGRAM ? "UDP" : "TCP", | |
1369 | strnull(pretty)); | |
1370 | } | |
1371 | ||
1372 | int manager_dns_stub_start(Manager *m) { | |
1373 | int r; | |
1374 | ||
1375 | assert(m); | |
1376 | ||
1377 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO) | |
1378 | log_debug("Not creating stub listener."); | |
1379 | else if (!have_effective_cap(CAP_NET_BIND_SERVICE)) | |
1380 | log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating stub listener on port 53."); | |
1381 | else { | |
1382 | static const struct { | |
1383 | uint32_t addr; | |
1384 | int socket_type; | |
1385 | } stub_sockets[] = { | |
1386 | { INADDR_DNS_STUB, SOCK_DGRAM }, | |
1387 | { INADDR_DNS_STUB, SOCK_STREAM }, | |
1388 | { INADDR_DNS_PROXY_STUB, SOCK_DGRAM }, | |
1389 | { INADDR_DNS_PROXY_STUB, SOCK_STREAM }, | |
1390 | }; | |
1391 | ||
1392 | log_debug("Creating stub listener using %s.", | |
1393 | m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" : | |
1394 | m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" : | |
1395 | "UDP/TCP"); | |
1396 | ||
1397 | FOREACH_ELEMENT(s, stub_sockets) { | |
1398 | union in_addr_union a = { | |
1399 | .in.s_addr = htobe32(s->addr), | |
1400 | }; | |
1401 | ||
1402 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && s->socket_type == SOCK_STREAM) | |
1403 | continue; | |
1404 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && s->socket_type == SOCK_DGRAM) | |
1405 | continue; | |
1406 | ||
1407 | r = manager_dns_stub_fd(m, AF_INET, &a, s->socket_type); | |
1408 | if (r < 0) { | |
1409 | _cleanup_free_ char *busy_socket = NULL; | |
1410 | ||
1411 | if (asprintf(&busy_socket, | |
1412 | "%s socket " IPV4_ADDRESS_FMT_STR ":53", | |
1413 | s->socket_type == SOCK_DGRAM ? "UDP" : "TCP", | |
1414 | IPV4_ADDRESS_FMT_VAL(a.in)) < 0) | |
1415 | return log_oom(); | |
1416 | ||
1417 | if (IN_SET(r, -EADDRINUSE, -EPERM)) { | |
1418 | log_warning_errno(r, | |
1419 | r == -EADDRINUSE ? "Another process is already listening on %s.\n" | |
1420 | "Turning off local DNS stub support." : | |
1421 | "Failed to listen on %s: %m.\n" | |
1422 | "Turning off local DNS stub support.", | |
1423 | busy_socket); | |
1424 | manager_dns_stub_stop(m); | |
1425 | break; | |
1426 | } | |
1427 | ||
1428 | return log_error_errno(r, "Failed to listen on %s: %m", busy_socket); | |
1429 | } | |
1430 | } | |
1431 | } | |
1432 | ||
1433 | if (!ordered_set_isempty(m->dns_extra_stub_listeners)) { | |
1434 | DnsStubListenerExtra *l; | |
1435 | ||
1436 | log_debug("Creating extra stub listeners."); | |
1437 | ||
1438 | ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) { | |
1439 | if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP)) | |
1440 | (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM); | |
1441 | if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP)) | |
1442 | (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM); | |
1443 | } | |
1444 | } | |
1445 | ||
1446 | return 0; | |
1447 | } | |
1448 | ||
1449 | void manager_dns_stub_stop(Manager *m) { | |
1450 | assert(m); | |
1451 | ||
1452 | m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source); | |
1453 | m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source); | |
1454 | m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source); | |
1455 | m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source); | |
1456 | } | |
1457 | ||
1458 | static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = { | |
1459 | [DNS_STUB_LISTENER_NO] = "no", | |
1460 | [DNS_STUB_LISTENER_UDP] = "udp", | |
1461 | [DNS_STUB_LISTENER_TCP] = "tcp", | |
1462 | [DNS_STUB_LISTENER_YES] = "yes", | |
1463 | }; | |
1464 | DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES); |