]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
b30bf55d | 2 | |
ca8b62b5 | 3 | #include <net/if_arp.h> |
8624f128 | 4 | #include <netinet/tcp.h> |
ca8b62b5 | 5 | |
4ff9bc2e | 6 | #include "errno-util.h" |
b30bf55d | 7 | #include "fd-util.h" |
ef118d00 | 8 | #include "missing_network.h" |
af8b1384 | 9 | #include "missing_socket.h" |
b30bf55d | 10 | #include "resolved-dns-stub.h" |
1f05101f | 11 | #include "socket-netlink.h" |
b30bf55d | 12 | #include "socket-util.h" |
4a6eb824 | 13 | #include "stdio-util.h" |
ae8f0ec3 | 14 | #include "string-table.h" |
b30bf55d LP |
15 | |
/* Largest datagram size advertised on the main stub: the 64K loopback MTU on Linux, minus the Ethernet
 * (14 bytes), IP (20 bytes) and UDP (8 bytes) header sizes. */
#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U - 14U - 20U - 8U)

/* The extra stub listeners advertise a more conservative limit. */
#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22 | ||
b5febb3f | 23 | static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type); |
a8d09063 | 24 | static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type); |
0354029b | 25 | |
ae8f0ec3 LP |
26 | static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) { |
27 | assert(a); | |
28 | ||
29 | siphash24_compress(&a->mode, sizeof(a->mode), state); | |
30 | siphash24_compress(&a->family, sizeof(a->family), state); | |
31 | siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state); | |
32 | siphash24_compress(&a->port, sizeof(a->port), state); | |
33 | } | |
34 | ||
35 | static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) { | |
36 | int r; | |
37 | ||
38 | assert(a); | |
39 | assert(b); | |
40 | ||
41 | r = CMP(a->mode, b->mode); | |
42 | if (r != 0) | |
43 | return r; | |
44 | ||
45 | r = CMP(a->family, b->family); | |
46 | if (r != 0) | |
47 | return r; | |
48 | ||
49 | r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family)); | |
50 | if (r != 0) | |
51 | return r; | |
52 | ||
53 | return CMP(a->port, b->port); | |
54 | } | |
55 | ||
/* Hash ops for containers keyed by DnsStubListenerExtra objects: combines the hash and compare
 * functions above with dns_stub_listener_extra_free() as key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
62 | ||
0354029b LP |
63 | int dns_stub_listener_extra_new( |
64 | Manager *m, | |
65 | DnsStubListenerExtra **ret) { | |
ae8f0ec3 | 66 | |
36aaabc3 | 67 | DnsStubListenerExtra *l; |
1f05101f | 68 | |
0354029b | 69 | l = new(DnsStubListenerExtra, 1); |
1f05101f SS |
70 | if (!l) |
71 | return -ENOMEM; | |
72 | ||
0354029b LP |
73 | *l = (DnsStubListenerExtra) { |
74 | .manager = m, | |
75 | }; | |
1f05101f | 76 | |
0354029b | 77 | *ret = TAKE_PTR(l); |
1f05101f SS |
78 | return 0; |
79 | } | |
80 | ||
36aaabc3 | 81 | DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) { |
bf22f231 YW |
82 | if (!p) |
83 | return NULL; | |
84 | ||
97935302 ZJS |
85 | p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source); |
86 | p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source); | |
bf22f231 | 87 | |
bde69bbd LP |
88 | hashmap_free(p->queries_by_packet); |
89 | ||
bf22f231 YW |
90 | return mfree(p); |
91 | } | |
92 | ||
bde69bbd LP |
93 | static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) { |
94 | assert(p); | |
95 | ||
96 | siphash24_compress(&p->protocol, sizeof(p->protocol), state); | |
97 | siphash24_compress(&p->family, sizeof(p->family), state); | |
98 | siphash24_compress(&p->sender, sizeof(p->sender), state); | |
99 | siphash24_compress(&p->ipproto, sizeof(p->ipproto), state); | |
100 | siphash24_compress(&p->sender_port, sizeof(p->sender_port), state); | |
101 | siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state); | |
102 | ||
103 | /* We don't bother hashing the full packet here, just the header */ | |
104 | } | |
105 | ||
106 | static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) { | |
107 | int r; | |
108 | ||
109 | r = CMP(x->protocol, y->protocol); | |
110 | if (r != 0) | |
111 | return r; | |
112 | ||
113 | r = CMP(x->family, y->family); | |
114 | if (r != 0) | |
115 | return r; | |
116 | ||
117 | r = memcmp(&x->sender, &y->sender, sizeof(x->sender)); | |
118 | if (r != 0) | |
119 | return r; | |
120 | ||
121 | r = CMP(x->ipproto, y->ipproto); | |
122 | if (r != 0) | |
123 | return r; | |
124 | ||
125 | r = CMP(x->sender_port, y->sender_port); | |
126 | if (r != 0) | |
127 | return r; | |
128 | ||
129 | return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader)); | |
130 | } | |
131 | ||
/* Hash ops for containers keyed by DnsPacket objects, built from the two stub_packet_*() helpers. */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
133 | ||
5bd7ebb3 LP |
134 | static int reply_add_with_rrsig( |
135 | DnsAnswer **reply, | |
136 | DnsResourceRecord *rr, | |
137 | int ifindex, | |
138 | DnsAnswerFlags flags, | |
139 | DnsResourceRecord *rrsig, | |
140 | bool with_rrsig) { | |
141 | int r; | |
142 | ||
143 | assert(reply); | |
144 | assert(rr); | |
145 | ||
146 | r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig); | |
147 | if (r < 0) | |
148 | return r; | |
149 | ||
150 | if (with_rrsig && rrsig) { | |
151 | r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL); | |
152 | if (r < 0) | |
153 | return r; | |
154 | } | |
155 | ||
156 | return 0; | |
157 | } | |
158 | ||
775ae354 LP |
159 | static int dns_stub_collect_answer_by_question( |
160 | DnsAnswer **reply, | |
51027656 | 161 | DnsAnswer *answer, |
775ae354 LP |
162 | DnsQuestion *question, |
163 | bool with_rrsig) { /* Add RRSIG RR matching each RR */ | |
b30bf55d | 164 | |
775ae354 | 165 | DnsAnswerItem *item; |
b30bf55d LP |
166 | int r; |
167 | ||
775ae354 | 168 | assert(reply); |
e8d23f92 | 169 | |
915ba31c | 170 | /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */ |
4838dc4f | 171 | |
915ba31c | 172 | DNS_ANSWER_FOREACH_ITEM(item, answer) { |
5bd7ebb3 | 173 | |
915ba31c LP |
174 | /* We have a question, let's see if this RR matches it */ |
175 | r = dns_question_matches_rr(question, item->rr, NULL); | |
176 | if (r < 0) | |
177 | return r; | |
178 | if (!r) { | |
179 | /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */ | |
180 | r = dns_question_matches_cname_or_dname(question, item->rr, NULL); | |
4838dc4f LP |
181 | if (r < 0) |
182 | return r; | |
915ba31c LP |
183 | if (!r) |
184 | continue; | |
4838dc4f | 185 | } |
5bd7ebb3 | 186 | |
915ba31c LP |
187 | /* Mask the section info, we want the primary answers to always go without section |
188 | * info, so that it is added to the answer section when we synthesize a reply. */ | |
5bd7ebb3 | 189 | |
915ba31c LP |
190 | r = reply_add_with_rrsig( |
191 | reply, | |
192 | item->rr, | |
193 | item->ifindex, | |
194 | item->flags & ~DNS_ANSWER_MASK_SECTIONS, | |
195 | item->rrsig, | |
196 | with_rrsig); | |
197 | if (r < 0) | |
198 | return r; | |
e8d23f92 | 199 | } |
b30bf55d | 200 | |
775ae354 LP |
201 | return 0; |
202 | } | |
e8d23f92 | 203 | |
775ae354 LP |
204 | static int dns_stub_collect_answer_by_section( |
205 | DnsAnswer **reply, | |
206 | DnsAnswer *answer, | |
207 | DnsAnswerFlags section, | |
208 | DnsAnswer *exclude1, | |
209 | DnsAnswer *exclude2, | |
210 | bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */ | |
b30bf55d | 211 | |
775ae354 | 212 | DnsAnswerItem *item; |
775ae354 | 213 | int r; |
b30bf55d | 214 | |
775ae354 LP |
215 | assert(reply); |
216 | ||
217 | /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also, | |
218 | * avoid any RRs listed in 'exclude'. */ | |
219 | ||
220 | DNS_ANSWER_FOREACH_ITEM(item, answer) { | |
221 | ||
222 | if (dns_answer_contains(exclude1, item->rr) || | |
223 | dns_answer_contains(exclude2, item->rr)) | |
224 | continue; | |
225 | ||
226 | if (!with_dnssec && | |
227 | dns_type_is_dnssec(item->rr->key->type)) | |
228 | continue; | |
229 | ||
c4d98c3a | 230 | if (((item->flags ^ section) & DNS_ANSWER_MASK_SECTIONS) != 0) |
775ae354 LP |
231 | continue; |
232 | ||
5bd7ebb3 LP |
233 | r = reply_add_with_rrsig( |
234 | reply, | |
235 | item->rr, | |
236 | item->ifindex, | |
237 | item->flags, | |
238 | item->rrsig, | |
239 | with_dnssec); | |
b30bf55d LP |
240 | if (r < 0) |
241 | return r; | |
b30bf55d | 242 | } |
e8d23f92 | 243 | |
5bd7ebb3 | 244 | return 0; |
775ae354 LP |
245 | } |
246 | ||
247 | static int dns_stub_assign_sections( | |
248 | DnsQuery *q, | |
249 | DnsQuestion *question, | |
250 | bool edns0_do) { | |
251 | ||
252 | int r; | |
253 | ||
254 | assert(q); | |
255 | assert(question); | |
256 | ||
c6ebf89b LP |
257 | /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We |
258 | * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We | |
259 | * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's | |
260 | * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects | |
261 | * come with section information though (for example, because they were synthesized locally, and not | |
262 | * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the | |
263 | * 'answer' object that directly matches the original question is always put in the ANSWER section, | |
264 | * regardless if it carries section info, or what that section info says. Then, anything from the | |
265 | * 'answer' objects that is from the ANSWER or AUTHORITY sections, and wasn't already added to the | |
266 | * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to | |
267 | * the ADDITIONAL section. */ | |
775ae354 LP |
268 | |
269 | /* Include all RRs that directly answer the question in the answer section */ | |
270 | r = dns_stub_collect_answer_by_question( | |
271 | &q->reply_answer, | |
272 | q->answer, | |
273 | question, | |
274 | edns0_do); | |
275 | if (r < 0) | |
276 | return r; | |
277 | ||
d451f0e8 | 278 | /* Include all RRs that originate from the authority sections, and aren't already listed in the |
775ae354 LP |
279 | * answer section, in the authority section */ |
280 | r = dns_stub_collect_answer_by_section( | |
281 | &q->reply_authoritative, | |
282 | q->answer, | |
d451f0e8 | 283 | DNS_ANSWER_SECTION_AUTHORITY, |
775ae354 LP |
284 | q->reply_answer, NULL, |
285 | edns0_do); | |
286 | if (r < 0) | |
287 | return r; | |
d451f0e8 LP |
288 | |
289 | /* Include all RRs that originate from the answer or additional sections in the additional section | |
290 | * (except if already listed in the other two sections). Also add all RRs with no section marking. */ | |
775ae354 | 291 | r = dns_stub_collect_answer_by_section( |
d451f0e8 | 292 | &q->reply_additional, |
775ae354 | 293 | q->answer, |
d451f0e8 LP |
294 | DNS_ANSWER_SECTION_ANSWER, |
295 | q->reply_answer, q->reply_authoritative, | |
775ae354 LP |
296 | edns0_do); |
297 | if (r < 0) | |
298 | return r; | |
775ae354 LP |
299 | r = dns_stub_collect_answer_by_section( |
300 | &q->reply_additional, | |
301 | q->answer, | |
302 | DNS_ANSWER_SECTION_ADDITIONAL, | |
303 | q->reply_answer, q->reply_authoritative, | |
304 | edns0_do); | |
305 | if (r < 0) | |
306 | return r; | |
307 | r = dns_stub_collect_answer_by_section( | |
308 | &q->reply_additional, | |
309 | q->answer, | |
310 | 0, | |
311 | q->reply_answer, q->reply_authoritative, | |
312 | edns0_do); | |
313 | if (r < 0) | |
314 | return r; | |
315 | ||
316 | return 0; | |
317 | } | |
318 | ||
319 | static int dns_stub_make_reply_packet( | |
320 | DnsPacket **ret, | |
321 | size_t max_size, | |
322 | DnsQuestion *q, | |
323 | bool *ret_truncated) { | |
324 | ||
325 | _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL; | |
326 | bool tc = false; | |
327 | int r; | |
328 | ||
329 | assert(ret); | |
330 | ||
331 | r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, max_size); | |
332 | if (r < 0) | |
333 | return r; | |
334 | ||
335 | r = dns_packet_append_question(p, q); | |
336 | if (r == -EMSGSIZE) | |
337 | tc = true; | |
338 | else if (r < 0) | |
339 | return r; | |
340 | ||
51027656 | 341 | if (ret_truncated) |
775ae354 LP |
342 | *ret_truncated = tc; |
343 | else if (tc) | |
51027656 LP |
344 | return -EMSGSIZE; |
345 | ||
775ae354 | 346 | DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q)); |
e8d23f92 | 347 | |
775ae354 LP |
348 | *ret = TAKE_PTR(p); |
349 | return 0; | |
350 | } | |
351 | ||
352 | static int dns_stub_add_reply_packet_body( | |
353 | DnsPacket *p, | |
354 | DnsAnswer *answer, | |
355 | DnsAnswer *authoritative, | |
356 | DnsAnswer *additional, | |
357 | bool edns0_do, /* Client expects DNSSEC RRs? */ | |
358 | bool *truncated) { | |
359 | ||
360 | unsigned n_answer = 0, n_authoritative = 0, n_additional = 0; | |
361 | bool tc = false; | |
362 | int r; | |
363 | ||
364 | assert(p); | |
365 | ||
366 | /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as | |
367 | * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal | |
368 | * truncation. In all other cases where things don't fit don't signal truncation, as for those cases | |
369 | * the dropped RRs should not be essential. */ | |
370 | ||
371 | r = dns_packet_append_answer(p, answer, &n_answer); | |
372 | if (r == -EMSGSIZE) | |
373 | tc = true; | |
374 | else if (r < 0) | |
375 | return r; | |
376 | else { | |
377 | r = dns_packet_append_answer(p, authoritative, &n_authoritative); | |
378 | if (r == -EMSGSIZE) { | |
379 | if (edns0_do) | |
380 | tc = true; | |
381 | } else if (r < 0) | |
382 | return r; | |
383 | else { | |
384 | r = dns_packet_append_answer(p, additional, &n_additional); | |
385 | if (r < 0 && r != -EMSGSIZE) | |
386 | return r; | |
387 | } | |
388 | } | |
389 | ||
390 | if (tc) { | |
391 | if (!truncated) | |
392 | return -EMSGSIZE; | |
393 | ||
394 | *truncated = true; | |
395 | } | |
396 | ||
397 | DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer); | |
398 | DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative); | |
399 | DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional); | |
e8d23f92 LP |
400 | return 0; |
401 | } | |
402 | ||
4a6eb824 LP |
403 | static const char *nsid_string(void) { |
404 | static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = ""; | |
405 | sd_id128_t id; | |
406 | int r; | |
407 | ||
408 | /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us | |
409 | * as systemd-resolved, and return a different string for each resolved instance without leaking host | |
410 | * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the | |
411 | * machine ID but from which the machine ID cannot be determined. | |
412 | * | |
413 | * Clients can use this to determine whether an answer is originating locally or is proxied from | |
414 | * upstream. */ | |
415 | ||
416 | if (!isempty(buffer)) | |
417 | return buffer; | |
418 | ||
419 | r = sd_id128_get_machine_app_specific( | |
420 | SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27), | |
421 | &id); | |
422 | if (r < 0) { | |
b480543c | 423 | log_debug_errno(r, "Failed to determine machine ID, ignoring: %m"); |
4a6eb824 LP |
424 | return NULL; |
425 | } | |
426 | ||
427 | xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id)); | |
428 | return buffer; | |
429 | } | |
430 | ||
e8d23f92 LP |
431 | static int dns_stub_finish_reply_packet( |
432 | DnsPacket *p, | |
433 | uint16_t id, | |
434 | int rcode, | |
51027656 | 435 | bool tc, /* set the Truncated bit? */ |
4ad017cd | 436 | bool aa, /* set the Authoritative Answer bit? */ |
da846b30 | 437 | bool rd, /* set the Recursion Desired bit? */ |
e8d23f92 LP |
438 | bool add_opt, /* add an OPT RR to this packet? */ |
439 | bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */ | |
b370adb5 | 440 | bool ad, /* set the DNSSEC authenticated data bit? */ |
775ae354 | 441 | bool cd, /* set the DNSSEC checking disabled bit? */ |
4a6eb824 LP |
442 | uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */ |
443 | bool nsid) { /* whether to add NSID */ | |
e8d23f92 LP |
444 | |
445 | int r; | |
446 | ||
447 | assert(p); | |
448 | ||
ff4caaae | 449 | if (add_opt) { |
4a6eb824 | 450 | r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL); |
ff4caaae LP |
451 | if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */ |
452 | tc = true; | |
453 | else if (r < 0) | |
454 | return r; | |
ff4caaae | 455 | } else { |
941dd294 LP |
456 | /* If the client can't to EDNS0, don't do DO either */ |
457 | edns0_do = false; | |
458 | ||
775ae354 | 459 | /* If we don't do EDNS, clamp the rcode to 4 bit */ |
941dd294 LP |
460 | if (rcode > 0xF) |
461 | rcode = DNS_RCODE_SERVFAIL; | |
462 | } | |
463 | ||
8c9c68b5 LP |
464 | /* Don't set the CD bit unless DO is on, too */ |
465 | if (!edns0_do) | |
775ae354 LP |
466 | cd = false; |
467 | ||
8c9c68b5 LP |
468 | /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section |
469 | * 5.7 */ | |
e8d23f92 LP |
470 | |
471 | DNS_PACKET_HEADER(p)->id = id; | |
472 | ||
473 | DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS( | |
51027656 LP |
474 | 1 /* qr */, |
475 | 0 /* opcode */, | |
4ad017cd | 476 | aa /* aa */, |
51027656 | 477 | tc /* tc */, |
da846b30 | 478 | rd /* rd */, |
51027656 | 479 | 1 /* ra */, |
e8d23f92 | 480 | ad /* ad */, |
775ae354 | 481 | cd /* cd */, |
e8d23f92 | 482 | rcode)); |
b30bf55d | 483 | |
b30bf55d LP |
484 | return 0; |
485 | } | |
486 | ||
a8d09063 LP |
487 | static bool address_is_proxy(int family, const union in_addr_union *a) { |
488 | assert(a); | |
489 | ||
490 | /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */ | |
491 | ||
492 | if (family != AF_INET) | |
493 | return false; | |
494 | ||
495 | return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB; | |
496 | } | |
497 | ||
498 | static int find_socket_fd( | |
499 | Manager *m, | |
500 | DnsStubListenerExtra *l, | |
501 | int family, | |
502 | const union in_addr_union *listen_address, | |
503 | int type) { | |
504 | ||
505 | assert(m); | |
506 | ||
507 | /* Finds the right socket to use for sending. If we know the extra listener, otherwise go via the | |
508 | * address to send from */ | |
509 | if (l) | |
510 | return manager_dns_stub_fd_extra(m, l, type); | |
511 | ||
512 | return manager_dns_stub_fd(m, family, listen_address, type); | |
513 | } | |
514 | ||
0354029b LP |
515 | static int dns_stub_send( |
516 | Manager *m, | |
517 | DnsStubListenerExtra *l, | |
518 | DnsStream *s, | |
519 | DnsPacket *p, | |
520 | DnsPacket *reply) { | |
521 | ||
b30bf55d LP |
522 | int r; |
523 | ||
524 | assert(m); | |
525 | assert(p); | |
526 | assert(reply); | |
527 | ||
528 | if (s) | |
529 | r = dns_stream_write_packet(s, reply); | |
a8d09063 | 530 | else { |
dfa14e28 | 531 | int fd, ifindex; |
a8d09063 | 532 | |
de777ffa | 533 | fd = find_socket_fd(m, l, p->family, &p->destination, SOCK_DGRAM); |
a8d09063 LP |
534 | if (fd < 0) |
535 | return fd; | |
536 | ||
dfa14e28 BF |
537 | if (address_is_proxy(p->family, &p->destination)) |
538 | /* Force loopback iface if this is the loopback proxy stub | |
539 | * and ifindex was normalized to 0 by manager_recv(). */ | |
540 | ifindex = p->ifindex ?: LOOPBACK_IFINDEX; | |
541 | else | |
542 | /* Force loopback iface if this is the main listener stub. */ | |
543 | ifindex = l ? p->ifindex : LOOPBACK_IFINDEX; | |
544 | ||
a8d09063 LP |
545 | /* Note that it is essential here that we explicitly choose the source IP address for this |
546 | * packet. This is because otherwise the kernel will choose it automatically based on the | |
dfa14e28 | 547 | * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */ |
0354029b | 548 | r = manager_send(m, |
a8d09063 | 549 | fd, |
dfa14e28 | 550 | ifindex, |
0354029b LP |
551 | p->family, &p->sender, p->sender_port, &p->destination, |
552 | reply); | |
a8d09063 | 553 | } |
b30bf55d LP |
554 | if (r < 0) |
555 | return log_debug_errno(r, "Failed to send reply packet: %m"); | |
556 | ||
557 | return 0; | |
558 | } | |
559 | ||
39005e18 LP |
560 | static int dns_stub_reply_with_edns0_do(DnsQuery *q) { |
561 | assert(q); | |
562 | ||
563 | /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification | |
564 | * ourselves, or consider the data fully authenticated because we generated it locally, or the client | |
565 | * set cd */ | |
566 | ||
567 | return DNS_PACKET_DO(q->request_packet) && | |
568 | (q->answer_dnssec_result >= 0 || /* we did proper DNSSEC validation … */ | |
569 | dns_query_fully_authenticated(q) || /* … or we considered it authentic otherwise … */ | |
570 | DNS_PACKET_CD(q->request_packet)); /* … or client set CD */ | |
571 | } | |
572 | ||
5d7da51e LP |
573 | static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) { |
574 | /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs | |
575 | * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain | |
576 | * element). E.g. it could be that an RR that was included in the first reply's additional section | |
577 | * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and | |
578 | * remove everything in the "higher priority" sections from the "lower priority" sections. | |
579 | * | |
580 | * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should | |
581 | * always end up in one section fully or not at all, but never be split among sections. | |
582 | * | |
583 | * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well | |
584 | * as AUTHORITATIVE section RRs from the ADDITIONAL section. */ | |
585 | ||
586 | dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer); | |
587 | dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer); | |
588 | dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative); | |
589 | } | |
590 | ||
775ae354 LP |
591 | static int dns_stub_send_reply( |
592 | DnsQuery *q, | |
593 | int rcode) { | |
594 | ||
595 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; | |
596 | bool truncated, edns0_do; | |
597 | int r; | |
598 | ||
599 | assert(q); | |
600 | ||
39005e18 | 601 | edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */ |
775ae354 | 602 | |
775ae354 LP |
603 | r = dns_stub_make_reply_packet( |
604 | &reply, | |
605 | DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet), | |
606 | q->request_packet->question, | |
607 | &truncated); | |
608 | if (r < 0) | |
609 | return log_debug_errno(r, "Failed to build reply packet: %m"); | |
610 | ||
5d7da51e LP |
611 | dns_stub_suppress_duplicate_section_rrs(q); |
612 | ||
775ae354 LP |
613 | r = dns_stub_add_reply_packet_body( |
614 | reply, | |
615 | q->reply_answer, | |
616 | q->reply_authoritative, | |
617 | q->reply_additional, | |
618 | edns0_do, | |
619 | &truncated); | |
620 | if (r < 0) | |
621 | return log_debug_errno(r, "Failed to append reply packet body: %m"); | |
622 | ||
623 | r = dns_stub_finish_reply_packet( | |
624 | reply, | |
625 | DNS_PACKET_ID(q->request_packet), | |
626 | rcode, | |
627 | truncated, | |
9ddf099f | 628 | dns_query_fully_authoritative(q), |
da846b30 | 629 | DNS_PACKET_RD(q->request_packet), |
775ae354 LP |
630 | !!q->request_packet->opt, |
631 | edns0_do, | |
b553abd8 | 632 | (DNS_PACKET_AD(q->request_packet) || DNS_PACKET_DO(q->request_packet)) && dns_query_fully_authenticated(q), |
775ae354 | 633 | DNS_PACKET_CD(q->request_packet), |
4a6eb824 LP |
634 | q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX, |
635 | dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra); | |
775ae354 LP |
636 | if (r < 0) |
637 | return log_debug_errno(r, "Failed to build failure packet: %m"); | |
638 | ||
639 | return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply); | |
640 | } | |
641 | ||
0354029b LP |
642 | static int dns_stub_send_failure( |
643 | Manager *m, | |
644 | DnsStubListenerExtra *l, | |
645 | DnsStream *s, | |
646 | DnsPacket *p, | |
647 | int rcode, | |
648 | bool authenticated) { | |
649 | ||
b30bf55d | 650 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; |
775ae354 | 651 | bool truncated; |
b30bf55d LP |
652 | int r; |
653 | ||
654 | assert(m); | |
655 | assert(p); | |
656 | ||
775ae354 LP |
657 | r = dns_stub_make_reply_packet( |
658 | &reply, | |
659 | DNS_PACKET_PAYLOAD_SIZE_MAX(p), | |
660 | p->question, | |
661 | &truncated); | |
e8d23f92 LP |
662 | if (r < 0) |
663 | return log_debug_errno(r, "Failed to make failure packet: %m"); | |
664 | ||
b370adb5 LP |
665 | r = dns_stub_finish_reply_packet( |
666 | reply, | |
667 | DNS_PACKET_ID(p), | |
668 | rcode, | |
775ae354 | 669 | truncated, |
4ad017cd | 670 | false, |
da846b30 | 671 | DNS_PACKET_RD(p), |
b370adb5 LP |
672 | !!p->opt, |
673 | DNS_PACKET_DO(p), | |
b553abd8 | 674 | (DNS_PACKET_AD(p) || DNS_PACKET_DO(p)) && authenticated, |
775ae354 | 675 | DNS_PACKET_CD(p), |
4a6eb824 LP |
676 | l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX, |
677 | dns_packet_has_nsid_request(p) > 0 && !l); | |
b30bf55d LP |
678 | if (r < 0) |
679 | return log_debug_errno(r, "Failed to build failure packet: %m"); | |
680 | ||
0354029b | 681 | return dns_stub_send(m, l, s, p, reply); |
b30bf55d LP |
682 | } |
683 | ||
775ae354 LP |
684 | static int dns_stub_patch_bypass_reply_packet( |
685 | DnsPacket **ret, /* Where to place the patched packet */ | |
686 | DnsPacket *original, /* The packet to patch */ | |
687 | DnsPacket *request) { /* The packet the patched packet shall look like a reply to */ | |
688 | _cleanup_(dns_packet_unrefp) DnsPacket *c = NULL; | |
689 | int r; | |
690 | ||
691 | assert(ret); | |
692 | assert(original); | |
693 | assert(request); | |
694 | ||
695 | r = dns_packet_dup(&c, original); | |
696 | if (r < 0) | |
697 | return r; | |
698 | ||
699 | /* Extract the packet, so that we know where the OPT field is */ | |
700 | r = dns_packet_extract(c); | |
701 | if (r < 0) | |
702 | return r; | |
703 | ||
704 | /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */ | |
705 | DNS_PACKET_HEADER(c)->id = DNS_PACKET_HEADER(request)->id; | |
706 | ||
707 | /* Patch in our own maximum datagram size, if EDNS0 was on */ | |
708 | r = dns_packet_patch_max_udp_size(c, ADVERTISE_DATAGRAM_SIZE_MAX); | |
709 | if (r < 0) | |
710 | return r; | |
711 | ||
712 | /* Lower all TTLs by the time passed since we received the datagram. */ | |
713 | if (timestamp_is_set(original->timestamp)) { | |
714 | r = dns_packet_patch_ttls(c, original->timestamp); | |
715 | if (r < 0) | |
716 | return r; | |
717 | } | |
718 | ||
719 | /* Our upstream connection might have supported larger DNS requests than our downstream one, hence | |
720 | * set the TC bit if our reply is larger than what the client supports, and truncate. */ | |
721 | if (c->size > DNS_PACKET_PAYLOAD_SIZE_MAX(request)) { | |
722 | log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one."); | |
723 | dns_packet_truncate(c, DNS_PACKET_PAYLOAD_SIZE_MAX(request)); | |
724 | DNS_PACKET_HEADER(c)->flags = htobe16(be16toh(DNS_PACKET_HEADER(c)->flags) | DNS_PACKET_FLAG_TC); | |
725 | } | |
726 | ||
727 | *ret = TAKE_PTR(c); | |
728 | return 0; | |
729 | } | |
730 | ||
c704288c YW |
731 | static void dns_stub_query_complete(DnsQuery *query) { |
732 | _cleanup_(dns_query_freep) DnsQuery *q = query; | |
b30bf55d LP |
733 | int r; |
734 | ||
735 | assert(q); | |
775ae354 | 736 | assert(q->request_packet); |
b30bf55d | 737 | |
775ae354 LP |
738 | if (q->question_bypass) { |
739 | /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it | |
740 | * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the | |
741 | * packets are not 100% compatible.) */ | |
b30bf55d | 742 | |
775ae354 LP |
743 | if (q->answer_full_packet && |
744 | q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) { | |
745 | _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; | |
e8d23f92 | 746 | |
775ae354 LP |
747 | r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet); |
748 | if (r < 0) | |
749 | log_debug_errno(r, "Failed to patch bypass reply packet: %m"); | |
750 | else | |
751 | (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply); | |
752 | ||
775ae354 | 753 | return; |
e8d23f92 | 754 | } |
775ae354 | 755 | } |
b30bf55d | 756 | |
b97fc571 LP |
757 | /* Take all data from the current reply, and merge it into the three reply sections we are building |
758 | * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and | |
759 | * and keep adding all RRs in the CNAME chain. */ | |
760 | r = dns_stub_assign_sections( | |
761 | q, | |
a7c0291c | 762 | dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS), |
b97fc571 | 763 | dns_stub_reply_with_edns0_do(q)); |
c704288c YW |
764 | if (r < 0) |
765 | return (void) log_debug_errno(r, "Failed to assign sections: %m"); | |
2f4d8e57 | 766 | |
775ae354 LP |
767 | switch (q->state) { |
768 | ||
915ba31c LP |
769 | case DNS_TRANSACTION_SUCCESS: { |
770 | bool first = true; | |
771 | ||
772 | for (;;) { | |
773 | int cname_result; | |
774 | ||
775 | cname_result = dns_query_process_cname_one(q); | |
776 | if (cname_result == -ELOOP) { /* CNAME loop, let's send what we already have */ | |
777 | log_debug_errno(r, "Detected CNAME loop, returning what we already have."); | |
778 | (void) dns_stub_send_reply(q, q->answer_rcode); | |
779 | break; | |
780 | } | |
781 | if (cname_result < 0) { | |
782 | log_debug_errno(cname_result, "Failed to process CNAME: %m"); | |
783 | break; | |
784 | } | |
785 | ||
786 | if (cname_result == DNS_QUERY_NOMATCH) { | |
787 | /* This answer doesn't contain any RR that would answer our question | |
788 | * positively, i.e. neither directly nor via CNAME. */ | |
789 | ||
790 | if (first) /* We never followed a CNAME and the answer doesn't match our | |
791 | * question at all? Then this is final, the empty answer is the | |
792 | * answer. */ | |
793 | break; | |
794 | ||
795 | /* Otherwise, we already followed a CNAME once within this packet, and the | |
796 | * packet doesn't answer our question. In that case let's restart the query, | |
797 | * now with the redirected question. We'll */ | |
798 | r = dns_query_go(q); | |
c704288c YW |
799 | if (r < 0) |
800 | return (void) log_debug_errno(r, "Failed to restart query: %m"); | |
915ba31c | 801 | |
c704288c | 802 | TAKE_PTR(q); |
915ba31c LP |
803 | return; |
804 | } | |
805 | ||
806 | r = dns_stub_assign_sections( | |
807 | q, | |
808 | dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS), | |
809 | dns_stub_reply_with_edns0_do(q)); | |
c704288c YW |
810 | if (r < 0) |
811 | return (void) log_debug_errno(r, "Failed to assign sections: %m"); | |
915ba31c LP |
812 | |
813 | if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */ | |
814 | break; | |
815 | ||
816 | /* We followed a CNAME. and collected the RRs that answer the redirected question | |
817 | * successfully. Let's not try to do this again. */ | |
818 | assert(cname_result == DNS_QUERY_CNAME); | |
819 | first = false; | |
b97fc571 | 820 | } |
b97fc571 LP |
821 | |
822 | _fallthrough_; | |
915ba31c | 823 | } |
b97fc571 | 824 | |
b30bf55d | 825 | case DNS_TRANSACTION_RCODE_FAILURE: |
775ae354 | 826 | (void) dns_stub_send_reply(q, q->answer_rcode); |
b30bf55d LP |
827 | break; |
828 | ||
829 | case DNS_TRANSACTION_NOT_FOUND: | |
775ae354 | 830 | (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN); |
b30bf55d LP |
831 | break; |
832 | ||
833 | case DNS_TRANSACTION_TIMEOUT: | |
834 | case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED: | |
835 | /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */ | |
836 | break; | |
837 | ||
838 | case DNS_TRANSACTION_NO_SERVERS: | |
839 | case DNS_TRANSACTION_INVALID_REPLY: | |
840 | case DNS_TRANSACTION_ERRNO: | |
841 | case DNS_TRANSACTION_ABORTED: | |
842 | case DNS_TRANSACTION_DNSSEC_FAILED: | |
843 | case DNS_TRANSACTION_NO_TRUST_ANCHOR: | |
844 | case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED: | |
845 | case DNS_TRANSACTION_NETWORK_DOWN: | |
775ae354 | 846 | case DNS_TRANSACTION_NO_SOURCE: |
49ef064c | 847 | case DNS_TRANSACTION_STUB_LOOP: |
775ae354 | 848 | (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL); |
b30bf55d LP |
849 | break; |
850 | ||
851 | case DNS_TRANSACTION_NULL: | |
852 | case DNS_TRANSACTION_PENDING: | |
853 | case DNS_TRANSACTION_VALIDATING: | |
854 | default: | |
04499a70 | 855 | assert_not_reached(); |
b30bf55d | 856 | } |
b30bf55d LP |
857 | } |
858 | ||
859 | static int dns_stub_stream_complete(DnsStream *s, int error) { | |
860 | assert(s); | |
861 | ||
b412af57 LP |
862 | log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m"); |
863 | ||
864 | for (;;) { | |
865 | DnsQuery *q; | |
866 | ||
867 | q = set_first(s->queries); | |
868 | if (!q) | |
869 | break; | |
b30bf55d | 870 | |
b412af57 LP |
871 | dns_query_free(q); |
872 | } | |
b30bf55d | 873 | |
b412af57 LP |
874 | /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections |
875 | * should be kept as long as the client wants to. */ | |
876 | dns_stream_unref(s); | |
b30bf55d LP |
877 | return 0; |
878 | } | |
879 | ||
0354029b | 880 | static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) { |
a8d09063 | 881 | uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL; |
ceb17827 | 882 | _cleanup_(dns_query_freep) DnsQuery *q = NULL; |
bde69bbd LP |
883 | Hashmap **queries_by_packet; |
884 | DnsQuery *existing; | |
a8d09063 | 885 | bool bypass = false; |
b30bf55d LP |
886 | int r; |
887 | ||
888 | assert(m); | |
889 | assert(p); | |
890 | assert(p->protocol == DNS_PROTOCOL_DNS); | |
891 | ||
0354029b | 892 | if (!l && /* l == NULL if this is the main stub */ |
a8d09063 | 893 | !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */ |
d1fb8cda YW |
894 | (in_addr_is_localhost(p->family, &p->sender) <= 0 || |
895 | in_addr_is_localhost(p->family, &p->destination) <= 0)) { | |
565147b7 | 896 | log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring."); |
ceb17827 | 897 | return; |
b30bf55d LP |
898 | } |
899 | ||
a9fd8837 LP |
900 | if (manager_packet_from_our_transaction(m, p)) { |
901 | log_debug("Got our own packet looped back, ignoring."); | |
902 | return; | |
903 | } | |
904 | ||
bde69bbd LP |
905 | queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet; |
906 | existing = hashmap_get(*queries_by_packet, p); | |
907 | if (existing && dns_packet_equal(existing->request_packet, p)) { | |
908 | log_debug("Got repeat packet from client, ignoring."); | |
909 | return; | |
910 | } | |
911 | ||
b30bf55d LP |
912 | r = dns_packet_extract(p); |
913 | if (r < 0) { | |
914 | log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m"); | |
0354029b | 915 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false); |
ceb17827 | 916 | return; |
b30bf55d LP |
917 | } |
918 | ||
919 | if (!DNS_PACKET_VERSION_SUPPORTED(p)) { | |
920 | log_debug("Got EDNS OPT field with unsupported version number."); | |
0354029b | 921 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false); |
ceb17827 | 922 | return; |
b30bf55d LP |
923 | } |
924 | ||
ab715ddb | 925 | if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) { |
b30bf55d | 926 | log_debug("Got message with obsolete key type, refusing."); |
30ee7071 | 927 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); |
ceb17827 | 928 | return; |
b30bf55d LP |
929 | } |
930 | ||
ab715ddb | 931 | if (dns_type_is_zone_transer(dns_question_first_key(p->question)->type)) { |
b30bf55d | 932 | log_debug("Got request for zone transfer, refusing."); |
30ee7071 | 933 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); |
ceb17827 | 934 | return; |
b30bf55d LP |
935 | } |
936 | ||
937 | if (!DNS_PACKET_RD(p)) { | |
938 | /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */ | |
939 | log_debug("Got request with recursion disabled, refusing."); | |
0354029b | 940 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false); |
ceb17827 | 941 | return; |
b30bf55d LP |
942 | } |
943 | ||
bde69bbd LP |
944 | r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops); |
945 | if (r < 0) { | |
946 | log_oom(); | |
947 | return; | |
948 | } | |
949 | ||
a8d09063 LP |
950 | if (address_is_proxy(p->family, &p->destination)) { |
951 | _cleanup_free_ char *dipa = NULL; | |
952 | ||
953 | r = in_addr_to_string(p->family, &p->destination, &dipa); | |
e1158539 LP |
954 | if (r < 0) |
955 | return (void) log_error_errno(r, "Failed to format destination address: %m"); | |
a8d09063 LP |
956 | |
957 | log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic."); | |
958 | bypass = true; | |
959 | protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */ | |
960 | } else if ((DNS_PACKET_DO(p) && DNS_PACKET_CD(p))) { | |
775ae354 | 961 | log_debug("Got request with DNSSEC checking disabled, enabling bypass logic."); |
a8d09063 LP |
962 | bypass = true; |
963 | } | |
775ae354 | 964 | |
a8d09063 | 965 | if (bypass) |
775ae354 | 966 | r = dns_query_new(m, &q, NULL, NULL, p, 0, |
a8d09063 | 967 | protocol_flags| |
775ae354 LP |
968 | SD_RESOLVED_NO_CNAME| |
969 | SD_RESOLVED_NO_SEARCH| | |
970 | SD_RESOLVED_NO_VALIDATE| | |
971 | SD_RESOLVED_REQUIRE_PRIMARY| | |
972 | SD_RESOLVED_CLAMP_TTL); | |
a8d09063 | 973 | else |
775ae354 | 974 | r = dns_query_new(m, &q, p->question, p->question, NULL, 0, |
a8d09063 | 975 | protocol_flags| |
775ae354 | 976 | SD_RESOLVED_NO_SEARCH| |
2f4d8e57 | 977 | (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)| |
775ae354 | 978 | SD_RESOLVED_CLAMP_TTL); |
b30bf55d LP |
979 | if (r < 0) { |
980 | log_error_errno(r, "Failed to generate query object: %m"); | |
0354029b | 981 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false); |
ceb17827 | 982 | return; |
b30bf55d LP |
983 | } |
984 | ||
775ae354 LP |
985 | q->request_packet = dns_packet_ref(p); |
986 | q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */ | |
0354029b | 987 | q->stub_listener_extra = l; |
b30bf55d LP |
988 | q->complete = dns_stub_query_complete; |
989 | ||
990 | if (s) { | |
b412af57 LP |
991 | /* Remember which queries belong to this stream, so that we can cancel them when the stream |
992 | * is disconnected early */ | |
993 | ||
ceb17827 | 994 | r = set_ensure_put(&s->queries, NULL, q); |
b412af57 LP |
995 | if (r < 0) { |
996 | log_oom(); | |
ceb17827 | 997 | return; |
b412af57 | 998 | } |
ceb17827 | 999 | assert(r > 0); |
b30bf55d LP |
1000 | } |
1001 | ||
bde69bbd LP |
1002 | /* Add the query to the hash table we use to determine repeat packets now. We don't care about |
1003 | * failures here, since in the worst case we'll not recognize duplicate incoming requests, which | |
1004 | * isn't particularly bad. */ | |
1005 | (void) hashmap_put(*queries_by_packet, q->request_packet, q); | |
1006 | ||
b30bf55d LP |
1007 | r = dns_query_go(q); |
1008 | if (r < 0) { | |
1009 | log_error_errno(r, "Failed to start query: %m"); | |
0354029b | 1010 | dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false); |
ceb17827 | 1011 | return; |
b30bf55d LP |
1012 | } |
1013 | ||
52e63427 | 1014 | log_debug("Processing query..."); |
ceb17827 | 1015 | TAKE_PTR(q); |
b30bf55d LP |
1016 | } |
1017 | ||
0354029b | 1018 | static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) { |
b30bf55d | 1019 | _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL; |
b30bf55d LP |
1020 | int r; |
1021 | ||
1022 | r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p); | |
1023 | if (r <= 0) | |
1024 | return r; | |
1025 | ||
1026 | if (dns_packet_validate_query(p) > 0) { | |
1027 | log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p)); | |
1028 | ||
0354029b | 1029 | dns_stub_process_query(m, l, NULL, p); |
b30bf55d LP |
1030 | } else |
1031 | log_debug("Invalid DNS stub UDP packet, ignoring."); | |
1032 | ||
1033 | return 0; | |
1034 | } | |
1035 | ||
d1fb8cda | 1036 | static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) { |
0354029b | 1037 | return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL); |
d1fb8cda YW |
1038 | } |
1039 | ||
1040 | static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
99534007 | 1041 | DnsStubListenerExtra *l = ASSERT_PTR(userdata); |
0354029b LP |
1042 | |
1043 | return on_dns_stub_packet_internal(s, fd, revents, l->manager, l); | |
d1fb8cda YW |
1044 | } |
1045 | ||
624f907e | 1046 | static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) { |
e4bed40f | 1047 | assert(s); |
624f907e | 1048 | assert(s->manager); |
e4bed40f ZJS |
1049 | assert(p); |
1050 | ||
1051 | if (dns_packet_validate_query(p) > 0) { | |
1052 | log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p)); | |
1053 | ||
1054 | dns_stub_process_query(s->manager, s->stub_listener_extra, s, p); | |
1055 | } else | |
1056 | log_debug("Invalid DNS stub TCP packet, ignoring."); | |
1057 | ||
1058 | return 0; | |
1059 | } | |
1060 | ||
1061 | static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) { | |
1062 | DnsStream *stream; | |
1063 | int cfd, r; | |
1064 | ||
1065 | cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); | |
1066 | if (cfd < 0) { | |
1067 | if (ERRNO_IS_ACCEPT_AGAIN(errno)) | |
1068 | return 0; | |
1069 | ||
1070 | return -errno; | |
1071 | } | |
1072 | ||
18230451 YW |
1073 | r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL, |
1074 | on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC); | |
e4bed40f ZJS |
1075 | if (r < 0) { |
1076 | safe_close(cfd); | |
1077 | return r; | |
1078 | } | |
1079 | ||
1080 | stream->stub_listener_extra = l; | |
e4bed40f ZJS |
1081 | |
1082 | /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */ | |
1083 | ||
1084 | return 0; | |
1085 | } | |
1086 | ||
1087 | static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
1088 | return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL); | |
1089 | } | |
1090 | ||
1091 | static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) { | |
99534007 | 1092 | DnsStubListenerExtra *l = ASSERT_PTR(userdata); |
e4bed40f | 1093 | |
e4bed40f ZJS |
1094 | return on_dns_stub_stream_internal(s, fd, revents, l->manager, l); |
1095 | } | |
1096 | ||
/* Applies the socket options every stub socket gets, UDP and TCP alike: SO_REUSEADDR, packet info
 * reception and TTL reception. Stops at the first failure and returns its negative error; returns 0
 * if all three succeed. 'family' must be AF_INET or AF_INET6. */
static int set_dns_stub_common_socket_options(int fd, int family) {
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        /* Each step only runs if the previous one did not fail. */
        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r >= 0)
                r = socket_set_recvpktinfo(fd, family, true);
        if (r >= 0)
                r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1117 | ||
8624f128 LP |
/* Applies the TCP-only options for stub listening sockets. Both options are best-effort: a failure is
 * logged at debug level and otherwise ignored, so this always returns 0. */
static int set_dns_stub_common_tcp_socket_options(int fd) {
        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        int k = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        k = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (k < 0)
                log_debug_errno(k, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1133 | ||
a8d09063 LP |
1134 | static int manager_dns_stub_fd( |
1135 | Manager *m, | |
1136 | int family, | |
1137 | const union in_addr_union *listen_addr, | |
1138 | int type) { | |
1139 | ||
1140 | sd_event_source **event_source; | |
424e490b | 1141 | _cleanup_close_ int fd = -1; |
a8d09063 | 1142 | union sockaddr_union sa; |
b30bf55d LP |
1143 | int r; |
1144 | ||
e1158539 LP |
1145 | assert(m); |
1146 | assert(listen_addr); | |
1147 | ||
a8d09063 LP |
1148 | if (type == SOCK_DGRAM) |
1149 | event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source; | |
1150 | else if (type == SOCK_STREAM) | |
1151 | event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source; | |
1152 | else | |
1153 | return -EPROTONOSUPPORT; | |
d491917c | 1154 | |
d491917c ZJS |
1155 | if (*event_source) |
1156 | return sd_event_source_get_io_fd(*event_source); | |
b30bf55d | 1157 | |
a8d09063 | 1158 | fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); |
424e490b | 1159 | if (fd < 0) |
b30bf55d LP |
1160 | return -errno; |
1161 | ||
a8d09063 | 1162 | r = set_dns_stub_common_socket_options(fd, family); |
2ff48e98 LP |
1163 | if (r < 0) |
1164 | return r; | |
b30bf55d | 1165 | |
8624f128 LP |
1166 | if (type == SOCK_STREAM) { |
1167 | r = set_dns_stub_common_tcp_socket_options(fd); | |
1168 | if (r < 0) | |
1169 | return r; | |
1170 | } | |
1171 | ||
a8d09063 LP |
1172 | /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want |
1173 | * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or | |
1174 | * so to redirect external traffic to it. */ | |
1175 | ||
1176 | if (!address_is_proxy(family, listen_addr)) { | |
1177 | /* Make sure no traffic from outside the local host can leak to onto this socket */ | |
1178 | r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX); | |
1179 | if (r < 0) | |
1180 | return r; | |
1181 | ||
1182 | r = socket_set_ttl(fd, family, 1); | |
1183 | if (r < 0) | |
1184 | return r; | |
1185 | } else if (type == SOCK_DGRAM) { | |
e1158539 LP |
1186 | /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for |
1187 | * a longer discussion. (We only do this for sockets that are potentially externally | |
1188 | * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1, | |
1189 | * see above, so that packets don't get routed at all.) */ | |
a8d09063 LP |
1190 | r = socket_disable_pmtud(fd, family); |
1191 | if (r < 0) | |
1192 | log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m"); | |
1193 | ||
1194 | r = socket_set_recvfragsize(fd, family, true); | |
1195 | if (r < 0) | |
1196 | log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m"); | |
1197 | } | |
b30bf55d | 1198 | |
a8d09063 | 1199 | r = sockaddr_set_in_addr(&sa, family, listen_addr, 53); |
d491917c ZJS |
1200 | if (r < 0) |
1201 | return r; | |
1202 | ||
424e490b ZJS |
1203 | if (bind(fd, &sa.sa, sizeof(sa.in)) < 0) |
1204 | return -errno; | |
b30bf55d | 1205 | |
d491917c ZJS |
1206 | if (type == SOCK_STREAM && |
1207 | listen(fd, SOMAXCONN) < 0) | |
1208 | return -errno; | |
1209 | ||
1210 | r = sd_event_add_io(m->event, event_source, fd, EPOLLIN, | |
1211 | type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream, | |
1212 | m); | |
b30bf55d | 1213 | if (r < 0) |
424e490b | 1214 | return r; |
b30bf55d | 1215 | |
d491917c | 1216 | r = sd_event_source_set_io_fd_own(*event_source, true); |
7216a3b5 YW |
1217 | if (r < 0) |
1218 | return r; | |
1219 | ||
d491917c ZJS |
1220 | (void) sd_event_source_set_description(*event_source, |
1221 | type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp"); | |
b30bf55d | 1222 | |
7216a3b5 | 1223 | return TAKE_FD(fd); |
b30bf55d LP |
1224 | } |
1225 | ||
b5febb3f | 1226 | static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) { |
1f05101f SS |
1227 | _cleanup_free_ char *pretty = NULL; |
1228 | _cleanup_close_ int fd = -1; | |
ca8b62b5 | 1229 | union sockaddr_union sa; |
1f05101f SS |
1230 | int r; |
1231 | ||
0354029b | 1232 | assert(m); |
a8d09063 | 1233 | assert(l); |
b5febb3f | 1234 | assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM)); |
0354029b | 1235 | |
b5febb3f ZJS |
1236 | sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source; |
1237 | if (*event_source) | |
1238 | return sd_event_source_get_io_fd(*event_source); | |
1f05101f | 1239 | |
ca8b62b5 YW |
1240 | if (l->family == AF_INET) |
1241 | sa = (union sockaddr_union) { | |
1242 | .in.sin_family = l->family, | |
49ef064c | 1243 | .in.sin_port = htobe16(dns_stub_listener_extra_port(l)), |
ca8b62b5 YW |
1244 | .in.sin_addr = l->address.in, |
1245 | }; | |
1246 | else | |
1247 | sa = (union sockaddr_union) { | |
1248 | .in6.sin6_family = l->family, | |
49ef064c | 1249 | .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)), |
ca8b62b5 YW |
1250 | .in6.sin6_addr = l->address.in6, |
1251 | }; | |
1252 | ||
b5febb3f | 1253 | fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); |
1f05101f SS |
1254 | if (fd < 0) { |
1255 | r = -errno; | |
1256 | goto fail; | |
1257 | } | |
1258 | ||
af8b1384 | 1259 | r = set_dns_stub_common_socket_options(fd, l->family); |
1f05101f SS |
1260 | if (r < 0) |
1261 | goto fail; | |
1262 | ||
8624f128 LP |
1263 | if (type == SOCK_STREAM) { |
1264 | r = set_dns_stub_common_tcp_socket_options(fd); | |
1265 | if (r < 0) | |
1266 | goto fail; | |
1267 | } | |
1268 | ||
69e3234d | 1269 | /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case |
b5febb3f ZJS |
1270 | * people may want ttl > 1. */ |
1271 | ||
5d0fe423 | 1272 | r = socket_set_freebind(fd, l->family, true); |
b5febb3f ZJS |
1273 | if (r < 0) |
1274 | goto fail; | |
1275 | ||
eb170e75 LP |
1276 | if (type == SOCK_DGRAM) { |
1277 | r = socket_disable_pmtud(fd, l->family); | |
1278 | if (r < 0) | |
1279 | log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m"); | |
20a001bd LP |
1280 | |
1281 | r = socket_set_recvfragsize(fd, l->family, true); | |
1282 | if (r < 0) | |
1283 | log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m"); | |
eb170e75 LP |
1284 | } |
1285 | ||
ded15213 LP |
1286 | r = RET_NERRNO(bind(fd, &sa.sa, SOCKADDR_LEN(sa))); |
1287 | if (r < 0) | |
1f05101f | 1288 | goto fail; |
1f05101f | 1289 | |
b5febb3f ZJS |
1290 | if (type == SOCK_STREAM && |
1291 | listen(fd, SOMAXCONN) < 0) { | |
1292 | r = -errno; | |
1293 | goto fail; | |
1294 | } | |
1295 | ||
1296 | r = sd_event_add_io(m->event, event_source, fd, EPOLLIN, | |
1297 | type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra, | |
1298 | l); | |
1f05101f SS |
1299 | if (r < 0) |
1300 | goto fail; | |
1301 | ||
b5febb3f | 1302 | r = sd_event_source_set_io_fd_own(*event_source, true); |
7216a3b5 YW |
1303 | if (r < 0) |
1304 | goto fail; | |
1305 | ||
b5febb3f ZJS |
1306 | (void) sd_event_source_set_description(*event_source, |
1307 | type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra"); | |
1f05101f SS |
1308 | |
1309 | if (DEBUG_LOGGING) { | |
ca8b62b5 | 1310 | (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty); |
b5febb3f ZJS |
1311 | log_debug("Listening on %s socket %s.", |
1312 | type == SOCK_DGRAM ? "UDP" : "TCP", | |
1313 | strnull(pretty)); | |
1f05101f SS |
1314 | } |
1315 | ||
7216a3b5 | 1316 | return TAKE_FD(fd); |
1f05101f | 1317 | |
b4b7ea1b | 1318 | fail: |
1c17bcb3 | 1319 | assert(r < 0); |
ca8b62b5 | 1320 | (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty); |
b5febb3f ZJS |
1321 | return log_warning_errno(r, |
1322 | r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" : | |
1323 | "Failed to listen on %s socket %s: %m", | |
1324 | type == SOCK_DGRAM ? "UDP" : "TCP", | |
1325 | strnull(pretty)); | |
1f05101f SS |
1326 | } |
1327 | ||
b30bf55d | 1328 | int manager_dns_stub_start(Manager *m) { |
a8d09063 | 1329 | int r; |
b30bf55d LP |
1330 | |
1331 | assert(m); | |
1332 | ||
d5da7707 ZJS |
1333 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO) |
1334 | log_debug("Not creating stub listener."); | |
a8d09063 LP |
1335 | else { |
1336 | static const struct { | |
1337 | uint32_t addr; | |
1338 | int socket_type; | |
1339 | } stub_sockets[] = { | |
1340 | { INADDR_DNS_STUB, SOCK_DGRAM }, | |
1341 | { INADDR_DNS_STUB, SOCK_STREAM }, | |
1342 | { INADDR_DNS_PROXY_STUB, SOCK_DGRAM }, | |
1343 | { INADDR_DNS_PROXY_STUB, SOCK_STREAM }, | |
1344 | }; | |
1345 | ||
d5da7707 ZJS |
1346 | log_debug("Creating stub listener using %s.", |
1347 | m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" : | |
1348 | m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" : | |
1349 | "UDP/TCP"); | |
1350 | ||
a8d09063 LP |
1351 | for (size_t i = 0; i < ELEMENTSOF(stub_sockets); i++) { |
1352 | union in_addr_union a = { | |
1353 | .in.s_addr = htobe32(stub_sockets[i].addr), | |
1354 | }; | |
b30bf55d | 1355 | |
a8d09063 LP |
1356 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && stub_sockets[i].socket_type == SOCK_STREAM) |
1357 | continue; | |
1358 | if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && stub_sockets[i].socket_type == SOCK_DGRAM) | |
1359 | continue; | |
1360 | ||
1361 | r = manager_dns_stub_fd(m, AF_INET, &a, stub_sockets[i].socket_type); | |
1362 | if (r < 0) { | |
1363 | _cleanup_free_ char *busy_socket = NULL; | |
1364 | ||
1365 | if (asprintf(&busy_socket, | |
1366 | "%s socket " IPV4_ADDRESS_FMT_STR ":53", | |
1367 | stub_sockets[i].socket_type == SOCK_DGRAM ? "UDP" : "TCP", | |
1368 | IPV4_ADDRESS_FMT_VAL(a.in)) < 0) | |
1369 | return log_oom(); | |
1370 | ||
1371 | if (IN_SET(r, -EADDRINUSE, -EPERM)) { | |
1372 | log_warning_errno(r, | |
1373 | r == -EADDRINUSE ? "Another process is already listening on %s.\n" | |
1374 | "Turning off local DNS stub support." : | |
1375 | "Failed to listen on %s: %m.\n" | |
1376 | "Turning off local DNS stub support.", | |
1377 | busy_socket); | |
1378 | manager_dns_stub_stop(m); | |
1379 | break; | |
1380 | } | |
b30bf55d | 1381 | |
a8d09063 LP |
1382 | return log_error_errno(r, "Failed to listen on %s: %m", busy_socket); |
1383 | } | |
1384 | } | |
1385 | } | |
b30bf55d | 1386 | |
1f05101f | 1387 | if (!ordered_set_isempty(m->dns_extra_stub_listeners)) { |
36aaabc3 | 1388 | DnsStubListenerExtra *l; |
1f05101f | 1389 | |
dce65cd4 | 1390 | log_debug("Creating extra stub listeners."); |
1f05101f | 1391 | |
90e74a66 | 1392 | ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) { |
7314b397 | 1393 | if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP)) |
b5febb3f | 1394 | (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM); |
7314b397 | 1395 | if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP)) |
b5febb3f | 1396 | (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM); |
7314b397 | 1397 | } |
1f05101f SS |
1398 | } |
1399 | ||
b30bf55d LP |
1400 | return 0; |
1401 | } | |
1402 | ||
1403 | void manager_dns_stub_stop(Manager *m) { | |
1404 | assert(m); | |
1405 | ||
97935302 ZJS |
1406 | m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source); |
1407 | m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source); | |
a8d09063 LP |
1408 | m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source); |
1409 | m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source); | |
b30bf55d | 1410 | } |
ae8f0ec3 LP |
1411 | |
1412 | static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = { | |
97935302 | 1413 | [DNS_STUB_LISTENER_NO] = "no", |
ae8f0ec3 LP |
1414 | [DNS_STUB_LISTENER_UDP] = "udp", |
1415 | [DNS_STUB_LISTENER_TCP] = "tcp", | |
1416 | [DNS_STUB_LISTENER_YES] = "yes", | |
1417 | }; | |
1418 | DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES); |