1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <netinet/tcp.h>
8 #include "alloc-util.h"
9 #include "capability-util.h"
11 #include "errno-util.h"
14 #include "missing_network.h"
15 #include "resolve-util.h"
16 #include "resolved-dns-answer.h"
17 #include "resolved-dns-packet.h"
18 #include "resolved-dns-query.h"
19 #include "resolved-dns-question.h"
20 #include "resolved-dns-rr.h"
21 #include "resolved-dns-stream.h"
22 #include "resolved-dns-stub.h"
23 #include "resolved-dns-transaction.h"
24 #include "resolved-manager.h"
26 #include "siphash24.h"
27 #include "socket-util.h"
28 #include "stdio-util.h"
29 #include "string-table.h"
30 #include "string-util.h"
31 #include "time-util.h"
33 /* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
34 * IP and UDP header sizes */
35 #define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
37 /* On the extra stubs, use a more conservative choice */
38 #define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
40 static int manager_dns_stub_fd_extra(Manager
*m
, DnsStubListenerExtra
*l
, int type
);
41 static int manager_dns_stub_fd(Manager
*m
, int family
, const union in_addr_union
*listen_address
, int type
);
/* Hash function for DnsStubListenerExtra keys: feeds the listener's mode, address family, address and
 * port into the siphash state. These are the same four fields that
 * dns_stub_listener_extra_compare_func() looks at. */
43 static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra
*a
, struct siphash
*state
) {
46 siphash24_compress_typesafe(a
->mode
, state
);
47 siphash24_compress_typesafe(a
->family
, state
);
/* The address is hashed via in_addr_hash_func(), which is also handed the family */
48 in_addr_hash_func(&a
->address
, a
->family
, state
);
49 siphash24_compress_typesafe(a
->port
, state
);
/* Comparison function for DnsStubListenerExtra keys. Compares, in this order: mode, address family,
 * the raw address bytes (FAMILY_ADDRESS_SIZE(a->family) of them) and finally the port.
 *
 * NOTE(review): the statements between the individual comparisons are not visible in this excerpt;
 * presumably each non-zero intermediate result is returned right away — confirm against the full
 * source. */
52 static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra
*a
, const DnsStubListenerExtra
*b
) {
58 r
= CMP(a
->mode
, b
->mode
);
62 r
= CMP(a
->family
, b
->family
);
/* Only the part of the address union that is relevant for a's family is compared */
66 r
= memcmp(&a
->address
, &b
->address
, FAMILY_ADDRESS_SIZE(a
->family
));
70 return CMP(a
->port
, b
->port
);
73 DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
74 dns_stub_listener_extra_hash_ops
,
76 dns_stub_listener_extra_hash_func
,
77 dns_stub_listener_extra_compare_func
,
78 dns_stub_listener_extra_free
);
80 int dns_stub_listener_extra_new(
82 DnsStubListenerExtra
**ret
) {
84 DnsStubListenerExtra
*l
;
86 l
= new(DnsStubListenerExtra
, 1);
90 *l
= (DnsStubListenerExtra
) {
98 DnsStubListenerExtra
*dns_stub_listener_extra_free(DnsStubListenerExtra
*p
) {
102 p
->udp_event_source
= sd_event_source_disable_unref(p
->udp_event_source
);
103 p
->tcp_event_source
= sd_event_source_disable_unref(p
->tcp_event_source
);
105 hashmap_free(p
->queries_by_packet
);
/* Hash function for DnsPacket keys (used by stub_packet_hash_ops): feeds the packet's protocol, address
 * family, sender address, IP protocol, sender port and the fixed-size DNS header into the siphash
 * state. */
110 static void stub_packet_hash_func(const DnsPacket
*p
, struct siphash
*state
) {
113 siphash24_compress_typesafe(p
->protocol
, state
);
114 siphash24_compress_typesafe(p
->family
, state
);
115 siphash24_compress_typesafe(p
->sender
, state
);
116 siphash24_compress_typesafe(p
->ipproto
, state
);
117 siphash24_compress_typesafe(p
->sender_port
, state
);
/* Exactly sizeof(DnsPacketHeader) bytes starting at the packet header are hashed */
118 siphash24_compress(DNS_PACKET_HEADER(p
), sizeof(DnsPacketHeader
), state
);
120 /* We don't bother hashing the full packet here, just the header */
/* Comparison function for DnsPacket keys (used by stub_packet_hash_ops). Compares, in this order:
 * protocol, address family, sender address, IP protocol, sender port and finally the DNS header bytes.
 * Like stub_packet_hash_func() it only looks at the header, not at the full packet contents.
 *
 * NOTE(review): the statements between the individual comparisons are not visible in this excerpt;
 * presumably each non-zero intermediate result is returned right away — confirm against the full
 * source. */
123 static int stub_packet_compare_func(const DnsPacket
*x
, const DnsPacket
*y
) {
126 r
= CMP(x
->protocol
, y
->protocol
);
130 r
= CMP(x
->family
, y
->family
);
/* The whole sender field is compared bytewise, independently of the family */
134 r
= memcmp(&x
->sender
, &y
->sender
, sizeof(x
->sender
));
138 r
= CMP(x
->ipproto
, y
->ipproto
);
142 r
= CMP(x
->sender_port
, y
->sender_port
);
146 return memcmp(DNS_PACKET_HEADER(x
), DNS_PACKET_HEADER(y
), sizeof(DnsPacketHeader
));
149 DEFINE_HASH_OPS(stub_packet_hash_ops
, DnsPacket
, stub_packet_hash_func
, stub_packet_compare_func
);
151 static int reply_add_with_rrsig(
153 DnsResourceRecord
*rr
,
155 DnsAnswerFlags flags
,
156 DnsResourceRecord
*rrsig
,
163 r
= dns_answer_add_extend(reply
, rr
, ifindex
, flags
, rrsig
);
167 if (with_rrsig
&& rrsig
) {
168 r
= dns_answer_add_extend(reply
, rrsig
, ifindex
, flags
, NULL
);
176 static int dns_stub_collect_answer_by_question(
179 DnsQuestion
*question
,
180 bool with_rrsig
) { /* Add RRSIG RR matching each RR */
187 /* Copies all RRs from 'answer' into 'reply', if they match 'question'. */
189 DNS_ANSWER_FOREACH_ITEM(item
, answer
) {
191 /* We have a question, let's see if this RR matches it */
192 r
= dns_question_matches_rr(question
, item
->rr
, NULL
);
196 /* Maybe there's a CNAME/DNAME in here? If so, that's an answer too */
197 r
= dns_question_matches_cname_or_dname(question
, item
->rr
, NULL
);
204 /* Mask the section info, we want the primary answers to always go without section
205 * info, so that it is added to the answer section when we synthesize a reply. */
207 r
= reply_add_with_rrsig(
211 item
->flags
& ~DNS_ANSWER_MASK_SECTIONS
,
221 static int dns_stub_collect_answer_by_section(
224 DnsAnswerFlags section
,
227 bool with_dnssec
) { /* Include DNSSEC RRs. RRSIG, NSEC, … */
234 /* Copies all RRs from 'answer' into 'reply', if they originate from the specified section. Also,
235 * avoid any RRs listed in 'exclude1' or 'exclude2'. */
237 DNS_ANSWER_FOREACH_ITEM(item
, answer
) {
239 if (dns_answer_contains(exclude1
, item
->rr
) ||
240 dns_answer_contains(exclude2
, item
->rr
))
244 dns_type_is_dnssec(item
->rr
->key
->type
))
247 if (((item
->flags
^ section
) & DNS_ANSWER_MASK_SECTIONS
) != 0)
250 r
= reply_add_with_rrsig(
264 static int dns_stub_assign_sections(
266 DnsQuestion
*question
,
274 /* Let's assign the 'answer' RRs we collected to their respective sections in the reply datagram. We
275 * try to reproduce a section assignment similar to what the upstream DNS server responded to us. We
276 * use the DNS_ANSWER_SECTION_xyz flags to match things up, which is where the original upstream's
277 * packet section assignment is stored in the DnsAnswer object. Not all RRs in the 'answer' objects
278 * come with section information though (for example, because they were synthesized locally, and not
279 * from a DNS packet). To deal with that we extend the assignment logic a bit: anything from the
280 * 'answer' object that directly matches the original question is always put in the ANSWER section,
281 * regardless if it carries section info, or what that section info says. Then, anything from the
282 * 'answer' objects that is from the AUTHORITY section, and wasn't already added to the
283 * ANSWER section is placed in the AUTHORITY section. Everything else from either object is added to
284 * the ADDITIONAL section. */
286 /* Include all RRs that directly answer the question in the answer section */
287 r
= dns_stub_collect_answer_by_question(
295 /* Include all RRs that originate from the authority sections, and aren't already listed in the
296 * answer section, in the authority section */
297 r
= dns_stub_collect_answer_by_section(
298 &q
->reply_authoritative
,
300 DNS_ANSWER_SECTION_AUTHORITY
,
301 q
->reply_answer
, NULL
,
306 /* Include all RRs that originate from the answer or additional sections in the additional section
307 * (except if already listed in the other two sections). Also add all RRs with no section marking. */
308 r
= dns_stub_collect_answer_by_section(
309 &q
->reply_additional
,
311 DNS_ANSWER_SECTION_ANSWER
,
312 q
->reply_answer
, q
->reply_authoritative
,
316 r
= dns_stub_collect_answer_by_section(
317 &q
->reply_additional
,
319 DNS_ANSWER_SECTION_ADDITIONAL
,
320 q
->reply_answer
, q
->reply_authoritative
,
324 r
= dns_stub_collect_answer_by_section(
325 &q
->reply_additional
,
328 q
->reply_answer
, q
->reply_authoritative
,
336 static int dns_stub_make_reply_packet(
340 bool *ret_truncated
) {
342 _cleanup_(dns_packet_unrefp
) DnsPacket
*p
= NULL
;
348 r
= dns_packet_new(&p
, DNS_PROTOCOL_DNS
, 0, max_size
);
352 r
= dns_packet_append_question(p
, q
);
363 DNS_PACKET_HEADER(p
)->qdcount
= htobe16(dns_question_size(q
));
369 static int dns_stub_add_reply_packet_body(
372 DnsAnswer
*authoritative
,
373 DnsAnswer
*additional
,
374 bool edns0_do
, /* Client expects DNSSEC RRs? */
377 unsigned n_answer
= 0, n_authoritative
= 0, n_additional
= 0;
383 /* Add the three sections to the packet. If the answer section doesn't fit we'll signal that as
384 * truncation. If the authoritative section doesn't fit and we are in DNSSEC mode, also signal
385 * truncation. In all other cases where things don't fit don't signal truncation, as for those cases
386 * the dropped RRs should not be essential. */
388 r
= dns_packet_append_answer(p
, answer
, &n_answer
);
394 r
= dns_packet_append_answer(p
, authoritative
, &n_authoritative
);
395 if (r
== -EMSGSIZE
) {
401 r
= dns_packet_append_answer(p
, additional
, &n_additional
);
402 if (r
< 0 && r
!= -EMSGSIZE
)
414 DNS_PACKET_HEADER(p
)->ancount
= htobe16(n_answer
);
415 DNS_PACKET_HEADER(p
)->nscount
= htobe16(n_authoritative
);
416 DNS_PACKET_HEADER(p
)->arcount
= htobe16(n_additional
);
420 static const char *nsid_string(void) {
421 static char buffer
[SD_ID128_STRING_MAX
+ STRLEN(".resolved.systemd.io")] = "";
425 /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
426 * as systemd-resolved, and return a different string for each resolved instance without leaking host
427 * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
428 * machine ID but from which the machine ID cannot be determined.
430 * Clients can use this to determine whether an answer is originating locally or is proxied from
433 if (!isempty(buffer
))
436 r
= sd_id128_get_machine_app_specific(
437 SD_ID128_MAKE(ed
,d3
,12,5d
,16,b9
,41,f9
,a1
,49,5f
,ab
,15,62,ab
,27),
440 log_debug_errno(r
, "Failed to determine machine ID, ignoring: %m");
444 xsprintf(buffer
, SD_ID128_FORMAT_STR
".resolved.systemd.io", SD_ID128_FORMAT_VAL(id
));
448 static int dns_stub_finish_reply_packet(
452 bool tc
, /* set the Truncated bit? */
453 bool aa
, /* set the Authoritative Answer bit? */
454 bool rd
, /* set the Recursion Desired bit? */
455 bool add_opt
, /* add an OPT RR to this packet? */
456 bool edns0_do
, /* set the EDNS0 DNSSEC OK bit? */
457 bool ad
, /* set the DNSSEC authenticated data bit? */
458 bool cd
, /* set the DNSSEC checking disabled bit? */
459 uint16_t max_udp_size
, /* The maximum UDP datagram size to advertise to clients */
460 bool nsid
) { /* whether to add NSID */
467 r
= dns_packet_append_opt(p
, max_udp_size
, edns0_do
, /* include_rfc6975 = */ false, nsid
? nsid_string() : NULL
, rcode
, NULL
);
468 if (r
== -EMSGSIZE
) /* Hit the size limit? then indicate truncation */
473 /* If the client can't do EDNS0, don't do DO either */
476 /* If we don't do EDNS, clamp the rcode to 4 bit */
478 rcode
= DNS_RCODE_SERVFAIL
;
481 /* Note that we allow the AD bit to be set even if client didn't signal DO, as per RFC 6840, section
484 DNS_PACKET_HEADER(p
)->id
= id
;
486 DNS_PACKET_HEADER(p
)->flags
= htobe16(DNS_PACKET_MAKE_FLAGS(
500 static bool address_is_proxy(int family
, const union in_addr_union
*a
) {
503 /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
/* Only IPv4 addresses are looked at below. NOTE(review): the statement guarded by this family check
 * is not visible in this excerpt; presumably it returns false for any other family — confirm. */
505 if (family
!= AF_INET
)
/* Convert the address from big endian to host byte order before comparing it with the constant */
508 return be32toh(a
->in
.s_addr
) == INADDR_DNS_PROXY_STUB
;
511 static int find_socket_fd(
513 DnsStubListenerExtra
*l
,
515 const union in_addr_union
*listen_address
,
520 /* Finds the right socket to use for sending. If we know the extra listener, use the socket of that
521 * listener; otherwise pick the socket via the address to send from. */
/* Extra listener case: the socket is looked up via manager_dns_stub_fd_extra() */
523 return manager_dns_stub_fd_extra(m
, l
, type
);
/* Otherwise: the socket is looked up by family and listening address via manager_dns_stub_fd() */
525 return manager_dns_stub_fd(m
, family
, listen_address
, type
);
528 static int dns_stub_send(
530 DnsStubListenerExtra
*l
,
542 r
= dns_stream_write_packet(s
, reply
);
546 fd
= find_socket_fd(m
, l
, p
->family
, &p
->destination
, SOCK_DGRAM
);
550 if (address_is_proxy(p
->family
, &p
->destination
))
551 /* Force loopback iface if this is the loopback proxy stub
552 * and ifindex was normalized to 0 by manager_recv(). */
553 ifindex
= p
->ifindex
?: LOOPBACK_IFINDEX
;
555 /* Force loopback iface if this is the main listener stub. */
556 ifindex
= l
? p
->ifindex
: LOOPBACK_IFINDEX
;
558 /* Note that it is essential here that we explicitly choose the source IP address for this
559 * packet. This is because otherwise the kernel will choose it automatically based on the
560 * routing table and will thus pick 127.0.0.1 rather than 127.0.0.53/54. */
564 p
->family
, &p
->sender
, p
->sender_port
, &p
->destination
,
568 return log_debug_errno(r
, "Failed to send reply packet: %m");
573 static int dns_stub_reply_with_edns0_do(DnsQuery
*q
) {
576 /* Reply with DNSSEC DO set? Only if client supports it; and we did any DNSSEC verification
577 * ourselves, or consider the data fully authenticated because we generated it locally, or the client
580 return dns_packet_do(q
->request_packet
) &&
581 (q
->answer_dnssec_result
>= 0 || /* we did proper DNSSEC validation … */
582 dns_query_fully_authenticated(q
) || /* … or we considered it authentic otherwise … */
583 DNS_PACKET_CD(q
->request_packet
)); /* … or client set CD */
586 static void dns_stub_suppress_duplicate_section_rrs(DnsQuery
*q
) {
587 /* If we follow a CNAME/DNAME chain we might end up populating our sections with redundant RRs
588 * because we built up the sections from multiple reply packets (one from each CNAME/DNAME chain
589 * element). E.g. it could be that an RR that was included in the first reply's additional section
590 * ends up being relevant as main answer in a subsequent reply in the chain. Let's clean this up, and
591 * remove everything in the "higher priority" sections from the "lower priority" sections.
593 * Note that this removal matches by RR keys instead of the full RRs. This is because RRsets should
594 * always end up in one section fully or not at all, but never be split among sections.
596 * Specifically: we remove ANSWER section RRs from the AUTHORITATIVE and ADDITIONAL sections, as well
597 * as AUTHORITATIVE section RRs from the ADDITIONAL section. */
599 dns_answer_remove_by_answer_keys(&q
->reply_authoritative
, q
->reply_answer
);
600 dns_answer_remove_by_answer_keys(&q
->reply_additional
, q
->reply_answer
);
601 dns_answer_remove_by_answer_keys(&q
->reply_additional
, q
->reply_authoritative
);
604 static int dns_stub_send_reply(
608 _cleanup_(dns_packet_unrefp
) DnsPacket
*reply
= NULL
;
609 bool truncated
, edns0_do
;
614 edns0_do
= dns_stub_reply_with_edns0_do(q
); /* let's check if we shall reply with EDNS0 DO? */
616 r
= dns_stub_make_reply_packet(
618 dns_packet_payload_size_max(q
->request_packet
),
619 q
->request_packet
->question
,
622 return log_debug_errno(r
, "Failed to build reply packet: %m");
624 dns_stub_suppress_duplicate_section_rrs(q
);
626 r
= dns_stub_add_reply_packet_body(
629 q
->reply_authoritative
,
634 return log_debug_errno(r
, "Failed to append reply packet body: %m");
636 r
= dns_stub_finish_reply_packet(
638 DNS_PACKET_ID(q
->request_packet
),
641 dns_query_fully_authoritative(q
),
642 DNS_PACKET_RD(q
->request_packet
),
643 !!q
->request_packet
->opt
,
645 (DNS_PACKET_AD(q
->request_packet
) || dns_packet_do(q
->request_packet
)) && dns_query_fully_authenticated(q
),
646 FLAGS_SET(q
->flags
, SD_RESOLVED_NO_VALIDATE
),
647 q
->stub_listener_extra
? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX
: ADVERTISE_DATAGRAM_SIZE_MAX
,
648 dns_packet_has_nsid_request(q
->request_packet
) > 0 && !q
->stub_listener_extra
);
650 return log_debug_errno(r
, "Failed to build failure packet: %m");
652 return dns_stub_send(q
->manager
, q
->stub_listener_extra
, q
->request_stream
, q
->request_packet
, reply
);
655 static int dns_stub_send_failure(
657 DnsStubListenerExtra
*l
,
661 bool authenticated
) {
663 _cleanup_(dns_packet_unrefp
) DnsPacket
*reply
= NULL
;
670 r
= dns_stub_make_reply_packet(
672 dns_packet_payload_size_max(p
),
676 return log_debug_errno(r
, "Failed to make failure packet: %m");
678 r
= dns_stub_finish_reply_packet(
687 (DNS_PACKET_AD(p
) || dns_packet_do(p
)) && authenticated
,
689 l
? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX
: ADVERTISE_DATAGRAM_SIZE_MAX
,
690 dns_packet_has_nsid_request(p
) > 0 && !l
);
692 return log_debug_errno(r
, "Failed to build failure packet: %m");
694 return dns_stub_send(m
, l
, s
, p
, reply
);
697 static int dns_stub_patch_bypass_reply_packet(
698 DnsPacket
**ret
, /* Where to place the patched packet */
699 DnsPacket
*original
, /* The packet to patch */
700 DnsPacket
*request
, /* The packet the patched packet shall look like a reply to */
702 bool authenticated
) {
703 _cleanup_(dns_packet_unrefp
) DnsPacket
*c
= NULL
;
710 r
= dns_packet_dup(&c
, original
);
714 /* Extract the packet, so that we know where the OPT field is */
715 r
= dns_packet_extract(c
);
719 /* Copy over the original client request ID, so that we can make the upstream query look like our own reply. */
720 DNS_PACKET_HEADER(c
)->id
= DNS_PACKET_HEADER(request
)->id
;
722 /* Patch in our own maximum datagram size, if EDNS0 was on */
723 r
= dns_packet_patch_max_udp_size(c
, ADVERTISE_DATAGRAM_SIZE_MAX
);
727 /* Lower all TTLs by the time passed since we received the datagram. */
728 if (timestamp_is_set(original
->timestamp
)) {
729 r
= dns_packet_patch_ttls(c
, original
->timestamp
);
734 /* Our upstream connection might have supported larger DNS requests than our downstream one, hence
735 * set the TC bit if our reply is larger than what the client supports, and truncate. */
736 if (c
->size
> dns_packet_payload_size_max(request
)) {
737 log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
738 dns_packet_truncate(c
, dns_packet_payload_size_max(request
));
739 DNS_PACKET_HEADER(c
)->flags
= htobe16(be16toh(DNS_PACKET_HEADER(c
)->flags
) | DNS_PACKET_FLAG_TC
);
742 /* Patch the cd bit to reflect the state of validation: set when both we and the upstream
743 * resolver have checking disabled. */
744 DNS_PACKET_HEADER(c
)->flags
= htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c
)->flags
),
745 DNS_PACKET_FLAG_CD
, DNS_PACKET_CD(original
) && !validated
));
747 /* Ensure we don't pass along an untrusted ad flag for bypass packets */
748 DNS_PACKET_HEADER(c
)->flags
= htobe16(UPDATE_FLAG(be16toh(DNS_PACKET_HEADER(c
)->flags
),
749 DNS_PACKET_FLAG_AD
, authenticated
));
755 static void dns_stub_query_complete(DnsQuery
*query
) {
756 _cleanup_(dns_query_freep
) DnsQuery
*q
= query
;
760 assert(q
->request_packet
);
762 if (q
->question_bypass
) {
763 /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
764 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
765 * packets are not 100% compatible.) */
767 if (q
->answer_full_packet
&&
768 q
->answer_full_packet
->protocol
== DNS_PROTOCOL_DNS
) {
769 _cleanup_(dns_packet_unrefp
) DnsPacket
*reply
= NULL
;
771 r
= dns_stub_patch_bypass_reply_packet(&reply
, q
->answer_full_packet
, q
->request_packet
,
772 /* validated = */ !FLAGS_SET(q
->flags
, SD_RESOLVED_NO_VALIDATE
),
773 FLAGS_SET(q
->answer_query_flags
, SD_RESOLVED_AUTHENTICATED
));
775 log_debug_errno(r
, "Failed to patch bypass reply packet: %m");
777 (void) dns_stub_send(q
->manager
, q
->stub_listener_extra
, q
->request_stream
, q
->request_packet
, reply
);
783 /* Take all data from the current reply, and merge it into the three reply sections we are building
784 * up. We do this before processing CNAME redirects, so that we gradually build up our sections, and
785 * keep adding all RRs in the CNAME chain. */
786 r
= dns_stub_assign_sections(
788 dns_query_question_for_protocol(q
, DNS_PROTOCOL_DNS
),
789 dns_stub_reply_with_edns0_do(q
));
791 return (void) log_debug_errno(r
, "Failed to assign sections: %m");
795 case DNS_TRANSACTION_SUCCESS
: {
801 cname_result
= dns_query_process_cname_one(q
);
802 if (cname_result
== -ELOOP
) { /* CNAME loop, let's send what we already have */
803 log_debug("Detected CNAME loop, returning what we already have.");
804 (void) dns_stub_send_reply(q
, q
->answer_rcode
);
807 if (cname_result
< 0) {
808 log_debug_errno(cname_result
, "Failed to process CNAME: %m");
812 if (cname_result
== DNS_QUERY_NOMATCH
) {
813 /* This answer doesn't contain any RR that would answer our question
814 * positively, i.e. neither directly nor via CNAME. */
816 if (first
) /* We never followed a CNAME and the answer doesn't match our
817 * question at all? Then this is final, the empty answer is the
821 /* Otherwise, we already followed a CNAME once within this packet, and the
822 * packet doesn't answer our question. In that case let's restart the query,
823 * now with the redirected question. */
826 return (void) log_debug_errno(r
, "Failed to restart query: %m");
832 r
= dns_stub_assign_sections(
834 dns_query_question_for_protocol(q
, DNS_PROTOCOL_DNS
),
835 dns_stub_reply_with_edns0_do(q
));
837 return (void) log_debug_errno(r
, "Failed to assign sections: %m");
839 if (cname_result
== DNS_QUERY_MATCH
) /* A match? Then we are done, let's return what we got */
842 /* We followed a CNAME, and collected the RRs that answer the redirected question
843 * successfully. Let's not try to do this again. */
844 assert(cname_result
== DNS_QUERY_CNAME
);
851 case DNS_TRANSACTION_RCODE_FAILURE
:
852 (void) dns_stub_send_reply(q
, q
->answer_rcode
);
855 case DNS_TRANSACTION_NOT_FOUND
:
856 (void) dns_stub_send_reply(q
, DNS_RCODE_NXDOMAIN
);
859 case DNS_TRANSACTION_TIMEOUT
:
860 case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED
:
861 /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
864 case DNS_TRANSACTION_NO_SERVERS
:
865 /* We're not configured to give answers for this question. Refuse it. */
866 (void) dns_stub_send_reply(q
, DNS_RCODE_REFUSED
);
869 case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED
:
870 /* This RR Type is not implemented */
871 (void) dns_stub_send_reply(q
, DNS_RCODE_NOTIMP
);
874 case DNS_TRANSACTION_INVALID_REPLY
:
875 case DNS_TRANSACTION_ERRNO
:
876 case DNS_TRANSACTION_ABORTED
:
877 case DNS_TRANSACTION_DNSSEC_FAILED
:
878 case DNS_TRANSACTION_NO_TRUST_ANCHOR
:
879 case DNS_TRANSACTION_NETWORK_DOWN
:
880 case DNS_TRANSACTION_NO_SOURCE
:
881 case DNS_TRANSACTION_STUB_LOOP
:
882 (void) dns_stub_send_reply(q
, DNS_RCODE_SERVFAIL
);
885 case DNS_TRANSACTION_NULL
:
886 case DNS_TRANSACTION_PENDING
:
887 case DNS_TRANSACTION_VALIDATING
:
889 assert_not_reached();
893 static int dns_stub_stream_complete(DnsStream
*s
, int error
) {
896 log_debug_errno(error
, "DNS TCP connection terminated, destroying queries: %m");
901 q
= set_first(s
->queries
);
908 /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
909 * should be kept as long as the client wants to. */
914 static void dns_stub_process_query(Manager
*m
, DnsStubListenerExtra
*l
, DnsStream
*s
, DnsPacket
*p
) {
915 uint64_t protocol_flags
= SD_RESOLVED_PROTOCOLS_ALL
;
916 _cleanup_(dns_query_freep
) DnsQuery
*q
= NULL
;
917 Hashmap
**queries_by_packet
;
924 assert(p
->protocol
== DNS_PROTOCOL_DNS
);
926 if (!l
&& /* l == NULL if this is the main stub */
927 !address_is_proxy(p
->family
, &p
->destination
) && /* don't restrict needlessly for 127.0.0.54 */
928 (in_addr_is_localhost(p
->family
, &p
->sender
) <= 0 ||
929 in_addr_is_localhost(p
->family
, &p
->destination
) <= 0)) {
930 log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
934 if (manager_packet_from_our_transaction(m
, p
)) {
935 log_debug("Got our own packet looped back, ignoring.");
939 queries_by_packet
= l
? &l
->queries_by_packet
: &m
->stub_queries_by_packet
;
940 existing
= hashmap_get(*queries_by_packet
, p
);
941 if (existing
&& dns_packet_equal(existing
->request_packet
, p
)) {
942 log_debug("Got repeat packet from client, ignoring.");
946 r
= dns_packet_extract(p
);
948 log_debug_errno(r
, "Failed to extract resources from incoming packet, ignoring packet: %m");
949 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_FORMERR
, false);
953 if (!dns_packet_version_supported(p
)) {
954 log_debug("Got EDNS OPT field with unsupported version number.");
955 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_BADVERS
, false);
959 if (dns_type_is_obsolete(dns_question_first_key(p
->question
)->type
)) {
960 log_debug("Got message with obsolete key type, refusing.");
961 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_REFUSED
, false);
965 if (dns_type_is_zone_transfer(dns_question_first_key(p
->question
)->type
)) {
966 log_debug("Got request for zone transfer, refusing.");
967 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_REFUSED
, false);
971 if (!DNS_PACKET_RD(p
)) {
972 /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
973 log_debug("Got request with recursion disabled, refusing.");
974 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_REFUSED
, false);
978 r
= hashmap_ensure_allocated(queries_by_packet
, &stub_packet_hash_ops
);
984 if (address_is_proxy(p
->family
, &p
->destination
)) {
985 _cleanup_free_
char *dipa
= NULL
;
987 r
= in_addr_to_string(p
->family
, &p
->destination
, &dipa
);
989 return (void) log_error_errno(r
, "Failed to format destination address: %m");
991 log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
993 protocol_flags
= SD_RESOLVED_DNS
|SD_RESOLVED_NO_ZONE
; /* Turn off mDNS/LLMNR for proxy stub. */
994 } else if (dns_packet_do(p
)) {
995 log_debug("Got request with DNSSEC enabled, enabling bypass logic.");
1000 r
= dns_query_new(m
, &q
, NULL
, NULL
, p
, 0,
1002 SD_RESOLVED_NO_CNAME
|
1003 SD_RESOLVED_NO_SEARCH
|
1004 (DNS_PACKET_CD(p
) ? SD_RESOLVED_NO_VALIDATE
| SD_RESOLVED_NO_CACHE
: 0)|
1005 SD_RESOLVED_REQUIRE_PRIMARY
|
1006 SD_RESOLVED_CLAMP_TTL
|
1007 SD_RESOLVED_RELAX_SINGLE_LABEL
);
1009 r
= dns_query_new(m
, &q
, p
->question
, p
->question
, NULL
, 0,
1011 SD_RESOLVED_NO_SEARCH
|
1012 (DNS_PACKET_CD(p
) ? SD_RESOLVED_NO_VALIDATE
| SD_RESOLVED_NO_CACHE
: 0)|
1013 (dns_packet_do(p
) ? SD_RESOLVED_REQUIRE_PRIMARY
: 0)|
1014 SD_RESOLVED_CLAMP_TTL
);
1015 if (r
== -ENOANO
) /* Refuse query if there is -ENOANO */
1016 return (void) dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_REFUSED
, false);
1018 log_error_errno(r
, "Failed to generate query object: %m");
1019 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_SERVFAIL
, false);
1023 q
->request_packet
= dns_packet_ref(p
);
1024 q
->request_stream
= dns_stream_ref(s
); /* make sure the stream stays around until we can send a reply through it */
1025 q
->stub_listener_extra
= l
;
1026 q
->complete
= dns_stub_query_complete
;
1029 /* Remember which queries belong to this stream, so that we can cancel them when the stream
1030 * is disconnected early */
1032 r
= set_ensure_put(&s
->queries
, NULL
, q
);
1040 /* Add the query to the hash table we use to determine repeat packets now. We don't care about
1041 * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
1042 * isn't particularly bad. */
1043 (void) hashmap_put(*queries_by_packet
, q
->request_packet
, q
);
1045 r
= dns_query_go(q
);
1047 log_error_errno(r
, "Failed to start query: %m");
1048 dns_stub_send_failure(m
, l
, s
, p
, DNS_RCODE_SERVFAIL
, false);
1052 log_debug("Processing query...");
1056 static int on_dns_stub_packet_internal(sd_event_source
*s
, int fd
, uint32_t revents
, Manager
*m
, DnsStubListenerExtra
*l
) {
1057 _cleanup_(dns_packet_unrefp
) DnsPacket
*p
= NULL
;
1060 r
= manager_recv(m
, fd
, DNS_PROTOCOL_DNS
, &p
);
1064 if (dns_packet_validate_query(p
) > 0) {
1065 log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p
));
1067 dns_stub_process_query(m
, l
, NULL
, p
);
1069 log_debug("Invalid DNS stub UDP packet, ignoring.");
/* I/O event callback for the UDP sockets set up by manager_dns_stub_fd(). Hands the event over to
 * on_dns_stub_packet_internal(), passing 'userdata' in the Manager position and NULL as the extra
 * listener. */
1074 static int on_dns_stub_packet(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1075 return on_dns_stub_packet_internal(s
, fd
, revents
, userdata
, NULL
);
/* I/O event callback for the UDP sockets set up by manager_dns_stub_fd_extra(). 'userdata' must be the
 * (non-NULL) DnsStubListenerExtra object; the event is handed over to on_dns_stub_packet_internal()
 * together with the listener and the Manager it references. */
1078 static int on_dns_stub_packet_extra(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1079 DnsStubListenerExtra
*l
= ASSERT_PTR(userdata
);
1081 return on_dns_stub_packet_internal(s
, fd
, revents
, l
->manager
, l
);
1084 static int on_dns_stub_stream_packet(DnsStream
*s
, DnsPacket
*p
) {
1089 if (dns_packet_validate_query(p
) > 0) {
1090 log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p
));
1092 dns_stub_process_query(s
->manager
, s
->stub_listener_extra
, s
, p
);
1094 log_debug("Invalid DNS stub TCP packet, ignoring.");
1099 static int on_dns_stub_stream_internal(sd_event_source
*s
, int fd
, uint32_t revents
, Manager
*m
, DnsStubListenerExtra
*l
) {
1103 cfd
= accept4(fd
, NULL
, NULL
, SOCK_NONBLOCK
|SOCK_CLOEXEC
);
1105 if (ERRNO_IS_ACCEPT_AGAIN(errno
))
1111 r
= dns_stream_new(m
, &stream
, DNS_STREAM_STUB
, DNS_PROTOCOL_DNS
, cfd
, NULL
,
1112 on_dns_stub_stream_packet
, dns_stub_stream_complete
, DNS_STREAM_STUB_TIMEOUT_USEC
);
1118 stream
->stub_listener_extra
= l
;
1120 /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
/* I/O event callback for the TCP listening sockets set up by manager_dns_stub_fd(). Hands the event
 * over to on_dns_stub_stream_internal(), passing 'userdata' in the Manager position and NULL as the
 * extra listener. */
1125 static int on_dns_stub_stream(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1126 return on_dns_stub_stream_internal(s
, fd
, revents
, userdata
, NULL
);
/* I/O event callback for the TCP listening sockets set up by manager_dns_stub_fd_extra(). 'userdata'
 * must be the (non-NULL) DnsStubListenerExtra object; the event is handed over to
 * on_dns_stub_stream_internal() together with the listener and the Manager it references. */
1129 static int on_dns_stub_stream_extra(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1130 DnsStubListenerExtra
*l
= ASSERT_PTR(userdata
);
1132 return on_dns_stub_stream_internal(s
, fd
, revents
, l
->manager
, l
);
/* Applies the socket options that are set on every stub socket, regardless of socket type; it is called
 * by both manager_dns_stub_fd() and manager_dns_stub_fd_extra(). Enables SO_REUSEADDR as well as
 * reception of packet info and TTL ancillary data. 'family' must be AF_INET or AF_INET6.
 *
 * NOTE(review): the handling of the individual return values is not visible in this excerpt. */
1135 static int set_dns_stub_common_socket_options(int fd
, int family
) {
1139 assert(IN_SET(family
, AF_INET
, AF_INET6
));
1141 r
= setsockopt_int(fd
, SOL_SOCKET
, SO_REUSEADDR
, true);
1145 r
= socket_set_recvpktinfo(fd
, family
, true);
1149 r
= socket_set_recvttl(fd
, family
, true);
/* Applies the TCP specific options to a stub socket: TCP_FASTOPEN (with a queue length of 5) and
 * TCP_NODELAY. Going by the log messages below, a failure to set either option is only logged at debug
 * level and otherwise ignored. */
1156 static int set_dns_stub_common_tcp_socket_options(int fd
) {
1161 r
= setsockopt_int(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, 5); /* Everybody appears to pick qlen=5, let's do the same here. */
1163 log_debug_errno(r
, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");
1165 r
= setsockopt_int(fd
, IPPROTO_TCP
, TCP_NODELAY
, true);
1167 log_debug_errno(r
, "Failed to enable TCP_NODELAY mode, ignoring: %m");
1172 static int manager_dns_stub_fd(
1175 const union in_addr_union
*listen_addr
,
1178 sd_event_source
**event_source
;
1179 _cleanup_close_
int fd
= -EBADF
;
1180 union sockaddr_union sa
;
1184 assert(listen_addr
);
1186 if (type
== SOCK_DGRAM
)
1187 event_source
= address_is_proxy(family
, listen_addr
) ? &m
->dns_proxy_stub_udp_event_source
: &m
->dns_stub_udp_event_source
;
1188 else if (type
== SOCK_STREAM
)
1189 event_source
= address_is_proxy(family
, listen_addr
) ? &m
->dns_proxy_stub_tcp_event_source
: &m
->dns_stub_tcp_event_source
;
1191 return -EPROTONOSUPPORT
;
1194 return sd_event_source_get_io_fd(*event_source
);
1196 fd
= socket(family
, type
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
1200 r
= set_dns_stub_common_socket_options(fd
, family
);
1204 if (type
== SOCK_STREAM
) {
1205 r
= set_dns_stub_common_tcp_socket_options(fd
);
1210 /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
1211 * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
1212 * so to redirect external traffic to it. */
1214 if (!address_is_proxy(family
, listen_addr
)) {
1215 /* Make sure no traffic from outside the local host can leak onto this socket */
1216 r
= socket_bind_to_ifindex(fd
, LOOPBACK_IFINDEX
);
1220 r
= socket_set_ttl(fd
, family
, 1);
1223 } else if (type
== SOCK_DGRAM
) {
1224 /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
1225 * a longer discussion. (We only do this for sockets that are potentially externally
1226 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
1227 * see above, so that packets don't get routed at all.) */
1228 r
= socket_disable_pmtud(fd
, family
);
1230 log_debug_errno(r
, "Failed to disable UDP PMTUD, ignoring: %m");
1232 r
= socket_set_recvfragsize(fd
, family
, true);
1234 log_debug_errno(r
, "Failed to enable fragment size reception, ignoring: %m");
1237 r
= sockaddr_set_in_addr(&sa
, family
, listen_addr
, 53);
1241 if (bind(fd
, &sa
.sa
, sizeof(sa
.in
)) < 0)
1244 if (type
== SOCK_STREAM
&&
1245 listen(fd
, SOMAXCONN_DELUXE
) < 0)
1248 r
= sd_event_add_io(m
->event
, event_source
, fd
, EPOLLIN
,
1249 type
== SOCK_DGRAM
? on_dns_stub_packet
: on_dns_stub_stream
,
1254 r
= sd_event_source_set_io_fd_own(*event_source
, true);
1258 (void) sd_event_source_set_description(*event_source
,
1259 type
== SOCK_DGRAM
? "dns-stub-udp" : "dns-stub-tcp");
/* Creates the listening socket of the given type (SOCK_DGRAM or SOCK_STREAM) for the extra stub
 * listener 'l', binds it to the listener's family/address/port and registers it as an I/O event
 * source on m->event.
 *
 * NOTE(review): in this view of the function several statements (checks on 'r', return/jump
 * statements, closing braces, trailing call arguments) are not visible; the comments below only
 * describe what the visible statements do. */
1264 static int manager_dns_stub_fd_extra(Manager
*m
, DnsStubListenerExtra
*l
, int type
) {
1265 _cleanup_free_
char *pretty
= NULL
;
1266 _cleanup_close_
int fd
= -EBADF
;
1267 union sockaddr_union sa
;
/* Only UDP and TCP sockets are supported here. */
1272 assert(IN_SET(type
, SOCK_DGRAM
, SOCK_STREAM
));
/* Pick the per-listener event source slot matching the socket type: the UDP slot for SOCK_DGRAM,
 * the TCP slot otherwise. */
1274 sd_event_source
**event_source
= type
== SOCK_DGRAM
? &l
->udp_event_source
: &l
->tcp_event_source
;
/* Hands back the fd of the event source in that slot. NOTE(review): the guarding condition is not
 * visible here; presumably this is only taken when *event_source is already set. Confirm. */
1276 return sd_event_source_get_io_fd(*event_source
);
/* Listener ports below 1024 are only set up when CAP_NET_BIND_SERVICE is in the effective set;
 * otherwise a warning is logged (per its text, the listener is not created). */
1278 if (!have_effective_cap(CAP_NET_BIND_SERVICE
) && dns_stub_listener_extra_port(l
) < 1024) {
1279 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating extra stub listener on port %hu.",
1280 dns_stub_listener_extra_port(l
));
/* Fill in the bind address from the listener's family, address and port: the .in member for
 * AF_INET, the .in6 member in the other branch. The port is converted with htobe16(). */
1284 if (l
->family
== AF_INET
)
1285 sa
= (union sockaddr_union
) {
1286 .in
.sin_family
= l
->family
,
1287 .in
.sin_port
= htobe16(dns_stub_listener_extra_port(l
)),
1288 .in
.sin_addr
= l
->address
.in
,
1291 sa
= (union sockaddr_union
) {
1292 .in6
.sin6_family
= l
->family
,
1293 .in6
.sin6_port
= htobe16(dns_stub_listener_extra_port(l
)),
1294 .in6
.sin6_addr
= l
->address
.in6
,
/* The socket is created close-on-exec and non-blocking. */
1297 fd
= socket(l
->family
, type
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
/* Options shared by all stub sockets, plus the TCP-specific ones for stream sockets. */
1303 r
= set_dns_stub_common_socket_options(fd
, l
->family
);
1307 if (type
== SOCK_STREAM
) {
1308 r
= set_dns_stub_common_tcp_socket_options(fd
);
1313 /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
1314 * people may want ttl > 1. */
/* NOTE(review): freebind is presumably enabled so that binding works even if the configured
 * address is not (yet) assigned locally. Confirm against socket_set_freebind(). */
1316 r
= socket_set_freebind(fd
, l
->family
, true);
/* UDP only: failures of these two options are logged at debug level and, per the messages,
 * ignored. */
1320 if (type
== SOCK_DGRAM
) {
1321 r
= socket_disable_pmtud(fd
, l
->family
);
1323 log_debug_errno(r
, "Failed to disable UDP PMTUD, ignoring: %m");
1325 r
= socket_set_recvfragsize(fd
, l
->family
, true);
1327 log_debug_errno(r
, "Failed to enable fragment size reception, ignoring: %m");
/* RET_NERRNO() wraps the bind() call so its outcome lands in 'r'. NOTE(review): presumably as a
 * negative errno-style value on failure; confirm against the macro definition. */
1330 r
= RET_NERRNO(bind(fd
, &sa
.sa
, sockaddr_len(&sa
)));
/* Only stream sockets are put into listening state. */
1334 if (type
== SOCK_STREAM
&&
1335 listen(fd
, SOMAXCONN_DELUXE
) < 0) {
/* Watch the socket for EPOLLIN, dispatching to the UDP or TCP handler of the extra stub. */
1340 r
= sd_event_add_io(m
->event
, event_source
, fd
, EPOLLIN
,
1341 type
== SOCK_DGRAM
? on_dns_stub_packet_extra
: on_dns_stub_stream_extra
,
/* Mark the fd as owned by the event source (own = true). */
1346 r
= sd_event_source_set_io_fd_own(*event_source
, true);
/* Naming the event source is best-effort; the result is discarded. */
1350 (void) sd_event_source_set_description(*event_source
,
1351 type
== SOCK_DGRAM
? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
/* The address string is only formatted when debug logging is enabled. */
1353 if (DEBUG_LOGGING
) {
1354 (void) in_addr_port_to_string(l
->family
, &l
->address
, l
->port
, &pretty
);
1355 log_debug("Listening on %s socket %s.",
1356 type
== SOCK_DGRAM
? "UDP" : "TCP",
/* Failure reporting: the warning distinguishes -EADDRINUSE from every other error, and the result
 * of log_warning_errno() is returned. NOTE(review): the label or condition leading here is not
 * visible in this view. */
1364 (void) in_addr_port_to_string(l
->family
, &l
->address
, l
->port
, &pretty
);
1365 return log_warning_errno(r
,
1366 r
== -EADDRINUSE
? "Another process is already listening on %s socket %s: %m" :
1367 "Failed to listen on %s socket %s: %m",
1368 type
== SOCK_DGRAM
? "UDP" : "TCP",
/* Starts the DNS stub listeners of the manager: the main stub sockets (regular and proxy address,
 * UDP and/or TCP depending on m->dns_stub_listener_mode) and then any configured extra stub
 * listeners.
 *
 * NOTE(review): several statements of this function (struct member declarations, the statements
 * guarded by some conditions, closing braces, the final return) are not visible in this view; the
 * comments below only describe the visible statements. */
1372 int manager_dns_stub_start(Manager
*m
) {
/* Nothing is created when the stub is disabled; a missing CAP_NET_BIND_SERVICE only yields a
 * warning (per its text, no listener on port 53 is created). */
1377 if (m
->dns_stub_listener_mode
== DNS_STUB_LISTENER_NO
)
1378 log_debug("Not creating stub listener.");
1379 else if (!have_effective_cap(CAP_NET_BIND_SERVICE
))
1380 log_warning("Missing CAP_NET_BIND_SERVICE capability, not creating stub listener on port 53.");
/* The four main stub sockets: UDP and TCP on both the regular and the proxy stub address. */
1382 static const struct {
1385 } stub_sockets
[] = {
1386 { INADDR_DNS_STUB
, SOCK_DGRAM
},
1387 { INADDR_DNS_STUB
, SOCK_STREAM
},
1388 { INADDR_DNS_PROXY_STUB
, SOCK_DGRAM
},
1389 { INADDR_DNS_PROXY_STUB
, SOCK_STREAM
},
1392 log_debug("Creating stub listener using %s.",
1393 m
->dns_stub_listener_mode
== DNS_STUB_LISTENER_UDP
? "UDP" :
1394 m
->dns_stub_listener_mode
== DNS_STUB_LISTENER_TCP
? "TCP" :
1397 FOREACH_ELEMENT(s
, stub_sockets
) {
/* The table stores the address as a host-order constant; convert it with htobe32() for the
 * in_addr_union. */
1398 union in_addr_union a
= {
1399 .in
.s_addr
= htobe32(s
->addr
),
/* UDP-only mode has no use for the stream entries, and TCP-only mode none for the datagram ones.
 * NOTE(review): the statements guarded by these two conditions are not visible here; presumably
 * they skip the entry. */
1402 if (m
->dns_stub_listener_mode
== DNS_STUB_LISTENER_UDP
&& s
->socket_type
== SOCK_STREAM
)
1404 if (m
->dns_stub_listener_mode
== DNS_STUB_LISTENER_TCP
&& s
->socket_type
== SOCK_DGRAM
)
/* The main stub sockets are always IPv4. */
1407 r
= manager_dns_stub_fd(m
, AF_INET
, &a
, s
->socket_type
);
/* Human-readable "<proto> socket <addr>:53" string for the messages below. NOTE(review): the
 * condition under which this error path is entered is not visible here. */
1409 _cleanup_free_
char *busy_socket
= NULL
;
1411 if (asprintf(&busy_socket
,
1412 "%s socket " IPV4_ADDRESS_FMT_STR
":53",
1413 s
->socket_type
== SOCK_DGRAM
? "UDP" : "TCP",
1414 IPV4_ADDRESS_FMT_VAL(a
.in
)) < 0)
/* -EADDRINUSE and -EPERM only produce a warning, followed by manager_dns_stub_stop(m). For the
 * remaining case the error is logged with log_error_errno() and its result returned. */
1417 if (IN_SET(r
, -EADDRINUSE
, -EPERM
)) {
1418 log_warning_errno(r
,
1419 r
== -EADDRINUSE
? "Another process is already listening on %s.\n"
1420 "Turning off local DNS stub support." :
1421 "Failed to listen on %s: %m.\n"
1422 "Turning off local DNS stub support.",
1424 manager_dns_stub_stop(m
);
1428 return log_error_errno(r
, "Failed to listen on %s: %m", busy_socket
);
/* Extra listeners get a UDP and/or TCP socket according to their mode flags; the results of
 * manager_dns_stub_fd_extra() are explicitly discarded with (void) casts. */
1433 if (!ordered_set_isempty(m
->dns_extra_stub_listeners
)) {
1434 DnsStubListenerExtra
*l
;
1436 log_debug("Creating extra stub listeners.");
1438 ORDERED_SET_FOREACH(l
, m
->dns_extra_stub_listeners
) {
1439 if (FLAGS_SET(l
->mode
, DNS_STUB_LISTENER_UDP
))
1440 (void) manager_dns_stub_fd_extra(m
, l
, SOCK_DGRAM
);
1441 if (FLAGS_SET(l
->mode
, DNS_STUB_LISTENER_TCP
))
1442 (void) manager_dns_stub_fd_extra(m
, l
, SOCK_STREAM
);
/* Stops the main DNS stub listeners: each of the four event sources (regular and proxy stub, UDP
 * and TCP) is passed to sd_event_source_disable_unref() and the field is overwritten with the
 * returned value, so no stale pointer is left behind in the manager.
 *
 * NOTE(review): lines between the signature and the first assignment, and after the last one, are
 * not visible in this view. */
1449 void manager_dns_stub_stop(Manager
*m
) {
1452 m
->dns_stub_udp_event_source
= sd_event_source_disable_unref(m
->dns_stub_udp_event_source
);
1453 m
->dns_stub_tcp_event_source
= sd_event_source_disable_unref(m
->dns_stub_tcp_event_source
);
1454 m
->dns_proxy_stub_udp_event_source
= sd_event_source_disable_unref(m
->dns_proxy_stub_udp_event_source
);
1455 m
->dns_proxy_stub_tcp_event_source
= sd_event_source_disable_unref(m
->dns_proxy_stub_tcp_event_source
);
/* String names of the stub listener modes, indexed by the mode enum value: "no", "udp", "tcp" and
 * "yes". */
1458 static const char* const dns_stub_listener_mode_table
[_DNS_STUB_LISTENER_MODE_MAX
] = {
1459 [DNS_STUB_LISTENER_NO
] = "no",
1460 [DNS_STUB_LISTENER_UDP
] = "udp",
1461 [DNS_STUB_LISTENER_TCP
] = "tcp",
1462 [DNS_STUB_LISTENER_YES
] = "yes",
/* Instantiates the string lookup helpers for DnsStubListenerMode under the
 * "dns_stub_listener_mode" name prefix. NOTE(review): presumably these are backed by
 * dns_stub_listener_mode_table, with DNS_STUB_LISTENER_YES as the value a boolean "true" string
 * maps to. Confirm against the macro definition in string-table.h. */
1464 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode
, DnsStubListenerMode
, DNS_STUB_LISTENER_YES
);