]> git.ipfire.org Git - people/ms/dnsmasq.git/blob - src/forward.c
protocol handling for DNSSEC
[people/ms/dnsmasq.git] / src / forward.c
1 /* dnsmasq is Copyright (c) 2000-2014 Simon Kelley
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 dated June, 1991, or
6 (at your option) version 3 dated 29 June, 2007.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include "dnsmasq.h"
18
19 static struct frec *lookup_frec(unsigned short id, unsigned int crc);
20 static struct frec *lookup_frec_by_sender(unsigned short id,
21 union mysockaddr *addr,
22 unsigned int crc);
23 static unsigned short get_id(unsigned int crc);
24 static void free_frec(struct frec *f);
25 static struct randfd *allocate_rfd(int family);
26
27 /* Send a UDP packet with its source address set as "source"
28 unless nowild is true, when we just send it with the kernel default */
29 int send_from(int fd, int nowild, char *packet, size_t len,
30 union mysockaddr *to, struct all_addr *source,
31 unsigned int iface)
32 {
33 struct msghdr msg;
34 struct iovec iov[1];
35 union {
36 struct cmsghdr align; /* this ensures alignment */
37 #if defined(HAVE_LINUX_NETWORK)
38 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39 #elif defined(IP_SENDSRCADDR)
40 char control[CMSG_SPACE(sizeof(struct in_addr))];
41 #endif
42 #ifdef HAVE_IPV6
43 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44 #endif
45 } control_u;
46
47 iov[0].iov_base = packet;
48 iov[0].iov_len = len;
49
50 msg.msg_control = NULL;
51 msg.msg_controllen = 0;
52 msg.msg_flags = 0;
53 msg.msg_name = to;
54 msg.msg_namelen = sa_len(to);
55 msg.msg_iov = iov;
56 msg.msg_iovlen = 1;
57
58 if (!nowild)
59 {
60 struct cmsghdr *cmptr;
61 msg.msg_control = &control_u;
62 msg.msg_controllen = sizeof(control_u);
63 cmptr = CMSG_FIRSTHDR(&msg);
64
65 if (to->sa.sa_family == AF_INET)
66 {
67 #if defined(HAVE_LINUX_NETWORK)
68 struct in_pktinfo p;
69 p.ipi_ifindex = 0;
70 p.ipi_spec_dst = source->addr.addr4;
71 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
72 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
73 cmptr->cmsg_level = IPPROTO_IP;
74 cmptr->cmsg_type = IP_PKTINFO;
75 #elif defined(IP_SENDSRCADDR)
76 memcpy(CMSG_DATA(cmptr), &(source->addr.addr4), sizeof(source->addr.addr4));
77 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78 cmptr->cmsg_level = IPPROTO_IP;
79 cmptr->cmsg_type = IP_SENDSRCADDR;
80 #endif
81 }
82 else
83 #ifdef HAVE_IPV6
84 {
85 struct in6_pktinfo p;
86 p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87 p.ipi6_addr = source->addr.addr6;
88 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
89 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
90 cmptr->cmsg_type = daemon->v6pktinfo;
91 cmptr->cmsg_level = IPPROTO_IPV6;
92 }
93 #else
94 (void)iface; /* eliminate warning */
95 #endif
96 }
97
98 while (sendmsg(fd, &msg, 0) == -1)
99 {
100 if (retry_send())
101 continue;
102
103 /* If interface is still in DAD, EINVAL results - ignore that. */
104 if (errno == EINVAL)
105 break;
106
107 my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
108 return 0;
109 }
110
111 return 1;
112 }
113
114 static unsigned int search_servers(time_t now, struct all_addr **addrpp,
115 unsigned int qtype, char *qdomain, int *type, char **domain, int *norebind)
116
117 {
118 /* If the query ends in the domain in one of our servers, set
119 domain to point to that name. We find the largest match to allow both
120 domain.org and sub.domain.org to exist. */
121
122 unsigned int namelen = strlen(qdomain);
123 unsigned int matchlen = 0;
124 struct server *serv;
125 unsigned int flags = 0;
126
127 for (serv = daemon->servers; serv; serv=serv->next)
128 /* domain matches take priority over NODOTS matches */
129 if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
130 {
131 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
132 *type = SERV_FOR_NODOTS;
133 if (serv->flags & SERV_NO_ADDR)
134 flags = F_NXDOMAIN;
135 else if (serv->flags & SERV_LITERAL_ADDRESS)
136 {
137 if (sflag & qtype)
138 {
139 flags = sflag;
140 if (serv->addr.sa.sa_family == AF_INET)
141 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
142 #ifdef HAVE_IPV6
143 else
144 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
145 #endif
146 }
147 else if (!flags || (flags & F_NXDOMAIN))
148 flags = F_NOERR;
149 }
150 }
151 else if (serv->flags & SERV_HAS_DOMAIN)
152 {
153 unsigned int domainlen = strlen(serv->domain);
154 char *matchstart = qdomain + namelen - domainlen;
155 if (namelen >= domainlen &&
156 hostname_isequal(matchstart, serv->domain) &&
157 (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
158 {
159 if (serv->flags & SERV_NO_REBIND)
160 *norebind = 1;
161 else
162 {
163 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
164 /* implement priority rules for --address and --server for same domain.
165 --address wins if the address is for the correct AF
166 --server wins otherwise. */
167 if (domainlen != 0 && domainlen == matchlen)
168 {
169 if ((serv->flags & SERV_LITERAL_ADDRESS))
170 {
171 if (!(sflag & qtype) && flags == 0)
172 continue;
173 }
174 else
175 {
176 if (flags & (F_IPV4 | F_IPV6))
177 continue;
178 }
179 }
180
181 if (domainlen >= matchlen)
182 {
183 *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND);
184 *domain = serv->domain;
185 matchlen = domainlen;
186 if (serv->flags & SERV_NO_ADDR)
187 flags = F_NXDOMAIN;
188 else if (serv->flags & SERV_LITERAL_ADDRESS)
189 {
190 if (sflag & qtype)
191 {
192 flags = sflag;
193 if (serv->addr.sa.sa_family == AF_INET)
194 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
195 #ifdef HAVE_IPV6
196 else
197 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
198 #endif
199 }
200 else if (!flags || (flags & F_NXDOMAIN))
201 flags = F_NOERR;
202 }
203 else
204 flags = 0;
205 }
206 }
207 }
208 }
209
210 if (flags == 0 && !(qtype & F_QUERY) &&
211 option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
212 /* don't forward A or AAAA queries for simple names, except the empty name */
213 flags = F_NOERR;
214
215 if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
216 flags = F_NOERR;
217
218 if (flags)
219 {
220 int logflags = 0;
221
222 if (flags == F_NXDOMAIN || flags == F_NOERR)
223 logflags = F_NEG | qtype;
224
225 log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
226 }
227 else if ((*type) & SERV_USE_RESOLV)
228 {
229 *type = 0; /* use normal servers for this domain */
230 *domain = NULL;
231 }
232 return flags;
233 }
234
235 static int forward_query(int udpfd, union mysockaddr *udpaddr,
236 struct all_addr *dst_addr, unsigned int dst_iface,
237 struct dns_header *header, size_t plen, time_t now, struct frec *forward)
238 {
239 char *domain = NULL;
240 int type = 0, norebind = 0;
241 struct all_addr *addrp = NULL;
242 unsigned int crc = questions_crc(header, plen, daemon->namebuff);
243 unsigned int flags = 0;
244 unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
245 struct server *start = NULL;
246
247 /* RFC 4035: sect 4.6 para 2 */
248 header->hb4 &= ~HB4_AD;
249
250 /* may be no servers available. */
251 if (!daemon->servers)
252 forward = NULL;
253 else if (forward || (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, crc)))
254 {
255 /* retry on existing query, send to all available servers */
256 domain = forward->sentto->domain;
257 forward->sentto->failed_queries++;
258 if (!option_bool(OPT_ORDER))
259 {
260 forward->forwardall = 1;
261 daemon->last_server = NULL;
262 }
263 type = forward->sentto->flags & SERV_TYPE;
264 if (!(start = forward->sentto->next))
265 start = daemon->servers; /* at end of list, recycle */
266 header->id = htons(forward->new_id);
267 }
268 else
269 {
270 if (gotname)
271 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
272
273 if (!flags && !(forward = get_new_frec(now, NULL, 0)))
274 /* table full - server failure. */
275 flags = F_NEG;
276
277 if (forward)
278 {
279 forward->source = *udpaddr;
280 forward->dest = *dst_addr;
281 forward->iface = dst_iface;
282 forward->orig_id = ntohs(header->id);
283 forward->new_id = get_id(crc);
284 forward->fd = udpfd;
285 forward->crc = crc;
286 forward->forwardall = 0;
287 forward->flags = 0;
288 if (norebind)
289 forward->flags |= FREC_NOREBIND;
290 if (header->hb4 & HB4_CD)
291 forward->flags |= FREC_CHECKING_DISABLED;
292
293 header->id = htons(forward->new_id);
294
295 /* In strict_order mode, always try servers in the order
296 specified in resolv.conf, if a domain is given
297 always try all the available servers,
298 otherwise, use the one last known to work. */
299
300 if (type == 0)
301 {
302 if (option_bool(OPT_ORDER))
303 start = daemon->servers;
304 else if (!(start = daemon->last_server) ||
305 daemon->forwardcount++ > FORWARD_TEST ||
306 difftime(now, daemon->forwardtime) > FORWARD_TIME)
307 {
308 start = daemon->servers;
309 forward->forwardall = 1;
310 daemon->forwardcount = 0;
311 daemon->forwardtime = now;
312 }
313 }
314 else
315 {
316 start = daemon->servers;
317 if (!option_bool(OPT_ORDER))
318 forward->forwardall = 1;
319 }
320 }
321 }
322
323 /* check for send errors here (no route to host)
324 if we fail to send to all nameservers, send back an error
325 packet straight away (helps modem users when offline) */
326
327 if (!flags && forward)
328 {
329 struct server *firstsentto = start;
330 int forwarded = 0;
331
332 if (option_bool(OPT_ADD_MAC))
333 plen = add_mac(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
334
335 if (option_bool(OPT_CLIENT_SUBNET))
336 {
337 size_t new = add_source_addr(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
338 if (new != plen)
339 {
340 plen = new;
341 forward->flags |= FREC_HAS_SUBNET;
342 }
343 }
344
345 #ifdef HAVE_DNSSEC
346 if (option_bool(OPT_DNSSEC_VALID))
347 {
348 plen = add_do_bit(header, plen, ((char *) header) + daemon->packet_buff_sz);
349 header->hb4 |= HB4_CD;
350 }
351 #endif
352
353 while (1)
354 {
355 /* only send to servers dealing with our domain.
356 domain may be NULL, in which case server->domain
357 must be NULL also. */
358
359 if (type == (start->flags & SERV_TYPE) &&
360 (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
361 !(start->flags & SERV_LITERAL_ADDRESS))
362 {
363 int fd;
364
365 /* find server socket to use, may need to get random one. */
366 if (start->sfd)
367 fd = start->sfd->fd;
368 else
369 {
370 #ifdef HAVE_IPV6
371 if (start->addr.sa.sa_family == AF_INET6)
372 {
373 if (!forward->rfd6 &&
374 !(forward->rfd6 = allocate_rfd(AF_INET6)))
375 break;
376 daemon->rfd_save = forward->rfd6;
377 fd = forward->rfd6->fd;
378 }
379 else
380 #endif
381 {
382 if (!forward->rfd4 &&
383 !(forward->rfd4 = allocate_rfd(AF_INET)))
384 break;
385 daemon->rfd_save = forward->rfd4;
386 fd = forward->rfd4->fd;
387 }
388
389 #ifdef HAVE_CONNTRACK
390 /* Copy connection mark of incoming query to outgoing connection. */
391 if (option_bool(OPT_CONNTRACK))
392 {
393 unsigned int mark;
394 if (get_incoming_mark(&forward->source, &forward->dest, 0, &mark))
395 setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
396 }
397 #endif
398 }
399
400 if (sendto(fd, (char *)header, plen, 0,
401 &start->addr.sa,
402 sa_len(&start->addr)) == -1)
403 {
404 if (retry_send())
405 continue;
406 }
407 else
408 {
409 /* Keep info in case we want to re-send this packet */
410 daemon->srv_save = start;
411 daemon->packet_len = plen;
412
413 if (!gotname)
414 strcpy(daemon->namebuff, "query");
415 if (start->addr.sa.sa_family == AF_INET)
416 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
417 (struct all_addr *)&start->addr.in.sin_addr, NULL);
418 #ifdef HAVE_IPV6
419 else
420 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
421 (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
422 #endif
423 start->queries++;
424 forwarded = 1;
425 forward->sentto = start;
426 if (!forward->forwardall)
427 break;
428 forward->forwardall++;
429 }
430 }
431
432 if (!(start = start->next))
433 start = daemon->servers;
434
435 if (start == firstsentto)
436 break;
437 }
438
439 if (forwarded)
440 return 1;
441
442 /* could not send on, prepare to return */
443 header->id = htons(forward->orig_id);
444 free_frec(forward); /* cancel */
445 }
446
447 /* could not send on, return empty answer or address if known for whole domain */
448 if (udpfd != -1)
449 {
450 plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
451 send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
452 }
453
454 return 0;
455 }
456
457 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
458 int no_cache, int cache_secure, int check_subnet, union mysockaddr *query_source)
459 {
460 unsigned char *pheader, *sizep;
461 char **sets = 0;
462 int munged = 0, is_sign;
463 size_t plen;
464
465 #ifdef HAVE_IPSET
466 /* Similar algorithm to search_servers. */
467 struct ipsets *ipset_pos;
468 unsigned int namelen = strlen(daemon->namebuff);
469 unsigned int matchlen = 0;
470 for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next)
471 {
472 unsigned int domainlen = strlen(ipset_pos->domain);
473 char *matchstart = daemon->namebuff + namelen - domainlen;
474 if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
475 (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
476 domainlen >= matchlen) {
477 matchlen = domainlen;
478 sets = ipset_pos->sets;
479 }
480 }
481 #endif
482
483 /* If upstream is advertising a larger UDP packet size
484 than we allow, trim it so that we don't get overlarge
485 requests for the client. We can't do this for signed packets. */
486
487 if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)))
488 {
489 if (!is_sign)
490 {
491 unsigned short udpsz;
492 unsigned char *psave = sizep;
493
494 GETSHORT(udpsz, sizep);
495 if (udpsz > daemon->edns_pktsz)
496 PUTSHORT(daemon->edns_pktsz, psave);
497 }
498
499 if (check_subnet && !check_source(header, plen, pheader, query_source))
500 {
501 my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
502 return 0;
503 }
504 }
505
506 /* RFC 4035 sect 4.6 para 3 */
507 if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
508 header->hb4 &= ~HB4_AD;
509
510 #ifdef HAVE_DNSSEC
511 if (option_bool(OPT_DNSSEC_VALID))
512 header->hb4 &= ~HB4_AD;
513
514 if (!(header->hb4 & HB4_CD) && cache_secure)
515 header->hb4 |= HB4_AD;
516 #endif
517
518 if (OPCODE(header) != QUERY || (RCODE(header) != NOERROR && RCODE(header) != NXDOMAIN))
519 return n;
520
521 /* Complain loudly if the upstream server is non-recursive. */
522 if (!(header->hb4 & HB4_RA) && RCODE(header) == NOERROR && ntohs(header->ancount) == 0 &&
523 server && !(server->flags & SERV_WARNED_RECURSIVE))
524 {
525 prettyprint_addr(&server->addr, daemon->namebuff);
526 my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
527 if (!option_bool(OPT_LOG))
528 server->flags |= SERV_WARNED_RECURSIVE;
529 }
530
531 if (daemon->bogus_addr && RCODE(header) != NXDOMAIN &&
532 check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
533 {
534 munged = 1;
535 SET_RCODE(header, NXDOMAIN);
536 header->hb3 &= ~HB3_AA;
537 }
538 else
539 {
540 if (RCODE(header) == NXDOMAIN &&
541 extract_request(header, n, daemon->namebuff, NULL) &&
542 check_for_local_domain(daemon->namebuff, now))
543 {
544 /* if we forwarded a query for a locally known name (because it was for
545 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
546 since we know that the domain exists, even if upstream doesn't */
547 munged = 1;
548 header->hb3 |= HB3_AA;
549 SET_RCODE(header, NOERROR);
550 }
551
552 if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, no_cache, cache_secure))
553 {
554 my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
555 munged = 1;
556 }
557 }
558
559 #ifdef HAVE_DNSSEC
560 if (no_cache && !(header->hb4 & HB4_CD))
561 {
562 if (option_bool(OPT_DNSSEC_PERMISS))
563 {
564 unsigned short type;
565 char types[20];
566
567 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
568 {
569 querystr("", types, type);
570 my_syslog(LOG_WARNING, _("DNSSEC validation failed: query %s%s"), daemon->namebuff, types);
571 }
572 else
573 my_syslog(LOG_WARNING, _("DNSSEC validation failed for unknown query"));
574 }
575 else
576 {
577 /* Bogus reply, turn into SERVFAIL */
578 SET_RCODE(header, SERVFAIL);
579 munged = 1;
580 }
581 }
582 #endif
583
584 /* do this after extract_addresses. Ensure NODATA reply and remove
585 nameserver info. */
586
587 if (munged)
588 {
589 header->ancount = htons(0);
590 header->nscount = htons(0);
591 header->arcount = htons(0);
592 }
593
594 /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
595 sections of the packet. Find the new length here and put back pseudoheader
596 if it was removed. */
597 return resize_packet(header, n, pheader, plen);
598 }
599
600 /* sets new last_server */
601 void reply_query(int fd, int family, time_t now)
602 {
603 /* packet from peer server, extract data for cache, and send to
604 original requester */
605 struct dns_header *header;
606 union mysockaddr serveraddr;
607 struct frec *forward;
608 socklen_t addrlen = sizeof(serveraddr);
609 ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
610 size_t nn;
611 struct server *server;
612
613 /* packet buffer overwritten */
614 daemon->srv_save = NULL;
615
616 /* Determine the address of the server replying so that we can mark that as good */
617 serveraddr.sa.sa_family = family;
618 #ifdef HAVE_IPV6
619 if (serveraddr.sa.sa_family == AF_INET6)
620 serveraddr.in6.sin6_flowinfo = 0;
621 #endif
622
623 /* spoof check: answer must come from known server, */
624 for (server = daemon->servers; server; server = server->next)
625 if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
626 sockaddr_isequal(&server->addr, &serveraddr))
627 break;
628
629 header = (struct dns_header *)daemon->packet;
630
631 if (!server ||
632 n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR) ||
633 !(forward = lookup_frec(ntohs(header->id), questions_crc(header, n, daemon->namebuff))))
634 return;
635
636 if ((RCODE(header) == SERVFAIL || RCODE(header) == REFUSED) &&
637 !option_bool(OPT_ORDER) &&
638 forward->forwardall == 0)
639 /* for broken servers, attempt to send to another one. */
640 {
641 unsigned char *pheader;
642 size_t plen;
643 int is_sign;
644
645 /* recreate query from reply */
646 pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
647 if (!is_sign)
648 {
649 header->ancount = htons(0);
650 header->nscount = htons(0);
651 header->arcount = htons(0);
652 if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
653 {
654 header->hb3 &= ~(HB3_QR | HB3_TC);
655 forward_query(-1, NULL, NULL, 0, header, nn, now, forward);
656 return;
657 }
658 }
659 }
660
661 server = forward->sentto;
662
663 if ((forward->sentto->flags & SERV_TYPE) == 0)
664 {
665 if (RCODE(header) == SERVFAIL || RCODE(header) == REFUSED)
666 server = NULL;
667 else
668 {
669 struct server *last_server;
670
671 /* find good server by address if possible, otherwise assume the last one we sent to */
672 for (last_server = daemon->servers; last_server; last_server = last_server->next)
673 if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
674 sockaddr_isequal(&last_server->addr, &serveraddr))
675 {
676 server = last_server;
677 break;
678 }
679 }
680 if (!option_bool(OPT_ALL_SERVERS))
681 daemon->last_server = server;
682 }
683
684 /* If the answer is an error, keep the forward record in place in case
685 we get a good reply from another server. Kill it when we've
686 had replies from all to avoid filling the forwarding table when
687 everything is broken */
688 if (forward->forwardall == 0 || --forward->forwardall == 1 ||
689 (RCODE(header) != REFUSED && RCODE(header) != SERVFAIL))
690 {
691 int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0;
692
693 if (option_bool(OPT_NO_REBIND))
694 check_rebind = !(forward->flags & FREC_NOREBIND);
695
696 /* Don't cache replies where DNSSEC validation was turned off, either
697 the upstream server told us so, or the original query specified it. */
698 if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
699 no_cache_dnssec = 1;
700
701 #ifdef HAVE_DNSSEC
702 if (option_bool(OPT_DNSSEC_VALID) && !(forward->flags & FREC_CHECKING_DISABLED))
703 {
704 int status;
705
706 /* We've had a reply already, which we're validating. Ignore this duplicate */
707 if (forward->stash)
708 return;
709
710 if (header->hb3 & HB3_TC)
711 {
712 /* Truncated answer can't be validated.
713 The client will retry over TCP, but if this is an answer to a
714 DNSSEC-generated query, we have a problem. Should really re-send
715 over TCP. No-one with any sense will make a DNSKEY or DS RRset
716 exceed 4096, so this may not be a real problem. Just log
717 for now. */
718 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
719 my_syslog(LOG_ERR, _("Reply to DNSSEC query truncated - validation fails."));
720 status = STAT_INSECURE;
721 }
722 else if (forward->flags & FREC_DNSKEY_QUERY)
723 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
724 else if (forward->flags & FREC_DS_QUERY)
725 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
726 else
727 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
728
729 /* Can't validate, as we're missing key data. Put this
730 answer aside, whilst we get that. */
731 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
732 {
733 struct frec *new;
734
735 if ((new = get_new_frec(now, NULL, 1)))
736 {
737 struct frec *next = new->next;
738 *new = *forward; /* copy everything, then overwrite */
739 new->next = next;
740 new->stash = NULL;
741 new->blocking_query = NULL;
742 new->rfd4 = NULL;
743 #ifdef HAVE_IPV6
744 new->rfd6 = NULL;
745 #endif
746 new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY);
747
748 if ((forward->stash = blockdata_alloc((char *)header, n)))
749 {
750 int fd;
751
752 forward->stash_len = n;
753
754 new->dependent = forward; /* to find query awaiting new one. */
755 forward->blocking_query = new; /* for garbage cleaning */
756 /* validate routines leave name of required record in daemon->keyname */
757 if (status == STAT_NEED_KEY)
758 {
759 new->flags |= FREC_DNSKEY_QUERY;
760 nn = dnssec_generate_query(header, ((char *) header) + daemon->packet_buff_sz,
761 daemon->keyname, forward->class, T_DNSKEY, &server->addr);
762 }
763 else if (status == STAT_NEED_DS)
764 {
765 new->flags |= FREC_DS_QUERY;
766 nn = dnssec_generate_query(header,((char *) header) + daemon->packet_buff_sz,
767 daemon->keyname, forward->class, T_DS, &server->addr);
768 }
769 new->crc = questions_crc(header, nn, daemon->namebuff);
770 new->new_id = get_id(new->crc);
771 header->id = htons(new->new_id);
772
773 /* Don't resend this. */
774 daemon->srv_save = NULL;
775
776 if (server->sfd)
777 fd = server->sfd->fd;
778 else
779 {
780 fd = -1;
781 #ifdef HAVE_IPV6
782 if (server->addr.sa.sa_family == AF_INET6)
783 {
784 if (new->rfd6 || (new->rfd6 = allocate_rfd(AF_INET6)))
785 fd = new->rfd6->fd;
786 }
787 else
788 #endif
789 {
790 if (new->rfd4 || (new->rfd4 = allocate_rfd(AF_INET)))
791 fd = new->rfd4->fd;
792 }
793 }
794
795 if (fd != -1)
796 {
797 while (sendto(fd, (char *)header, nn, 0, &server->addr.sa, sa_len(&server->addr)) == -1 && retry_send());
798 server->queries++;
799 }
800 }
801 }
802
803 return;
804 }
805
806 /* Ok, we reached far enough up the chain-of-trust that we can validate something.
807 Now wind back down, pulling back answers which wouldn't previously validate
808 and validate them with the new data. Failure to find needed data here is an internal error.
809 Once we get to the original answer (FREC_DNSSEC_QUERY not set) and it validates,
810 return it to the original requestor. */
811 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
812 {
813 while (forward->dependent)
814 {
815 struct frec *prev;
816
817 if (status == STAT_SECURE)
818 {
819 if (forward->flags & FREC_DNSKEY_QUERY)
820 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
821 else if (forward->flags & FREC_DS_QUERY)
822 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
823 }
824
825 prev = forward->dependent;
826 free_frec(forward);
827 forward = prev;
828 forward->blocking_query = NULL; /* already gone */
829 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
830 n = forward->stash_len;
831 }
832
833 /* All DNSKEY and DS records done and in cache, now finally validate original
834 answer, provided last DNSKEY is OK. */
835 if (status == STAT_SECURE)
836 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
837
838 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
839 {
840 my_syslog(LOG_ERR, _("Unexpected missing data for DNSSEC validation"));
841 status = STAT_INSECURE;
842 }
843 }
844
845 log_query(F_KEYTAG | F_SECSTAT, "result", NULL,
846 status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
847
848 no_cache_dnssec = 0;
849
850 if (status == STAT_SECURE)
851 cache_secure = 1;
852 else if (status == STAT_BOGUS)
853 no_cache_dnssec = 1;
854
855 /* restore CD bit to the value in the query */
856 if (forward->flags & FREC_CHECKING_DISABLED)
857 header->hb4 |= HB4_CD;
858 else
859 header->hb4 &= ~HB4_CD;
860 }
861 #endif
862
863 if ((nn = process_reply(header, now, server, (size_t)n, check_rebind, no_cache_dnssec, cache_secure,
864 forward->flags & FREC_HAS_SUBNET, &forward->source)))
865 {
866 header->id = htons(forward->orig_id);
867 header->hb4 |= HB4_RA; /* recursion if available */
868 send_from(forward->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
869 &forward->source, &forward->dest, forward->iface);
870 }
871 free_frec(forward); /* cancel */
872 }
873 }
874
875
876 void receive_query(struct listener *listen, time_t now)
877 {
878 struct dns_header *header = (struct dns_header *)daemon->packet;
879 union mysockaddr source_addr;
880 unsigned short type;
881 struct all_addr dst_addr;
882 struct in_addr netmask, dst_addr_4;
883 size_t m;
884 ssize_t n;
885 int if_index = 0, auth_dns = 0;
886 #ifdef HAVE_AUTH
887 int local_auth = 0;
888 #endif
889 struct iovec iov[1];
890 struct msghdr msg;
891 struct cmsghdr *cmptr;
892 union {
893 struct cmsghdr align; /* this ensures alignment */
894 #ifdef HAVE_IPV6
895 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
896 #endif
897 #if defined(HAVE_LINUX_NETWORK)
898 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
899 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
900 char control[CMSG_SPACE(sizeof(struct in_addr)) +
901 CMSG_SPACE(sizeof(unsigned int))];
902 #elif defined(IP_RECVDSTADDR)
903 char control[CMSG_SPACE(sizeof(struct in_addr)) +
904 CMSG_SPACE(sizeof(struct sockaddr_dl))];
905 #endif
906 } control_u;
907 #ifdef HAVE_IPV6
908 /* Can always get recvd interface for IPv6 */
909 int check_dst = !option_bool(OPT_NOWILD) || listen->family == AF_INET6;
910 #else
911 int check_dst = !option_bool(OPT_NOWILD);
912 #endif
913
914 /* packet buffer overwritten */
915 daemon->srv_save = NULL;
916
917 dst_addr_4.s_addr = 0;
918 netmask.s_addr = 0;
919
920 if (option_bool(OPT_NOWILD) && listen->iface)
921 {
922 auth_dns = listen->iface->dns_auth;
923
924 if (listen->family == AF_INET)
925 {
926 dst_addr_4 = listen->iface->addr.in.sin_addr;
927 netmask = listen->iface->netmask;
928 }
929 }
930
931 iov[0].iov_base = daemon->packet;
932 iov[0].iov_len = daemon->edns_pktsz;
933
934 msg.msg_control = control_u.control;
935 msg.msg_controllen = sizeof(control_u);
936 msg.msg_flags = 0;
937 msg.msg_name = &source_addr;
938 msg.msg_namelen = sizeof(source_addr);
939 msg.msg_iov = iov;
940 msg.msg_iovlen = 1;
941
942 if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
943 return;
944
945 if (n < (int)sizeof(struct dns_header) ||
946 (msg.msg_flags & MSG_TRUNC) ||
947 (header->hb3 & HB3_QR))
948 return;
949
950 source_addr.sa.sa_family = listen->family;
951 #ifdef HAVE_IPV6
952 if (listen->family == AF_INET6)
953 source_addr.in6.sin6_flowinfo = 0;
954 #endif
955
956 if (check_dst)
957 {
958 struct ifreq ifr;
959
960 if (msg.msg_controllen < sizeof(struct cmsghdr))
961 return;
962
963 #if defined(HAVE_LINUX_NETWORK)
964 if (listen->family == AF_INET)
965 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
966 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
967 {
968 union {
969 unsigned char *c;
970 struct in_pktinfo *p;
971 } p;
972 p.c = CMSG_DATA(cmptr);
973 dst_addr_4 = dst_addr.addr.addr4 = p.p->ipi_spec_dst;
974 if_index = p.p->ipi_ifindex;
975 }
976 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
977 if (listen->family == AF_INET)
978 {
979 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
980 {
981 union {
982 unsigned char *c;
983 unsigned int *i;
984 struct in_addr *a;
985 #ifndef HAVE_SOLARIS_NETWORK
986 struct sockaddr_dl *s;
987 #endif
988 } p;
989 p.c = CMSG_DATA(cmptr);
990 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
991 dst_addr_4 = dst_addr.addr.addr4 = *(p.a);
992 else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
993 #ifdef HAVE_SOLARIS_NETWORK
994 if_index = *(p.i);
995 #else
996 if_index = p.s->sdl_index;
997 #endif
998 }
999 }
1000 #endif
1001
1002 #ifdef HAVE_IPV6
1003 if (listen->family == AF_INET6)
1004 {
1005 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1006 if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
1007 {
1008 union {
1009 unsigned char *c;
1010 struct in6_pktinfo *p;
1011 } p;
1012 p.c = CMSG_DATA(cmptr);
1013
1014 dst_addr.addr.addr6 = p.p->ipi6_addr;
1015 if_index = p.p->ipi6_ifindex;
1016 }
1017 }
1018 #endif
1019
1020 /* enforce available interface configuration */
1021
1022 if (!indextoname(listen->fd, if_index, ifr.ifr_name))
1023 return;
1024
1025 if (!iface_check(listen->family, &dst_addr, ifr.ifr_name, &auth_dns))
1026 {
1027 if (!option_bool(OPT_CLEVERBIND))
1028 enumerate_interfaces(0);
1029 if (!loopback_exception(listen->fd, listen->family, &dst_addr, ifr.ifr_name) &&
1030 !label_exception(if_index, listen->family, &dst_addr))
1031 return;
1032 }
1033
1034 if (listen->family == AF_INET && option_bool(OPT_LOCALISE))
1035 {
1036 struct irec *iface;
1037
1038 /* get the netmask of the interface which has the address we were sent to.
1039 This is not necessarily the interface we arrived on. */
1040
1041 for (iface = daemon->interfaces; iface; iface = iface->next)
1042 if (iface->addr.sa.sa_family == AF_INET &&
1043 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1044 break;
1045
1046 /* interface may be new */
1047 if (!iface && !option_bool(OPT_CLEVERBIND))
1048 enumerate_interfaces(0);
1049
1050 for (iface = daemon->interfaces; iface; iface = iface->next)
1051 if (iface->addr.sa.sa_family == AF_INET &&
1052 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1053 break;
1054
1055 /* If we failed, abandon localisation */
1056 if (iface)
1057 netmask = iface->netmask;
1058 else
1059 dst_addr_4.s_addr = 0;
1060 }
1061 }
1062
1063 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
1064 {
1065 char types[20];
1066 #ifdef HAVE_AUTH
1067 struct auth_zone *zone;
1068 #endif
1069
1070 querystr(auth_dns ? "auth" : "query", types, type);
1071
1072 if (listen->family == AF_INET)
1073 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1074 (struct all_addr *)&source_addr.in.sin_addr, types);
1075 #ifdef HAVE_IPV6
1076 else
1077 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1078 (struct all_addr *)&source_addr.in6.sin6_addr, types);
1079 #endif
1080
1081 #ifdef HAVE_AUTH
1082 /* find queries for zones we're authoritative for, and answer them directly */
1083 if (!auth_dns)
1084 for (zone = daemon->auth_zones; zone; zone = zone->next)
1085 if (in_zone(zone, daemon->namebuff, NULL))
1086 {
1087 auth_dns = 1;
1088 local_auth = 1;
1089 break;
1090 }
1091 #endif
1092 }
1093
1094 #ifdef HAVE_AUTH
1095 if (auth_dns)
1096 {
1097 m = answer_auth(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n, now, &source_addr, local_auth);
1098 if (m >= 1)
1099 {
1100 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1101 (char *)header, m, &source_addr, &dst_addr, if_index);
1102 daemon->auth_answer++;
1103 }
1104 }
1105 else
1106 #endif
1107 {
1108 m = answer_request(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n,
1109 dst_addr_4, netmask, now);
1110
1111 if (m >= 1)
1112 {
1113 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1114 (char *)header, m, &source_addr, &dst_addr, if_index);
1115 daemon->local_answer++;
1116 }
1117 else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
1118 header, (size_t)n, now, NULL))
1119 daemon->queries_forwarded++;
1120 else
1121 daemon->local_answer++;
1122 }
1123 }
1124
1125 #ifdef HAVE_DNSSEC
1126 static int tcp_key_recurse(time_t now, int status, int class, char *keyname, struct server *server)
1127 {
1128 /* Recurse up the key heirarchy */
1129 size_t n;
1130 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
1131 unsigned char *payload = &packet[2];
1132 struct dns_header *header = (struct dns_header *)payload;
1133 u16 *length = (u16 *)packet;
1134 int new_status;
1135 unsigned char c1, c2;
1136
1137 n = dnssec_generate_query(header, ((char *) header) + 65536, keyname, class,
1138 status == STAT_NEED_KEY ? T_DNSKEY : T_DS, &server->addr);
1139
1140 *length = htons(n);
1141
1142 if (!read_write(server->tcpfd, packet, n + sizeof(u16), 0) ||
1143 !read_write(server->tcpfd, &c1, 1, 1) ||
1144 !read_write(server->tcpfd, &c2, 1, 1) ||
1145 !read_write(server->tcpfd, payload, (c1 << 8) | c2, 1))
1146 {
1147 close(server->tcpfd);
1148 server->tcpfd = -1;
1149 new_status = STAT_INSECURE;
1150 }
1151 else
1152 {
1153 n = (c1 << 8) | c2;
1154
1155 if (status == STAT_NEED_KEY)
1156 new_status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1157 else
1158 new_status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1159
1160 if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
1161 {
1162 if ((new_status = tcp_key_recurse(now, new_status, class, daemon->keyname, server) == STAT_SECURE))
1163 {
1164 if (status == STAT_NEED_KEY)
1165 new_status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1166 else
1167 new_status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1168
1169 if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
1170 {
1171 my_syslog(LOG_ERR, _("Unexpected missing data for DNSSEC validation"));
1172 status = STAT_INSECURE;
1173 }
1174 }
1175 }
1176 }
1177
1178 free(packet);
1179
1180 return new_status;
1181 }
1182 #endif
1183
1184
1185 /* The daemon forks before calling this: it should deal with one connection,
1186 blocking as neccessary, and then return. Note, need to be a bit careful
1187 about resources for debug mode, when the fork is suppressed: that's
1188 done by the caller. */
1189 unsigned char *tcp_request(int confd, time_t now,
1190 union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
1191 {
1192 size_t size = 0;
1193 int norebind = 0;
1194 #ifdef HAVE_AUTH
1195 int local_auth = 0;
1196 #endif
1197 int checking_disabled, check_subnet, no_cache_dnssec = 0, cache_secure = 0;
1198 size_t m;
1199 unsigned short qtype;
1200 unsigned int gotname;
1201 unsigned char c1, c2;
1202 /* Max TCP packet + slop + size */
1203 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
1204 unsigned char *payload = &packet[2];
1205 /* largest field in header is 16-bits, so this is still sufficiently aligned */
1206 struct dns_header *header = (struct dns_header *)payload;
1207 u16 *length = (u16 *)packet;
1208 struct server *last_server;
1209 struct in_addr dst_addr_4;
1210 union mysockaddr peer_addr;
1211 socklen_t peer_len = sizeof(union mysockaddr);
1212
1213 if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
1214 return packet;
1215
1216 while (1)
1217 {
1218 if (!packet ||
1219 !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
1220 !(size = c1 << 8 | c2) ||
1221 !read_write(confd, payload, size, 1))
1222 return packet;
1223
1224 if (size < (int)sizeof(struct dns_header))
1225 continue;
1226
1227 check_subnet = 0;
1228
1229 /* save state of "cd" flag in query */
1230 if ((checking_disabled = header->hb4 & HB4_CD))
1231 no_cache_dnssec = 1;
1232
1233 /* RFC 4035: sect 4.6 para 2 */
1234 header->hb4 &= ~HB4_AD;
1235
1236 if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
1237 {
1238 char types[20];
1239 #ifdef HAVE_AUTH
1240 struct auth_zone *zone;
1241 #endif
1242 querystr(auth_dns ? "auth" : "query", types, qtype);
1243
1244 if (peer_addr.sa.sa_family == AF_INET)
1245 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1246 (struct all_addr *)&peer_addr.in.sin_addr, types);
1247 #ifdef HAVE_IPV6
1248 else
1249 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1250 (struct all_addr *)&peer_addr.in6.sin6_addr, types);
1251 #endif
1252
1253 #ifdef HAVE_AUTH
1254 /* find queries for zones we're authoritative for, and answer them directly */
1255 if (!auth_dns)
1256 for (zone = daemon->auth_zones; zone; zone = zone->next)
1257 if (in_zone(zone, daemon->namebuff, NULL))
1258 {
1259 auth_dns = 1;
1260 local_auth = 1;
1261 break;
1262 }
1263 #endif
1264 }
1265
1266 if (local_addr->sa.sa_family == AF_INET)
1267 dst_addr_4 = local_addr->in.sin_addr;
1268 else
1269 dst_addr_4.s_addr = 0;
1270
1271 #ifdef HAVE_AUTH
1272 if (auth_dns)
1273 m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, local_auth);
1274 else
1275 #endif
1276 {
1277 /* m > 0 if answered from cache */
1278 m = answer_request(header, ((char *) header) + 65536, (size_t)size,
1279 dst_addr_4, netmask, now);
1280
1281 /* Do this by steam now we're not in the select() loop */
1282 check_log_writer(NULL);
1283
1284 if (m == 0)
1285 {
1286 unsigned int flags = 0;
1287 struct all_addr *addrp = NULL;
1288 int type = 0;
1289 char *domain = NULL;
1290
1291 if (option_bool(OPT_ADD_MAC))
1292 size = add_mac(header, size, ((char *) header) + 65536, &peer_addr);
1293
1294 if (option_bool(OPT_CLIENT_SUBNET))
1295 {
1296 size_t new = add_source_addr(header, size, ((char *) header) + 65536, &peer_addr);
1297 if (size != new)
1298 {
1299 size = new;
1300 check_subnet = 1;
1301 }
1302 }
1303
1304 if (gotname)
1305 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
1306
1307 if (type != 0 || option_bool(OPT_ORDER) || !daemon->last_server)
1308 last_server = daemon->servers;
1309 else
1310 last_server = daemon->last_server;
1311
1312 if (!flags && last_server)
1313 {
1314 struct server *firstsendto = NULL;
1315 unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
1316
1317 /* Loop round available servers until we succeed in connecting to one.
1318 Note that this code subtley ensures that consecutive queries on this connection
1319 which can go to the same server, do so. */
1320 while (1)
1321 {
1322 if (!firstsendto)
1323 firstsendto = last_server;
1324 else
1325 {
1326 if (!(last_server = last_server->next))
1327 last_server = daemon->servers;
1328
1329 if (last_server == firstsendto)
1330 break;
1331 }
1332
1333 /* server for wrong domain */
1334 if (type != (last_server->flags & SERV_TYPE) ||
1335 (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
1336 continue;
1337
1338 if (last_server->tcpfd == -1)
1339 {
1340 if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
1341 continue;
1342
1343 if ((!local_bind(last_server->tcpfd, &last_server->source_addr, last_server->interface, 1) ||
1344 connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
1345 {
1346 close(last_server->tcpfd);
1347 last_server->tcpfd = -1;
1348 continue;
1349 }
1350
1351 #ifdef HAVE_DNSSEC
1352 if (option_bool(OPT_DNSSEC_VALID))
1353 {
1354 size = add_do_bit(header, size, ((char *) header) + 65536);
1355 header->hb4 |= HB4_CD;
1356 }
1357 #endif
1358
1359 #ifdef HAVE_CONNTRACK
1360 /* Copy connection mark of incoming query to outgoing connection. */
1361 if (option_bool(OPT_CONNTRACK))
1362 {
1363 unsigned int mark;
1364 struct all_addr local;
1365 #ifdef HAVE_IPV6
1366 if (local_addr->sa.sa_family == AF_INET6)
1367 local.addr.addr6 = local_addr->in6.sin6_addr;
1368 else
1369 #endif
1370 local.addr.addr4 = local_addr->in.sin_addr;
1371
1372 if (get_incoming_mark(&peer_addr, &local, 1, &mark))
1373 setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
1374 }
1375 #endif
1376 }
1377
1378 *length = htons(size);
1379
1380 if (!read_write(last_server->tcpfd, packet, size + sizeof(u16), 0) ||
1381 !read_write(last_server->tcpfd, &c1, 1, 1) ||
1382 !read_write(last_server->tcpfd, &c2, 1, 1) ||
1383 !read_write(last_server->tcpfd, payload, (c1 << 8) | c2, 1))
1384 {
1385 close(last_server->tcpfd);
1386 last_server->tcpfd = -1;
1387 continue;
1388 }
1389
1390 m = (c1 << 8) | c2;
1391
1392 if (!gotname)
1393 strcpy(daemon->namebuff, "query");
1394 if (last_server->addr.sa.sa_family == AF_INET)
1395 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
1396 (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
1397 #ifdef HAVE_IPV6
1398 else
1399 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
1400 (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
1401 #endif
1402
1403 #ifdef HAVE_DNSSEC
1404 if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled)
1405 {
1406 int class, status;
1407
1408 status = dnssec_validate_reply(now, header, m, daemon->namebuff, daemon->keyname, &class);
1409
1410 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
1411 {
1412 if ((status = tcp_key_recurse(now, status, class, daemon->keyname, last_server)) == STAT_SECURE)
1413 status = dnssec_validate_reply(now, header, m, daemon->namebuff, daemon->keyname, &class);
1414 }
1415
1416 log_query(F_KEYTAG | F_SECSTAT, "result", NULL,
1417 status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
1418
1419 if (status == STAT_BOGUS)
1420 no_cache_dnssec = 1;
1421
1422 if (status == STAT_SECURE)
1423 cache_secure = 1;
1424 }
1425 #endif
1426
1427 /* restore CD bit to the value in the query */
1428 if (checking_disabled)
1429 header->hb4 |= HB4_CD;
1430 else
1431 header->hb4 &= ~HB4_CD;
1432
1433 /* There's no point in updating the cache, since this process will exit and
1434 lose the information after a few queries. We make this call for the alias and
1435 bogus-nxdomain side-effects. */
1436 /* If the crc of the question section doesn't match the crc we sent, then
1437 someone might be attempting to insert bogus values into the cache by
1438 sending replies containing questions and bogus answers. */
1439 if (crc == questions_crc(header, (unsigned int)m, daemon->namebuff))
1440 m = process_reply(header, now, last_server, (unsigned int)m,
1441 option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec,
1442 cache_secure, check_subnet, &peer_addr);
1443
1444 break;
1445 }
1446 }
1447
1448 /* In case of local answer or no connections made. */
1449 if (m == 0)
1450 m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
1451 }
1452 }
1453
1454 check_log_writer(NULL);
1455
1456 *length = htons(m);
1457
1458 if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
1459 return packet;
1460 }
1461 }
1462
1463 static struct frec *allocate_frec(time_t now)
1464 {
1465 struct frec *f;
1466
1467 if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
1468 {
1469 f->next = daemon->frec_list;
1470 f->time = now;
1471 f->sentto = NULL;
1472 f->rfd4 = NULL;
1473 f->flags = 0;
1474 #ifdef HAVE_IPV6
1475 f->rfd6 = NULL;
1476 #endif
1477 #ifdef HAVE_DNSSEC
1478 f->blocking_query = NULL;
1479 #endif
1480 daemon->frec_list = f;
1481 }
1482
1483 return f;
1484 }
1485
1486 static struct randfd *allocate_rfd(int family)
1487 {
1488 static int finger = 0;
1489 int i;
1490
1491 /* limit the number of sockets we have open to avoid starvation of
1492 (eg) TFTP. Once we have a reasonable number, randomness should be OK */
1493
1494 for (i = 0; i < RANDOM_SOCKS; i++)
1495 if (daemon->randomsocks[i].refcount == 0)
1496 {
1497 if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
1498 break;
1499
1500 daemon->randomsocks[i].refcount = 1;
1501 daemon->randomsocks[i].family = family;
1502 return &daemon->randomsocks[i];
1503 }
1504
1505 /* No free ones or cannot get new socket, grab an existing one */
1506 for (i = 0; i < RANDOM_SOCKS; i++)
1507 {
1508 int j = (i+finger) % RANDOM_SOCKS;
1509 if (daemon->randomsocks[j].refcount != 0 &&
1510 daemon->randomsocks[j].family == family &&
1511 daemon->randomsocks[j].refcount != 0xffff)
1512 {
1513 finger = j;
1514 daemon->randomsocks[j].refcount++;
1515 return &daemon->randomsocks[j];
1516 }
1517 }
1518
1519 return NULL; /* doom */
1520 }
1521 static void free_frec(struct frec *f)
1522 {
1523 if (f->rfd4 && --(f->rfd4->refcount) == 0)
1524 close(f->rfd4->fd);
1525
1526 f->rfd4 = NULL;
1527 f->sentto = NULL;
1528 f->flags = 0;
1529
1530 #ifdef HAVE_IPV6
1531 if (f->rfd6 && --(f->rfd6->refcount) == 0)
1532 close(f->rfd6->fd);
1533
1534 f->rfd6 = NULL;
1535 #endif
1536
1537 #ifdef HAVE_DNSSEC
1538 if (f->stash)
1539 {
1540 blockdata_free(f->stash);
1541 f->stash = NULL;
1542 }
1543
1544 /* Anything we're waiting on is pointless now, too */
1545 if (f->blocking_query)
1546 free_frec(f->blocking_query);
1547 f->blocking_query = NULL;
1548
1549 #endif
1550 }
1551
1552 /* if wait==NULL return a free or older than TIMEOUT record.
1553 else return *wait zero if one available, or *wait is delay to
1554 when the oldest in-use record will expire. Impose an absolute
1555 limit of 4*TIMEOUT before we wipe things (for random sockets).
1556 If force is set, always return a result, even if we have
1557 to allocate above the limit. */
1558 struct frec *get_new_frec(time_t now, int *wait, int force)
1559 {
1560 struct frec *f, *oldest, *target;
1561 int count;
1562
1563 if (wait)
1564 *wait = 0;
1565
1566 for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next, count++)
1567 if (!f->sentto)
1568 target = f;
1569 else
1570 {
1571 if (difftime(now, f->time) >= 4*TIMEOUT)
1572 {
1573 free_frec(f);
1574 target = f;
1575 }
1576
1577 if (!oldest || difftime(f->time, oldest->time) <= 0)
1578 oldest = f;
1579 }
1580
1581 if (target)
1582 {
1583 target->time = now;
1584 return target;
1585 }
1586
1587 /* can't find empty one, use oldest if there is one
1588 and it's older than timeout */
1589 if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
1590 {
1591 /* keep stuff for twice timeout if we can by allocating a new
1592 record instead */
1593 if (difftime(now, oldest->time) < 2*TIMEOUT &&
1594 count <= daemon->ftabsize &&
1595 (f = allocate_frec(now)))
1596 return f;
1597
1598 if (!wait)
1599 {
1600 free_frec(oldest);
1601 oldest->time = now;
1602 }
1603 return oldest;
1604 }
1605
1606 /* none available, calculate time 'till oldest record expires */
1607 if (!force && count > daemon->ftabsize)
1608 {
1609 static time_t last_log = 0;
1610
1611 if (oldest && wait)
1612 *wait = oldest->time + (time_t)TIMEOUT - now;
1613
1614 if ((int)difftime(now, last_log) > 5)
1615 {
1616 last_log = now;
1617 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
1618 }
1619
1620 return NULL;
1621 }
1622
1623 if (!(f = allocate_frec(now)) && wait)
1624 /* wait one second on malloc failure */
1625 *wait = 1;
1626
1627 return f; /* OK if malloc fails and this is NULL */
1628 }
1629
1630 /* crc is all-ones if not known. */
1631 static struct frec *lookup_frec(unsigned short id, unsigned int crc)
1632 {
1633 struct frec *f;
1634
1635 for(f = daemon->frec_list; f; f = f->next)
1636 if (f->sentto && f->new_id == id &&
1637 (f->crc == crc || crc == 0xffffffff))
1638 return f;
1639
1640 return NULL;
1641 }
1642
1643 static struct frec *lookup_frec_by_sender(unsigned short id,
1644 union mysockaddr *addr,
1645 unsigned int crc)
1646 {
1647 struct frec *f;
1648
1649 for(f = daemon->frec_list; f; f = f->next)
1650 if (f->sentto &&
1651 f->orig_id == id &&
1652 f->crc == crc &&
1653 sockaddr_isequal(&f->source, addr))
1654 return f;
1655
1656 return NULL;
1657 }
1658
1659 /* A server record is going away, remove references to it */
1660 void server_gone(struct server *server)
1661 {
1662 struct frec *f;
1663
1664 for (f = daemon->frec_list; f; f = f->next)
1665 if (f->sentto && f->sentto == server)
1666 free_frec(f);
1667
1668 if (daemon->last_server == server)
1669 daemon->last_server = NULL;
1670
1671 if (daemon->srv_save == server)
1672 daemon->srv_save = NULL;
1673 }
1674
/* Pick a random 16-bit id for which lookup_frec() finds no in-use
   record with the given crc, drawing again until one is free. */
static unsigned short get_id(unsigned int crc)
{
  unsigned short candidate = rand16();

  while (lookup_frec(candidate, crc))
    candidate = rand16();

  return candidate;
}
1686
1687
1688
1689
1690