git.ipfire.org Git - people/ms/dnsmasq.git/blob - src/forward.c
Strip DNSSEC RRs when query doesn't have DO bit set.
[people/ms/dnsmasq.git] / src / forward.c
1 /* dnsmasq is Copyright (c) 2000-2014 Simon Kelley
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 dated June, 1991, or
6 (at your option) version 3 dated 29 June, 2007.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include "dnsmasq.h"
18
19 static struct frec *lookup_frec(unsigned short id, void *hash);
20 static struct frec *lookup_frec_by_sender(unsigned short id,
21 union mysockaddr *addr,
22 void *hash);
23 static unsigned short get_id(void);
24 static void free_frec(struct frec *f);
25 static struct randfd *allocate_rfd(int family);
26
27 /* Send a UDP packet with its source address set as "source"
28 unless nowild is true, when we just send it with the kernel default */
29 int send_from(int fd, int nowild, char *packet, size_t len,
30 union mysockaddr *to, struct all_addr *source,
31 unsigned int iface)
32 {
33 struct msghdr msg;
34 struct iovec iov[1];
35 union {
36 struct cmsghdr align; /* this ensures alignment */
37 #if defined(HAVE_LINUX_NETWORK)
38 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39 #elif defined(IP_SENDSRCADDR)
40 char control[CMSG_SPACE(sizeof(struct in_addr))];
41 #endif
42 #ifdef HAVE_IPV6
43 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44 #endif
45 } control_u;
46
47 iov[0].iov_base = packet;
48 iov[0].iov_len = len;
49
50 msg.msg_control = NULL;
51 msg.msg_controllen = 0;
52 msg.msg_flags = 0;
53 msg.msg_name = to;
54 msg.msg_namelen = sa_len(to);
55 msg.msg_iov = iov;
56 msg.msg_iovlen = 1;
57
58 if (!nowild)
59 {
60 struct cmsghdr *cmptr;
61 msg.msg_control = &control_u;
62 msg.msg_controllen = sizeof(control_u);
63 cmptr = CMSG_FIRSTHDR(&msg);
64
65 if (to->sa.sa_family == AF_INET)
66 {
67 #if defined(HAVE_LINUX_NETWORK)
68 struct in_pktinfo p;
69 p.ipi_ifindex = 0;
70 p.ipi_spec_dst = source->addr.addr4;
71 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
72 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
73 cmptr->cmsg_level = IPPROTO_IP;
74 cmptr->cmsg_type = IP_PKTINFO;
75 #elif defined(IP_SENDSRCADDR)
76 memcpy(CMSG_DATA(cmptr), &(source->addr.addr4), sizeof(source->addr.addr4));
77 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78 cmptr->cmsg_level = IPPROTO_IP;
79 cmptr->cmsg_type = IP_SENDSRCADDR;
80 #endif
81 }
82 else
83 #ifdef HAVE_IPV6
84 {
85 struct in6_pktinfo p;
86 p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87 p.ipi6_addr = source->addr.addr6;
88 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
89 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
90 cmptr->cmsg_type = daemon->v6pktinfo;
91 cmptr->cmsg_level = IPPROTO_IPV6;
92 }
93 #else
94 (void)iface; /* eliminate warning */
95 #endif
96 }
97
98 while (sendmsg(fd, &msg, 0) == -1)
99 {
100 if (retry_send())
101 continue;
102
103 /* If interface is still in DAD, EINVAL results - ignore that. */
104 if (errno == EINVAL)
105 break;
106
107 my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
108 return 0;
109 }
110
111 return 1;
112 }
113
114 static unsigned int search_servers(time_t now, struct all_addr **addrpp,
115 unsigned int qtype, char *qdomain, int *type, char **domain, int *norebind)
116
117 {
118 /* If the query ends in the domain in one of our servers, set
119 domain to point to that name. We find the largest match to allow both
120 domain.org and sub.domain.org to exist. */
121
122 unsigned int namelen = strlen(qdomain);
123 unsigned int matchlen = 0;
124 struct server *serv;
125 unsigned int flags = 0;
126
127 for (serv = daemon->servers; serv; serv=serv->next)
128 /* domain matches take priority over NODOTS matches */
129 if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
130 {
131 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
132 *type = SERV_FOR_NODOTS;
133 if (serv->flags & SERV_NO_ADDR)
134 flags = F_NXDOMAIN;
135 else if (serv->flags & SERV_LITERAL_ADDRESS)
136 {
137 if (sflag & qtype)
138 {
139 flags = sflag;
140 if (serv->addr.sa.sa_family == AF_INET)
141 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
142 #ifdef HAVE_IPV6
143 else
144 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
145 #endif
146 }
147 else if (!flags || (flags & F_NXDOMAIN))
148 flags = F_NOERR;
149 }
150 }
151 else if (serv->flags & SERV_HAS_DOMAIN)
152 {
153 unsigned int domainlen = strlen(serv->domain);
154 char *matchstart = qdomain + namelen - domainlen;
155 if (namelen >= domainlen &&
156 hostname_isequal(matchstart, serv->domain) &&
157 (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
158 {
159 if (serv->flags & SERV_NO_REBIND)
160 *norebind = 1;
161 else
162 {
163 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
164 /* implement priority rules for --address and --server for same domain.
165 --address wins if the address is for the correct AF
166 --server wins otherwise. */
167 if (domainlen != 0 && domainlen == matchlen)
168 {
169 if ((serv->flags & SERV_LITERAL_ADDRESS))
170 {
171 if (!(sflag & qtype) && flags == 0)
172 continue;
173 }
174 else
175 {
176 if (flags & (F_IPV4 | F_IPV6))
177 continue;
178 }
179 }
180
181 if (domainlen >= matchlen)
182 {
183 *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND);
184 *domain = serv->domain;
185 matchlen = domainlen;
186 if (serv->flags & SERV_NO_ADDR)
187 flags = F_NXDOMAIN;
188 else if (serv->flags & SERV_LITERAL_ADDRESS)
189 {
190 if (sflag & qtype)
191 {
192 flags = sflag;
193 if (serv->addr.sa.sa_family == AF_INET)
194 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
195 #ifdef HAVE_IPV6
196 else
197 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
198 #endif
199 }
200 else if (!flags || (flags & F_NXDOMAIN))
201 flags = F_NOERR;
202 }
203 else
204 flags = 0;
205 }
206 }
207 }
208 }
209
210 if (flags == 0 && !(qtype & F_QUERY) &&
211 option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
212 /* don't forward A or AAAA queries for simple names, except the empty name */
213 flags = F_NOERR;
214
215 if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
216 flags = F_NOERR;
217
218 if (flags)
219 {
220 int logflags = 0;
221
222 if (flags == F_NXDOMAIN || flags == F_NOERR)
223 logflags = F_NEG | qtype;
224
225 log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
226 }
227 else if ((*type) & SERV_USE_RESOLV)
228 {
229 *type = 0; /* use normal servers for this domain */
230 *domain = NULL;
231 }
232 return flags;
233 }
234
235 static int forward_query(int udpfd, union mysockaddr *udpaddr,
236 struct all_addr *dst_addr, unsigned int dst_iface,
237 struct dns_header *header, size_t plen, time_t now,
238 struct frec *forward, int ad_reqd, int do_bit)
239 {
240 char *domain = NULL;
241 int type = 0, norebind = 0;
242 struct all_addr *addrp = NULL;
243 unsigned int flags = 0;
244 struct server *start = NULL;
245 #ifdef HAVE_DNSSEC
246 void *hash = hash_questions(header, plen, daemon->namebuff);
247 #else
248 unsigned int crc = questions_crc(header, plen, daemon->namebuff);
249 void *hash = &crc;
250 #endif
251 unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
252
253 /* may be no servers available. */
254 if (!daemon->servers)
255 forward = NULL;
256 else if (forward || (hash && (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, hash))))
257 {
258 #ifdef HAVE_DNSSEC
259 /* If we've already got an answer to this query, but we're awaiting keys for validation,
260 there's no point retrying the query, retry the key query instead...... */
261 if (forward->blocking_query)
262 {
263 int fd;
264
265 while (forward->blocking_query)
266 forward = forward->blocking_query;
267
268 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
269 plen = forward->stash_len;
270
271 if (forward->sentto->addr.sa.sa_family)
272 log_query(F_DNSSEC | F_IPV4, "retry", (struct all_addr *)&forward->sentto->addr.in.sin_addr, "dnssec");
273 #ifdef HAVE_IPV6
274 else
275 log_query(F_DNSSEC | F_IPV6, "retry", (struct all_addr *)&forward->sentto->addr.in6.sin6_addr, "dnssec");
276 #endif
277
278 if (forward->sentto->sfd)
279 fd = forward->sentto->sfd->fd;
280 else
281 {
282 #ifdef HAVE_IPV6
283 if (forward->sentto->addr.sa.sa_family == AF_INET6)
284 fd = forward->rfd6->fd;
285 else
286 #endif
287 fd = forward->rfd4->fd;
288 }
289
290 while (sendto(fd, (char *)header, plen, 0,
291 &forward->sentto->addr.sa,
292 sa_len(&forward->sentto->addr)) == -1 && retry_send());
293
294 return 1;
295 }
296 #endif
297
298 /* retry on existing query, send to all available servers */
299 domain = forward->sentto->domain;
300 forward->sentto->failed_queries++;
301 if (!option_bool(OPT_ORDER))
302 {
303 forward->forwardall = 1;
304 daemon->last_server = NULL;
305 }
306 type = forward->sentto->flags & SERV_TYPE;
307 if (!(start = forward->sentto->next))
308 start = daemon->servers; /* at end of list, recycle */
309 header->id = htons(forward->new_id);
310 }
311 else
312 {
313 if (gotname)
314 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
315
316 if (!flags && !(forward = get_new_frec(now, NULL, 0)))
317 /* table full - server failure. */
318 flags = F_NEG;
319
320 if (forward)
321 {
322 forward->source = *udpaddr;
323 forward->dest = *dst_addr;
324 forward->iface = dst_iface;
325 forward->orig_id = ntohs(header->id);
326 forward->new_id = get_id();
327 forward->fd = udpfd;
328 memcpy(forward->hash, hash, HASH_SIZE);
329 forward->forwardall = 0;
330 forward->flags = 0;
331 if (norebind)
332 forward->flags |= FREC_NOREBIND;
333 if (header->hb4 & HB4_CD)
334 forward->flags |= FREC_CHECKING_DISABLED;
335 if (ad_reqd)
336 forward->flags |= FREC_AD_QUESTION;
337 #ifdef HAVE_DNSSEC
338 forward->work_counter = DNSSEC_WORK;
339 if (do_bit)
340 forward->flags |= FREC_DO_QUESTION;
341 #endif
342
343 header->id = htons(forward->new_id);
344
345 /* In strict_order mode, always try servers in the order
346 specified in resolv.conf, if a domain is given
347 always try all the available servers,
348 otherwise, use the one last known to work. */
349
350 if (type == 0)
351 {
352 if (option_bool(OPT_ORDER))
353 start = daemon->servers;
354 else if (!(start = daemon->last_server) ||
355 daemon->forwardcount++ > FORWARD_TEST ||
356 difftime(now, daemon->forwardtime) > FORWARD_TIME)
357 {
358 start = daemon->servers;
359 forward->forwardall = 1;
360 daemon->forwardcount = 0;
361 daemon->forwardtime = now;
362 }
363 }
364 else
365 {
366 start = daemon->servers;
367 if (!option_bool(OPT_ORDER))
368 forward->forwardall = 1;
369 }
370 }
371 }
372
373 /* check for send errors here (no route to host)
374 if we fail to send to all nameservers, send back an error
375 packet straight away (helps modem users when offline) */
376
377 if (!flags && forward)
378 {
379 struct server *firstsentto = start;
380 int forwarded = 0;
381
382 if (option_bool(OPT_ADD_MAC))
383 plen = add_mac(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
384
385 if (option_bool(OPT_CLIENT_SUBNET))
386 {
387 size_t new = add_source_addr(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
388 if (new != plen)
389 {
390 plen = new;
391 forward->flags |= FREC_HAS_SUBNET;
392 }
393 }
394
395 #ifdef HAVE_DNSSEC
396 if (option_bool(OPT_DNSSEC_VALID))
397 {
398 size_t new_plen = add_do_bit(header, plen, ((char *) header) + daemon->packet_buff_sz);
399
400 /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
401 this allows it to select auth servers when one is returning bad data. */
402 if (option_bool(OPT_DNSSEC_DEBUG))
403 header->hb4 |= HB4_CD;
404
405 if (new_plen != plen)
406 forward->flags |= FREC_ADDED_PHEADER;
407
408 plen = new_plen;
409 }
410 #endif
411
412 while (1)
413 {
414 /* only send to servers dealing with our domain.
415 domain may be NULL, in which case server->domain
416 must be NULL also. */
417
418 if (type == (start->flags & SERV_TYPE) &&
419 (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
420 !(start->flags & SERV_LITERAL_ADDRESS))
421 {
422 int fd;
423
424 /* find server socket to use, may need to get random one. */
425 if (start->sfd)
426 fd = start->sfd->fd;
427 else
428 {
429 #ifdef HAVE_IPV6
430 if (start->addr.sa.sa_family == AF_INET6)
431 {
432 if (!forward->rfd6 &&
433 !(forward->rfd6 = allocate_rfd(AF_INET6)))
434 break;
435 daemon->rfd_save = forward->rfd6;
436 fd = forward->rfd6->fd;
437 }
438 else
439 #endif
440 {
441 if (!forward->rfd4 &&
442 !(forward->rfd4 = allocate_rfd(AF_INET)))
443 break;
444 daemon->rfd_save = forward->rfd4;
445 fd = forward->rfd4->fd;
446 }
447
448 #ifdef HAVE_CONNTRACK
449 /* Copy connection mark of incoming query to outgoing connection. */
450 if (option_bool(OPT_CONNTRACK))
451 {
452 unsigned int mark;
453 if (get_incoming_mark(&forward->source, &forward->dest, 0, &mark))
454 setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
455 }
456 #endif
457 }
458
459 if (sendto(fd, (char *)header, plen, 0,
460 &start->addr.sa,
461 sa_len(&start->addr)) == -1)
462 {
463 if (retry_send())
464 continue;
465 }
466 else
467 {
468 /* Keep info in case we want to re-send this packet */
469 daemon->srv_save = start;
470 daemon->packet_len = plen;
471
472 if (!gotname)
473 strcpy(daemon->namebuff, "query");
474 if (start->addr.sa.sa_family == AF_INET)
475 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
476 (struct all_addr *)&start->addr.in.sin_addr, NULL);
477 #ifdef HAVE_IPV6
478 else
479 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
480 (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
481 #endif
482 start->queries++;
483 forwarded = 1;
484 forward->sentto = start;
485 if (!forward->forwardall)
486 break;
487 forward->forwardall++;
488 }
489 }
490
491 if (!(start = start->next))
492 start = daemon->servers;
493
494 if (start == firstsentto)
495 break;
496 }
497
498 if (forwarded)
499 return 1;
500
501 /* could not send on, prepare to return */
502 header->id = htons(forward->orig_id);
503 free_frec(forward); /* cancel */
504 }
505
506 /* could not send on, return empty answer or address if known for whole domain */
507 if (udpfd != -1)
508 {
509 plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
510 send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
511 }
512
513 return 0;
514 }
515
516 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
517 int no_cache, int cache_secure, int ad_reqd, int do_bit, int added_pheader, int check_subnet, union mysockaddr *query_source)
518 {
519 unsigned char *pheader, *sizep;
520 char **sets = 0;
521 int munged = 0, is_sign;
522 size_t plen;
523
524 (void)ad_reqd;
525
526 #ifdef HAVE_IPSET
527 /* Similar algorithm to search_servers. */
528 struct ipsets *ipset_pos;
529 unsigned int namelen = strlen(daemon->namebuff);
530 unsigned int matchlen = 0;
531 for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next)
532 {
533 unsigned int domainlen = strlen(ipset_pos->domain);
534 char *matchstart = daemon->namebuff + namelen - domainlen;
535 if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
536 (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
537 domainlen >= matchlen)
538 {
539 matchlen = domainlen;
540 sets = ipset_pos->sets;
541 }
542 }
543 #endif
544
545 /* If upstream is advertising a larger UDP packet size
546 than we allow, trim it so that we don't get overlarge
547 requests for the client. We can't do this for signed packets. */
548
549 if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)))
550 {
551 unsigned short udpsz;
552 unsigned char *psave = sizep;
553
554 GETSHORT(udpsz, sizep);
555
556 if (!is_sign && udpsz > daemon->edns_pktsz)
557 PUTSHORT(daemon->edns_pktsz, psave);
558
559 if (check_subnet && !check_source(header, plen, pheader, query_source))
560 {
561 my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
562 return 0;
563 }
564
565 if (added_pheader)
566 {
567 pheader = 0;
568 header->arcount = htons(0);
569 }
570 }
571
572 /* RFC 4035 sect 4.6 para 3 */
573 if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
574 header->hb4 &= ~HB4_AD;
575
576 if (OPCODE(header) != QUERY || (RCODE(header) != NOERROR && RCODE(header) != NXDOMAIN))
577 return n;
578
579 /* Complain loudly if the upstream server is non-recursive. */
580 if (!(header->hb4 & HB4_RA) && RCODE(header) == NOERROR && ntohs(header->ancount) == 0 &&
581 server && !(server->flags & SERV_WARNED_RECURSIVE))
582 {
583 prettyprint_addr(&server->addr, daemon->namebuff);
584 my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
585 if (!option_bool(OPT_LOG))
586 server->flags |= SERV_WARNED_RECURSIVE;
587 }
588
589 if (daemon->bogus_addr && RCODE(header) != NXDOMAIN &&
590 check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
591 {
592 munged = 1;
593 SET_RCODE(header, NXDOMAIN);
594 header->hb3 &= ~HB3_AA;
595 cache_secure = 0;
596 }
597 else
598 {
599 int doctored = 0;
600
601 if (RCODE(header) == NXDOMAIN &&
602 extract_request(header, n, daemon->namebuff, NULL) &&
603 check_for_local_domain(daemon->namebuff, now))
604 {
605 /* if we forwarded a query for a locally known name (because it was for
606 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
607 since we know that the domain exists, even if upstream doesn't */
608 munged = 1;
609 header->hb3 |= HB3_AA;
610 SET_RCODE(header, NOERROR);
611 cache_secure = 0;
612 }
613
614 if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
615 {
616 my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
617 munged = 1;
618 cache_secure = 0;
619 }
620
621 if (doctored)
622 cache_secure = 0;
623 }
624
625 #ifdef HAVE_DNSSEC
626 if (no_cache && !(header->hb4 & HB4_CD))
627 {
628 if (!option_bool(OPT_DNSSEC_DEBUG))
629 {
630 /* Bogus reply, turn into SERVFAIL */
631 SET_RCODE(header, SERVFAIL);
632 munged = 1;
633 }
634 }
635
636 if (option_bool(OPT_DNSSEC_VALID))
637 header->hb4 &= ~HB4_AD;
638
639 if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
640 header->hb4 |= HB4_AD;
641
642 /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
643 if (!do_bit)
644 n = filter_rrsigs(header, n);
645 #endif
646
647 /* do this after extract_addresses. Ensure NODATA reply and remove
648 nameserver info. */
649
650 if (munged)
651 {
652 header->ancount = htons(0);
653 header->nscount = htons(0);
654 header->arcount = htons(0);
655 }
656
657 /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
658 sections of the packet. Find the new length here and put back pseudoheader
659 if it was removed. */
660 return resize_packet(header, n, pheader, plen);
661 }
662
663 /* sets new last_server */
664 void reply_query(int fd, int family, time_t now)
665 {
666 /* packet from peer server, extract data for cache, and send to
667 original requester */
668 struct dns_header *header;
669 union mysockaddr serveraddr;
670 struct frec *forward;
671 socklen_t addrlen = sizeof(serveraddr);
672 ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
673 size_t nn;
674 struct server *server;
675 void *hash;
676 #ifndef HAVE_DNSSEC
677 unsigned int crc;
678 #endif
679
680 /* packet buffer overwritten */
681 daemon->srv_save = NULL;
682
683 /* Determine the address of the server replying so that we can mark that as good */
684 serveraddr.sa.sa_family = family;
685 #ifdef HAVE_IPV6
686 if (serveraddr.sa.sa_family == AF_INET6)
687 serveraddr.in6.sin6_flowinfo = 0;
688 #endif
689
690 /* spoof check: answer must come from known server, */
691 for (server = daemon->servers; server; server = server->next)
692 if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
693 sockaddr_isequal(&server->addr, &serveraddr))
694 break;
695
696 header = (struct dns_header *)daemon->packet;
697
698 #ifdef HAVE_DNSSEC
699 hash = hash_questions(header, n, daemon->namebuff);
700 #else
701 hash = &crc;
702 crc = questions_crc(header, n, daemon->namebuff);
703 #endif
704
705 if (!server ||
706 n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR) ||
707 !(forward = lookup_frec(ntohs(header->id), hash)))
708 return;
709
710 if ((RCODE(header) == SERVFAIL || RCODE(header) == REFUSED) &&
711 !option_bool(OPT_ORDER) &&
712 forward->forwardall == 0)
713 /* for broken servers, attempt to send to another one. */
714 {
715 unsigned char *pheader;
716 size_t plen;
717 int is_sign;
718
719 /* recreate query from reply */
720 pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
721 if (!is_sign)
722 {
723 header->ancount = htons(0);
724 header->nscount = htons(0);
725 header->arcount = htons(0);
726 if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
727 {
728 header->hb3 &= ~(HB3_QR | HB3_TC);
729 forward_query(-1, NULL, NULL, 0, header, nn, now, forward, 0, 0);
730 return;
731 }
732 }
733 }
734
735 server = forward->sentto;
736
737 if ((forward->sentto->flags & SERV_TYPE) == 0)
738 {
739 if (RCODE(header) == SERVFAIL || RCODE(header) == REFUSED)
740 server = NULL;
741 else
742 {
743 struct server *last_server;
744
745 /* find good server by address if possible, otherwise assume the last one we sent to */
746 for (last_server = daemon->servers; last_server; last_server = last_server->next)
747 if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
748 sockaddr_isequal(&last_server->addr, &serveraddr))
749 {
750 server = last_server;
751 break;
752 }
753 }
754 if (!option_bool(OPT_ALL_SERVERS))
755 daemon->last_server = server;
756 }
757
758 /* If the answer is an error, keep the forward record in place in case
759 we get a good reply from another server. Kill it when we've
760 had replies from all to avoid filling the forwarding table when
761 everything is broken */
762 if (forward->forwardall == 0 || --forward->forwardall == 1 ||
763 (RCODE(header) != REFUSED && RCODE(header) != SERVFAIL))
764 {
765 int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0;
766
767 if (option_bool(OPT_NO_REBIND))
768 check_rebind = !(forward->flags & FREC_NOREBIND);
769
770 /* Don't cache replies where DNSSEC validation was turned off, either
771 the upstream server told us so, or the original query specified it. */
772 if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
773 no_cache_dnssec = 1;
774
775 #ifdef HAVE_DNSSEC
776 if (option_bool(OPT_DNSSEC_VALID) && !(forward->flags & FREC_CHECKING_DISABLED))
777 {
778 int status;
779
780 /* We've had a reply already, which we're validating. Ignore this duplicate */
781 if (forward->blocking_query)
782 return;
783
784 if (header->hb3 & HB3_TC)
785 {
786 /* Truncated answer can't be validated.
787 If this is an answer to a DNSSEC-generated query, we still
788 need to get the client to retry over TCP, so return
789 an answer with the TC bit set, even if the actual answer fits.
790 */
791 status = STAT_TRUNCATED;
792 }
793 else if (forward->flags & FREC_DNSKEY_QUERY)
794 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
795 else if (forward->flags & FREC_DS_QUERY)
796 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
797 else
798 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
799
800 /* Can't validate, as we're missing key data. Put this
801 answer aside, whilst we get that. */
802 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
803 {
804 struct frec *new, *orig;
805
806 /* Free any saved query */
807 if (forward->stash)
808 blockdata_free(forward->stash);
809
810 /* Now save reply pending receipt of key data */
811 if (!(forward->stash = blockdata_alloc((char *)header, n)))
812 return;
813 forward->stash_len = n;
814
815 anotherkey:
816 /* Find the original query that started it all.... */
817 for (orig = forward; orig->dependent; orig = orig->dependent);
818
819 if (--orig->work_counter == 0 || !(new = get_new_frec(now, NULL, 1)))
820 status = STAT_INSECURE;
821 else
822 {
823 int fd;
824 struct frec *next = new->next;
825 *new = *forward; /* copy everything, then overwrite */
826 new->next = next;
827 new->blocking_query = NULL;
828 new->rfd4 = NULL;
829 #ifdef HAVE_IPV6
830 new->rfd6 = NULL;
831 #endif
832 new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY);
833
834 new->dependent = forward; /* to find query awaiting new one. */
835 forward->blocking_query = new; /* for garbage cleaning */
836 /* validate routines leave name of required record in daemon->keyname */
837 if (status == STAT_NEED_KEY)
838 {
839 new->flags |= FREC_DNSKEY_QUERY;
840 nn = dnssec_generate_query(header, ((char *) header) + daemon->packet_buff_sz,
841 daemon->keyname, forward->class, T_DNSKEY, &server->addr);
842 }
843 else
844 {
845 new->flags |= FREC_DS_QUERY;
846 nn = dnssec_generate_query(header,((char *) header) + daemon->packet_buff_sz,
847 daemon->keyname, forward->class, T_DS, &server->addr);
848 }
849 if ((hash = hash_questions(header, nn, daemon->namebuff)))
850 memcpy(new->hash, hash, HASH_SIZE);
851 new->new_id = get_id();
852 header->id = htons(new->new_id);
853 /* Save query for retransmission */
854 new->stash = blockdata_alloc((char *)header, nn);
855 new->stash_len = nn;
856
857 /* Don't resend this. */
858 daemon->srv_save = NULL;
859
860 if (server->sfd)
861 fd = server->sfd->fd;
862 else
863 {
864 fd = -1;
865 #ifdef HAVE_IPV6
866 if (server->addr.sa.sa_family == AF_INET6)
867 {
868 if (new->rfd6 || (new->rfd6 = allocate_rfd(AF_INET6)))
869 fd = new->rfd6->fd;
870 }
871 else
872 #endif
873 {
874 if (new->rfd4 || (new->rfd4 = allocate_rfd(AF_INET)))
875 fd = new->rfd4->fd;
876 }
877 }
878
879 if (fd != -1)
880 {
881 while (sendto(fd, (char *)header, nn, 0, &server->addr.sa, sa_len(&server->addr)) == -1 && retry_send());
882 server->queries++;
883 }
884
885 return;
886 }
887 }
888
889 /* Ok, we reached far enough up the chain-of-trust that we can validate something.
890 Now wind back down, pulling back answers which wouldn't previously validate
891 and validate them with the new data. Note that if an answer needs multiple
892 keys to validate, we may find another key is needed, in which case we set off
893 down another branch of the tree. Once we get to the original answer
894 (FREC_DNSSEC_QUERY not set) and it validates, return it to the original requestor. */
895 while (forward->dependent)
896 {
897 struct frec *prev = forward->dependent;
898 free_frec(forward);
899 forward = prev;
900 forward->blocking_query = NULL; /* already gone */
901 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
902 n = forward->stash_len;
903
904 if (status == STAT_SECURE)
905 {
906 if (forward->flags & FREC_DNSKEY_QUERY)
907 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
908 else if (forward->flags & FREC_DS_QUERY)
909 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
910 else
911 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
912
913 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
914 goto anotherkey;
915 }
916 }
917
918 if (status == STAT_TRUNCATED)
919 header->hb3 |= HB3_TC;
920 else
921 {
922 char *result;
923
924 if (forward->work_counter == 0)
925 result = "ABANDONED";
926 else
927 result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
928
929 log_query(F_KEYTAG | F_SECSTAT, "result", NULL, result);
930 }
931
932 no_cache_dnssec = 0;
933
934 if (status == STAT_SECURE)
935 cache_secure = 1;
936 else if (status == STAT_BOGUS)
937 no_cache_dnssec = 1;
938 }
939 #endif
940
941 /* restore CD bit to the value in the query */
942 if (forward->flags & FREC_CHECKING_DISABLED)
943 header->hb4 |= HB4_CD;
944 else
945 header->hb4 &= ~HB4_CD;
946
947 if ((nn = process_reply(header, now, server, (size_t)n, check_rebind, no_cache_dnssec, cache_secure,
948 forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION,
949 forward->flags & FREC_ADDED_PHEADER, forward->flags & FREC_HAS_SUBNET, &forward->source)))
950 {
951 header->id = htons(forward->orig_id);
952 header->hb4 |= HB4_RA; /* recursion if available */
953 send_from(forward->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
954 &forward->source, &forward->dest, forward->iface);
955 }
956 free_frec(forward); /* cancel */
957 }
958 }
959
960
961 void receive_query(struct listener *listen, time_t now)
962 {
963 struct dns_header *header = (struct dns_header *)daemon->packet;
964 union mysockaddr source_addr;
965 unsigned short type;
966 struct all_addr dst_addr;
967 struct in_addr netmask, dst_addr_4;
968 size_t m;
969 ssize_t n;
970 int if_index = 0, auth_dns = 0;
971 #ifdef HAVE_AUTH
972 int local_auth = 0;
973 #endif
974 struct iovec iov[1];
975 struct msghdr msg;
976 struct cmsghdr *cmptr;
977 union {
978 struct cmsghdr align; /* this ensures alignment */
979 #ifdef HAVE_IPV6
980 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
981 #endif
982 #if defined(HAVE_LINUX_NETWORK)
983 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
984 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
985 char control[CMSG_SPACE(sizeof(struct in_addr)) +
986 CMSG_SPACE(sizeof(unsigned int))];
987 #elif defined(IP_RECVDSTADDR)
988 char control[CMSG_SPACE(sizeof(struct in_addr)) +
989 CMSG_SPACE(sizeof(struct sockaddr_dl))];
990 #endif
991 } control_u;
992 #ifdef HAVE_IPV6
993 /* Can always get recvd interface for IPv6 */
994 int check_dst = !option_bool(OPT_NOWILD) || listen->family == AF_INET6;
995 #else
996 int check_dst = !option_bool(OPT_NOWILD);
997 #endif
998
999 /* packet buffer overwritten */
1000 daemon->srv_save = NULL;
1001
1002 dst_addr_4.s_addr = 0;
1003 netmask.s_addr = 0;
1004
1005 if (option_bool(OPT_NOWILD) && listen->iface)
1006 {
1007 auth_dns = listen->iface->dns_auth;
1008
1009 if (listen->family == AF_INET)
1010 {
1011 dst_addr_4 = listen->iface->addr.in.sin_addr;
1012 netmask = listen->iface->netmask;
1013 }
1014 }
1015
1016 iov[0].iov_base = daemon->packet;
1017 iov[0].iov_len = daemon->edns_pktsz;
1018
1019 msg.msg_control = control_u.control;
1020 msg.msg_controllen = sizeof(control_u);
1021 msg.msg_flags = 0;
1022 msg.msg_name = &source_addr;
1023 msg.msg_namelen = sizeof(source_addr);
1024 msg.msg_iov = iov;
1025 msg.msg_iovlen = 1;
1026
1027 if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
1028 return;
1029
1030 if (n < (int)sizeof(struct dns_header) ||
1031 (msg.msg_flags & MSG_TRUNC) ||
1032 (header->hb3 & HB3_QR))
1033 return;
1034
1035 source_addr.sa.sa_family = listen->family;
1036 #ifdef HAVE_IPV6
1037 if (listen->family == AF_INET6)
1038 source_addr.in6.sin6_flowinfo = 0;
1039 #endif
1040
1041 if (check_dst)
1042 {
1043 struct ifreq ifr;
1044
1045 if (msg.msg_controllen < sizeof(struct cmsghdr))
1046 return;
1047
1048 #if defined(HAVE_LINUX_NETWORK)
1049 if (listen->family == AF_INET)
1050 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1051 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
1052 {
1053 union {
1054 unsigned char *c;
1055 struct in_pktinfo *p;
1056 } p;
1057 p.c = CMSG_DATA(cmptr);
1058 dst_addr_4 = dst_addr.addr.addr4 = p.p->ipi_spec_dst;
1059 if_index = p.p->ipi_ifindex;
1060 }
1061 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
1062 if (listen->family == AF_INET)
1063 {
1064 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1065 {
1066 union {
1067 unsigned char *c;
1068 unsigned int *i;
1069 struct in_addr *a;
1070 #ifndef HAVE_SOLARIS_NETWORK
1071 struct sockaddr_dl *s;
1072 #endif
1073 } p;
1074 p.c = CMSG_DATA(cmptr);
1075 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
1076 dst_addr_4 = dst_addr.addr.addr4 = *(p.a);
1077 else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
1078 #ifdef HAVE_SOLARIS_NETWORK
1079 if_index = *(p.i);
1080 #else
1081 if_index = p.s->sdl_index;
1082 #endif
1083 }
1084 }
1085 #endif
1086
1087 #ifdef HAVE_IPV6
1088 if (listen->family == AF_INET6)
1089 {
1090 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1091 if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
1092 {
1093 union {
1094 unsigned char *c;
1095 struct in6_pktinfo *p;
1096 } p;
1097 p.c = CMSG_DATA(cmptr);
1098
1099 dst_addr.addr.addr6 = p.p->ipi6_addr;
1100 if_index = p.p->ipi6_ifindex;
1101 }
1102 }
1103 #endif
1104
1105 /* enforce available interface configuration */
1106
1107 if (!indextoname(listen->fd, if_index, ifr.ifr_name))
1108 return;
1109
1110 if (!iface_check(listen->family, &dst_addr, ifr.ifr_name, &auth_dns))
1111 {
1112 if (!option_bool(OPT_CLEVERBIND))
1113 enumerate_interfaces(0);
1114 if (!loopback_exception(listen->fd, listen->family, &dst_addr, ifr.ifr_name) &&
1115 !label_exception(if_index, listen->family, &dst_addr))
1116 return;
1117 }
1118
1119 if (listen->family == AF_INET && option_bool(OPT_LOCALISE))
1120 {
1121 struct irec *iface;
1122
1123 /* get the netmask of the interface whch has the address we were sent to.
1124 This is no neccessarily the interface we arrived on. */
1125
1126 for (iface = daemon->interfaces; iface; iface = iface->next)
1127 if (iface->addr.sa.sa_family == AF_INET &&
1128 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1129 break;
1130
1131 /* interface may be new */
1132 if (!iface && !option_bool(OPT_CLEVERBIND))
1133 enumerate_interfaces(0);
1134
1135 for (iface = daemon->interfaces; iface; iface = iface->next)
1136 if (iface->addr.sa.sa_family == AF_INET &&
1137 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1138 break;
1139
1140 /* If we failed, abandon localisation */
1141 if (iface)
1142 netmask = iface->netmask;
1143 else
1144 dst_addr_4.s_addr = 0;
1145 }
1146 }
1147
1148 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
1149 {
1150 #ifdef HAVE_AUTH
1151 struct auth_zone *zone;
1152 #endif
1153 char *types = querystr(auth_dns ? "auth" : "query", type);
1154
1155 if (listen->family == AF_INET)
1156 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1157 (struct all_addr *)&source_addr.in.sin_addr, types);
1158 #ifdef HAVE_IPV6
1159 else
1160 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1161 (struct all_addr *)&source_addr.in6.sin6_addr, types);
1162 #endif
1163
1164 #ifdef HAVE_AUTH
1165 /* find queries for zones we're authoritative for, and answer them directly */
1166 if (!auth_dns)
1167 for (zone = daemon->auth_zones; zone; zone = zone->next)
1168 if (in_zone(zone, daemon->namebuff, NULL))
1169 {
1170 auth_dns = 1;
1171 local_auth = 1;
1172 break;
1173 }
1174 #endif
1175 }
1176
1177 #ifdef HAVE_AUTH
1178 if (auth_dns)
1179 {
1180 m = answer_auth(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n, now, &source_addr, local_auth);
1181 if (m >= 1)
1182 {
1183 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1184 (char *)header, m, &source_addr, &dst_addr, if_index);
1185 daemon->auth_answer++;
1186 }
1187 }
1188 else
1189 #endif
1190 {
1191 int ad_reqd, do_bit;
1192 m = answer_request(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n,
1193 dst_addr_4, netmask, now, &ad_reqd, &do_bit);
1194
1195 if (m >= 1)
1196 {
1197 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1198 (char *)header, m, &source_addr, &dst_addr, if_index);
1199 daemon->local_answer++;
1200 }
1201 else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
1202 header, (size_t)n, now, NULL, ad_reqd, do_bit))
1203 daemon->queries_forwarded++;
1204 else
1205 daemon->local_answer++;
1206 }
1207 }
1208
#ifdef HAVE_DNSSEC
/* Validate a reply received over TCP, fetching whatever DNSKEY/DS records
   are needed by walking up the key hierarchy over the same connection.

   status    selects the validator used at this level: STAT_NEED_KEY runs
             dnssec_validate_by_ds(), STAT_NEED_DS runs dnssec_validate_ds(),
             anything else runs dnssec_validate_reply().
   header, n the message to validate and its length.
   class     query class handed to the validators and to the key/DS query.
   name, keyname
             buffers handed to the validators; when a validator returns
             STAT_NEED_KEY or STAT_NEED_DS, keyname holds the name to ask for.
   server    upstream server; its tcpfd is used for the key/DS exchange.
   keycount  remaining work budget, decremented on every call.

   Returns the resulting validation status. STAT_INSECURE is returned when
   the work budget runs out, when memory cannot be allocated, or when the
   exchange with the server fails. */
static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
                           int class, char *name, char *keyname, struct server *server, int *keycount)
{
  int new_status;

  /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
  if (--(*keycount) == 0)
    return STAT_INSECURE;

  if (status == STAT_NEED_KEY)
    new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
  else if (status == STAT_NEED_DS)
    new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
  else
    new_status = dnssec_validate_reply(now, header, n, name, keyname, &class);

  /* Can't validate because we need a key/DS whose name is now in keyname.
     Make a query for same, and recurse to validate */
  if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
    {
      size_t m;
      unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
      unsigned char *payload;
      struct dns_header *new_header;
      u16 *length;
      unsigned char c1, c2;

      if (!packet)
        return STAT_INSECURE;

      /* Derive the working pointers only once the allocation is known to
         be good: the first two bytes carry the TCP length prefix, the
         message itself follows. */
      payload = &packet[2];
      new_header = (struct dns_header *)payload;
      length = (u16 *)packet;

    another_tcp_key:
      m = dnssec_generate_query(new_header, ((char *) new_header) + 65536, keyname, class,
                                new_status == STAT_NEED_KEY ? T_DNSKEY : T_DS, &server->addr);

      *length = htons(m);

      if (!read_write(server->tcpfd, packet, m + sizeof(u16), 0) ||
          !read_write(server->tcpfd, &c1, 1, 1) ||
          !read_write(server->tcpfd, &c2, 1, 1) ||
          !read_write(server->tcpfd, payload, (c1 << 8) | c2, 1))
        new_status = STAT_INSECURE;
      else
        {
          m = (c1 << 8) | c2;

          /* Keep the outcome of validating the key/DS answer. If it did not
             come out secure, that outcome is what we report, rather than
             leaking the intermediate STAT_NEED_* value to the caller. */
          new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, keycount);

          if (new_status == STAT_SECURE)
            {
              /* Reached a validated record, now try again at this level.
                 Note that we may get ANOTHER NEED_* if an answer needs more than one key.
                 If so, go round again. */

              if (status == STAT_NEED_KEY)
                new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
              else if (status == STAT_NEED_DS)
                new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
              else
                new_status = dnssec_validate_reply(now, header, n, name, keyname, &class);

              if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
                goto another_tcp_key;
            }
        }

      free(packet);
    }

  return new_status;
}
#endif
1280
1281
1282 /* The daemon forks before calling this: it should deal with one connection,
1283 blocking as neccessary, and then return. Note, need to be a bit careful
1284 about resources for debug mode, when the fork is suppressed: that's
1285 done by the caller. */
1286 unsigned char *tcp_request(int confd, time_t now,
1287 union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
1288 {
1289 size_t size = 0;
1290 int norebind = 0;
1291 #ifdef HAVE_AUTH
1292 int local_auth = 0;
1293 #endif
1294 int checking_disabled, ad_question, do_bit, added_pheader = 0;
1295 int check_subnet, no_cache_dnssec = 0, cache_secure = 0;
1296 size_t m;
1297 unsigned short qtype;
1298 unsigned int gotname;
1299 unsigned char c1, c2;
1300 /* Max TCP packet + slop + size */
1301 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
1302 unsigned char *payload = &packet[2];
1303 /* largest field in header is 16-bits, so this is still sufficiently aligned */
1304 struct dns_header *header = (struct dns_header *)payload;
1305 u16 *length = (u16 *)packet;
1306 struct server *last_server;
1307 struct in_addr dst_addr_4;
1308 union mysockaddr peer_addr;
1309 socklen_t peer_len = sizeof(union mysockaddr);
1310
1311 if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
1312 return packet;
1313
1314 while (1)
1315 {
1316 if (!packet ||
1317 !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
1318 !(size = c1 << 8 | c2) ||
1319 !read_write(confd, payload, size, 1))
1320 return packet;
1321
1322 if (size < (int)sizeof(struct dns_header))
1323 continue;
1324
1325 check_subnet = 0;
1326
1327 /* save state of "cd" flag in query */
1328 if ((checking_disabled = header->hb4 & HB4_CD))
1329 no_cache_dnssec = 1;
1330
1331 if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
1332 {
1333 #ifdef HAVE_AUTH
1334 struct auth_zone *zone;
1335 #endif
1336 char *types = querystr(auth_dns ? "auth" : "query", qtype);
1337
1338 if (peer_addr.sa.sa_family == AF_INET)
1339 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1340 (struct all_addr *)&peer_addr.in.sin_addr, types);
1341 #ifdef HAVE_IPV6
1342 else
1343 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1344 (struct all_addr *)&peer_addr.in6.sin6_addr, types);
1345 #endif
1346
1347 #ifdef HAVE_AUTH
1348 /* find queries for zones we're authoritative for, and answer them directly */
1349 if (!auth_dns)
1350 for (zone = daemon->auth_zones; zone; zone = zone->next)
1351 if (in_zone(zone, daemon->namebuff, NULL))
1352 {
1353 auth_dns = 1;
1354 local_auth = 1;
1355 break;
1356 }
1357 #endif
1358 }
1359
1360 if (local_addr->sa.sa_family == AF_INET)
1361 dst_addr_4 = local_addr->in.sin_addr;
1362 else
1363 dst_addr_4.s_addr = 0;
1364
1365 #ifdef HAVE_AUTH
1366 if (auth_dns)
1367 m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, local_auth);
1368 else
1369 #endif
1370 {
1371 /* m > 0 if answered from cache */
1372 m = answer_request(header, ((char *) header) + 65536, (size_t)size,
1373 dst_addr_4, netmask, now, &ad_question, &do_bit);
1374
1375 /* Do this by steam now we're not in the select() loop */
1376 check_log_writer(NULL);
1377
1378 if (m == 0)
1379 {
1380 unsigned int flags = 0;
1381 struct all_addr *addrp = NULL;
1382 int type = 0;
1383 char *domain = NULL;
1384
1385 if (option_bool(OPT_ADD_MAC))
1386 size = add_mac(header, size, ((char *) header) + 65536, &peer_addr);
1387
1388 if (option_bool(OPT_CLIENT_SUBNET))
1389 {
1390 size_t new = add_source_addr(header, size, ((char *) header) + 65536, &peer_addr);
1391 if (size != new)
1392 {
1393 size = new;
1394 check_subnet = 1;
1395 }
1396 }
1397
1398 if (gotname)
1399 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
1400
1401 if (type != 0 || option_bool(OPT_ORDER) || !daemon->last_server)
1402 last_server = daemon->servers;
1403 else
1404 last_server = daemon->last_server;
1405
1406 if (!flags && last_server)
1407 {
1408 struct server *firstsendto = NULL;
1409 #ifdef HAVE_DNSSEC
1410 unsigned char *newhash, hash[HASH_SIZE];
1411 if ((newhash = hash_questions(header, (unsigned int)size, daemon->keyname)))
1412 memcpy(hash, newhash, HASH_SIZE);
1413 #else
1414 unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
1415 #endif
1416 /* Loop round available servers until we succeed in connecting to one.
1417 Note that this code subtley ensures that consecutive queries on this connection
1418 which can go to the same server, do so. */
1419 while (1)
1420 {
1421 if (!firstsendto)
1422 firstsendto = last_server;
1423 else
1424 {
1425 if (!(last_server = last_server->next))
1426 last_server = daemon->servers;
1427
1428 if (last_server == firstsendto)
1429 break;
1430 }
1431
1432 /* server for wrong domain */
1433 if (type != (last_server->flags & SERV_TYPE) ||
1434 (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
1435 continue;
1436
1437 if (last_server->tcpfd == -1)
1438 {
1439 if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
1440 continue;
1441
1442 if ((!local_bind(last_server->tcpfd, &last_server->source_addr, last_server->interface, 1) ||
1443 connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
1444 {
1445 close(last_server->tcpfd);
1446 last_server->tcpfd = -1;
1447 continue;
1448 }
1449
1450 #ifdef HAVE_DNSSEC
1451 if (option_bool(OPT_DNSSEC_VALID))
1452 {
1453 size_t new_size = add_do_bit(header, size, ((char *) header) + 65536);
1454
1455 /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
1456 this allows it to select auth servers when one is returning bad data. */
1457 if (option_bool(OPT_DNSSEC_DEBUG))
1458 header->hb4 |= HB4_CD;
1459
1460 if (size != new_size)
1461 added_pheader = 1;
1462
1463 size = new_size;
1464 }
1465 #endif
1466
1467 #ifdef HAVE_CONNTRACK
1468 /* Copy connection mark of incoming query to outgoing connection. */
1469 if (option_bool(OPT_CONNTRACK))
1470 {
1471 unsigned int mark;
1472 struct all_addr local;
1473 #ifdef HAVE_IPV6
1474 if (local_addr->sa.sa_family == AF_INET6)
1475 local.addr.addr6 = local_addr->in6.sin6_addr;
1476 else
1477 #endif
1478 local.addr.addr4 = local_addr->in.sin_addr;
1479
1480 if (get_incoming_mark(&peer_addr, &local, 1, &mark))
1481 setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
1482 }
1483 #endif
1484 }
1485
1486 *length = htons(size);
1487
1488 if (!read_write(last_server->tcpfd, packet, size + sizeof(u16), 0) ||
1489 !read_write(last_server->tcpfd, &c1, 1, 1) ||
1490 !read_write(last_server->tcpfd, &c2, 1, 1) ||
1491 !read_write(last_server->tcpfd, payload, (c1 << 8) | c2, 1))
1492 {
1493 close(last_server->tcpfd);
1494 last_server->tcpfd = -1;
1495 continue;
1496 }
1497
1498 m = (c1 << 8) | c2;
1499
1500 if (!gotname)
1501 strcpy(daemon->namebuff, "query");
1502 if (last_server->addr.sa.sa_family == AF_INET)
1503 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
1504 (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
1505 #ifdef HAVE_IPV6
1506 else
1507 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
1508 (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
1509 #endif
1510
1511 #ifdef HAVE_DNSSEC
1512 if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled)
1513 {
1514 int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
1515 int status = tcp_key_recurse(now, STAT_TRUNCATED, header, m, 0, daemon->namebuff, daemon->keyname, last_server, &keycount);
1516 char *result;
1517
1518 if (keycount == 0)
1519 result = "ABANDONED";
1520 else
1521 result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
1522
1523 log_query(F_KEYTAG | F_SECSTAT, "result", NULL, result);
1524
1525 if (status == STAT_BOGUS)
1526 no_cache_dnssec = 1;
1527
1528 if (status == STAT_SECURE)
1529 cache_secure = 1;
1530 }
1531 #endif
1532
1533 /* restore CD bit to the value in the query */
1534 if (checking_disabled)
1535 header->hb4 |= HB4_CD;
1536 else
1537 header->hb4 &= ~HB4_CD;
1538
1539 /* There's no point in updating the cache, since this process will exit and
1540 lose the information after a few queries. We make this call for the alias and
1541 bogus-nxdomain side-effects. */
1542 /* If the crc of the question section doesn't match the crc we sent, then
1543 someone might be attempting to insert bogus values into the cache by
1544 sending replies containing questions and bogus answers. */
1545 #ifdef HAVE_DNSSEC
1546 newhash = hash_questions(header, (unsigned int)m, daemon->namebuff);
1547 if (!newhash || memcmp(hash, newhash, HASH_SIZE) != 0)
1548 {
1549 m = 0;
1550 break;
1551 }
1552 #else
1553 if (crc != questions_crc(header, (unsigned int)m, daemon->namebuff))
1554 {
1555 m = 0;
1556 break;
1557 }
1558 #endif
1559
1560 m = process_reply(header, now, last_server, (unsigned int)m,
1561 option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec,
1562 cache_secure, ad_question, do_bit, added_pheader, check_subnet, &peer_addr);
1563
1564 break;
1565 }
1566 }
1567
1568 /* In case of local answer or no connections made. */
1569 if (m == 0)
1570 m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
1571 }
1572 }
1573
1574 check_log_writer(NULL);
1575
1576 *length = htons(m);
1577
1578 if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
1579 return packet;
1580 }
1581 }
1582
1583 static struct frec *allocate_frec(time_t now)
1584 {
1585 struct frec *f;
1586
1587 if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
1588 {
1589 f->next = daemon->frec_list;
1590 f->time = now;
1591 f->sentto = NULL;
1592 f->rfd4 = NULL;
1593 f->flags = 0;
1594 #ifdef HAVE_IPV6
1595 f->rfd6 = NULL;
1596 #endif
1597 #ifdef HAVE_DNSSEC
1598 f->dependent = NULL;
1599 f->blocking_query = NULL;
1600 f->stash = NULL;
1601 #endif
1602 daemon->frec_list = f;
1603 }
1604
1605 return f;
1606 }
1607
1608 static struct randfd *allocate_rfd(int family)
1609 {
1610 static int finger = 0;
1611 int i;
1612
1613 /* limit the number of sockets we have open to avoid starvation of
1614 (eg) TFTP. Once we have a reasonable number, randomness should be OK */
1615
1616 for (i = 0; i < RANDOM_SOCKS; i++)
1617 if (daemon->randomsocks[i].refcount == 0)
1618 {
1619 if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
1620 break;
1621
1622 daemon->randomsocks[i].refcount = 1;
1623 daemon->randomsocks[i].family = family;
1624 return &daemon->randomsocks[i];
1625 }
1626
1627 /* No free ones or cannot get new socket, grab an existing one */
1628 for (i = 0; i < RANDOM_SOCKS; i++)
1629 {
1630 int j = (i+finger) % RANDOM_SOCKS;
1631 if (daemon->randomsocks[j].refcount != 0 &&
1632 daemon->randomsocks[j].family == family &&
1633 daemon->randomsocks[j].refcount != 0xffff)
1634 {
1635 finger = j;
1636 daemon->randomsocks[j].refcount++;
1637 return &daemon->randomsocks[j];
1638 }
1639 }
1640
1641 return NULL; /* doom */
1642 }
1643 static void free_frec(struct frec *f)
1644 {
1645 if (f->rfd4 && --(f->rfd4->refcount) == 0)
1646 close(f->rfd4->fd);
1647
1648 f->rfd4 = NULL;
1649 f->sentto = NULL;
1650 f->flags = 0;
1651
1652 #ifdef HAVE_IPV6
1653 if (f->rfd6 && --(f->rfd6->refcount) == 0)
1654 close(f->rfd6->fd);
1655
1656 f->rfd6 = NULL;
1657 #endif
1658
1659 #ifdef HAVE_DNSSEC
1660 if (f->stash)
1661 {
1662 blockdata_free(f->stash);
1663 f->stash = NULL;
1664 }
1665
1666 /* Anything we're waiting on is pointless now, too */
1667 if (f->blocking_query)
1668 free_frec(f->blocking_query);
1669 f->blocking_query = NULL;
1670 f->dependent = NULL;
1671 #endif
1672 }
1673
1674 /* if wait==NULL return a free or older than TIMEOUT record.
1675 else return *wait zero if one available, or *wait is delay to
1676 when the oldest in-use record will expire. Impose an absolute
1677 limit of 4*TIMEOUT before we wipe things (for random sockets).
1678 If force is set, always return a result, even if we have
1679 to allocate above the limit. */
1680 struct frec *get_new_frec(time_t now, int *wait, int force)
1681 {
1682 struct frec *f, *oldest, *target;
1683 int count;
1684
1685 if (wait)
1686 *wait = 0;
1687
1688 for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next, count++)
1689 if (!f->sentto)
1690 target = f;
1691 else
1692 {
1693 if (difftime(now, f->time) >= 4*TIMEOUT)
1694 {
1695 free_frec(f);
1696 target = f;
1697 }
1698
1699 if (!oldest || difftime(f->time, oldest->time) <= 0)
1700 oldest = f;
1701 }
1702
1703 if (target)
1704 {
1705 target->time = now;
1706 return target;
1707 }
1708
1709 /* can't find empty one, use oldest if there is one
1710 and it's older than timeout */
1711 if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
1712 {
1713 /* keep stuff for twice timeout if we can by allocating a new
1714 record instead */
1715 if (difftime(now, oldest->time) < 2*TIMEOUT &&
1716 count <= daemon->ftabsize &&
1717 (f = allocate_frec(now)))
1718 return f;
1719
1720 if (!wait)
1721 {
1722 free_frec(oldest);
1723 oldest->time = now;
1724 }
1725 return oldest;
1726 }
1727
1728 /* none available, calculate time 'till oldest record expires */
1729 if (!force && count > daemon->ftabsize)
1730 {
1731 static time_t last_log = 0;
1732
1733 if (oldest && wait)
1734 *wait = oldest->time + (time_t)TIMEOUT - now;
1735
1736 if ((int)difftime(now, last_log) > 5)
1737 {
1738 last_log = now;
1739 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
1740 }
1741
1742 return NULL;
1743 }
1744
1745 if (!(f = allocate_frec(now)) && wait)
1746 /* wait one second on malloc failure */
1747 *wait = 1;
1748
1749 return f; /* OK if malloc fails and this is NULL */
1750 }
1751
1752 /* crc is all-ones if not known. */
1753 static struct frec *lookup_frec(unsigned short id, void *hash)
1754 {
1755 struct frec *f;
1756
1757 for(f = daemon->frec_list; f; f = f->next)
1758 if (f->sentto && f->new_id == id &&
1759 (!hash || memcmp(hash, f->hash, HASH_SIZE) == 0))
1760 return f;
1761
1762 return NULL;
1763 }
1764
1765 static struct frec *lookup_frec_by_sender(unsigned short id,
1766 union mysockaddr *addr,
1767 void *hash)
1768 {
1769 struct frec *f;
1770
1771 for(f = daemon->frec_list; f; f = f->next)
1772 if (f->sentto &&
1773 f->orig_id == id &&
1774 memcmp(hash, f->hash, HASH_SIZE) == 0 &&
1775 sockaddr_isequal(&f->source, addr))
1776 return f;
1777
1778 return NULL;
1779 }
1780
1781 /* A server record is going away, remove references to it */
1782 void server_gone(struct server *server)
1783 {
1784 struct frec *f;
1785
1786 for (f = daemon->frec_list; f; f = f->next)
1787 if (f->sentto && f->sentto == server)
1788 free_frec(f);
1789
1790 if (daemon->last_server == server)
1791 daemon->last_server = NULL;
1792
1793 if (daemon->srv_save == server)
1794 daemon->srv_save = NULL;
1795 }
1796
/* Return a random 16-bit id that is not the new_id of any
   forwarding record currently in use. */
static unsigned short get_id(void)
{
  unsigned short candidate;

  for (;;)
    {
      candidate = rand16();

      /* keep drawing until no in-use record already has this id */
      if (!lookup_frec(candidate, NULL))
        return candidate;
    }
}
1808
1809
1810
1811
1812