]> git.ipfire.org Git - people/ms/dnsmasq.git/blob - src/forward.c
Merge branch 'master' of ssh://central/var/cache/git/dnsmasq
[people/ms/dnsmasq.git] / src / forward.c
1 /* dnsmasq is Copyright (c) 2000-2014 Simon Kelley
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 dated June, 1991, or
6 (at your option) version 3 dated 29 June, 2007.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include "dnsmasq.h"
18
/* Prototypes for the static helpers of this translation unit that manage
   forwarding records (struct frec), query ids and random-port sockets. */

/* Forwarding-record lookup, by id + question CRC, optionally also by sender. */
static struct frec *lookup_frec(unsigned short id, unsigned int crc);
static struct frec *lookup_frec_by_sender(unsigned short id, union mysockaddr *addr, unsigned int crc);

/* Query-id generation and forwarding-record release. */
static unsigned short get_id(unsigned int crc);
static void free_frec(struct frec *f);

/* Socket allocation for the given address family. */
static struct randfd *allocate_rfd(int family);
26
27 /* Send a UDP packet with its source address set as "source"
28 unless nowild is true, when we just send it with the kernel default */
29 int send_from(int fd, int nowild, char *packet, size_t len,
30 union mysockaddr *to, struct all_addr *source,
31 unsigned int iface)
32 {
33 struct msghdr msg;
34 struct iovec iov[1];
35 union {
36 struct cmsghdr align; /* this ensures alignment */
37 #if defined(HAVE_LINUX_NETWORK)
38 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39 #elif defined(IP_SENDSRCADDR)
40 char control[CMSG_SPACE(sizeof(struct in_addr))];
41 #endif
42 #ifdef HAVE_IPV6
43 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44 #endif
45 } control_u;
46
47 iov[0].iov_base = packet;
48 iov[0].iov_len = len;
49
50 msg.msg_control = NULL;
51 msg.msg_controllen = 0;
52 msg.msg_flags = 0;
53 msg.msg_name = to;
54 msg.msg_namelen = sa_len(to);
55 msg.msg_iov = iov;
56 msg.msg_iovlen = 1;
57
58 if (!nowild)
59 {
60 struct cmsghdr *cmptr;
61 msg.msg_control = &control_u;
62 msg.msg_controllen = sizeof(control_u);
63 cmptr = CMSG_FIRSTHDR(&msg);
64
65 if (to->sa.sa_family == AF_INET)
66 {
67 #if defined(HAVE_LINUX_NETWORK)
68 struct in_pktinfo p;
69 p.ipi_ifindex = 0;
70 p.ipi_spec_dst = source->addr.addr4;
71 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
72 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
73 cmptr->cmsg_level = IPPROTO_IP;
74 cmptr->cmsg_type = IP_PKTINFO;
75 #elif defined(IP_SENDSRCADDR)
76 memcpy(CMSG_DATA(cmptr), &(source->addr.addr4), sizeof(source->addr.addr4));
77 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78 cmptr->cmsg_level = IPPROTO_IP;
79 cmptr->cmsg_type = IP_SENDSRCADDR;
80 #endif
81 }
82 else
83 #ifdef HAVE_IPV6
84 {
85 struct in6_pktinfo p;
86 p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87 p.ipi6_addr = source->addr.addr6;
88 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
89 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
90 cmptr->cmsg_type = daemon->v6pktinfo;
91 cmptr->cmsg_level = IPPROTO_IPV6;
92 }
93 #else
94 (void)iface; /* eliminate warning */
95 #endif
96 }
97
98 while (sendmsg(fd, &msg, 0) == -1)
99 {
100 if (retry_send())
101 continue;
102
103 /* If interface is still in DAD, EINVAL results - ignore that. */
104 if (errno == EINVAL)
105 break;
106
107 my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
108 return 0;
109 }
110
111 return 1;
112 }
113
114 static unsigned int search_servers(time_t now, struct all_addr **addrpp,
115 unsigned int qtype, char *qdomain, int *type, char **domain, int *norebind)
116
117 {
118 /* If the query ends in the domain in one of our servers, set
119 domain to point to that name. We find the largest match to allow both
120 domain.org and sub.domain.org to exist. */
121
122 unsigned int namelen = strlen(qdomain);
123 unsigned int matchlen = 0;
124 struct server *serv;
125 unsigned int flags = 0;
126
127 for (serv = daemon->servers; serv; serv=serv->next)
128 /* domain matches take priority over NODOTS matches */
129 if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
130 {
131 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
132 *type = SERV_FOR_NODOTS;
133 if (serv->flags & SERV_NO_ADDR)
134 flags = F_NXDOMAIN;
135 else if (serv->flags & SERV_LITERAL_ADDRESS)
136 {
137 if (sflag & qtype)
138 {
139 flags = sflag;
140 if (serv->addr.sa.sa_family == AF_INET)
141 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
142 #ifdef HAVE_IPV6
143 else
144 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
145 #endif
146 }
147 else if (!flags || (flags & F_NXDOMAIN))
148 flags = F_NOERR;
149 }
150 }
151 else if (serv->flags & SERV_HAS_DOMAIN)
152 {
153 unsigned int domainlen = strlen(serv->domain);
154 char *matchstart = qdomain + namelen - domainlen;
155 if (namelen >= domainlen &&
156 hostname_isequal(matchstart, serv->domain) &&
157 (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
158 {
159 if (serv->flags & SERV_NO_REBIND)
160 *norebind = 1;
161 else
162 {
163 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
164 /* implement priority rules for --address and --server for same domain.
165 --address wins if the address is for the correct AF
166 --server wins otherwise. */
167 if (domainlen != 0 && domainlen == matchlen)
168 {
169 if ((serv->flags & SERV_LITERAL_ADDRESS))
170 {
171 if (!(sflag & qtype) && flags == 0)
172 continue;
173 }
174 else
175 {
176 if (flags & (F_IPV4 | F_IPV6))
177 continue;
178 }
179 }
180
181 if (domainlen >= matchlen)
182 {
183 *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND);
184 *domain = serv->domain;
185 matchlen = domainlen;
186 if (serv->flags & SERV_NO_ADDR)
187 flags = F_NXDOMAIN;
188 else if (serv->flags & SERV_LITERAL_ADDRESS)
189 {
190 if (sflag & qtype)
191 {
192 flags = sflag;
193 if (serv->addr.sa.sa_family == AF_INET)
194 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
195 #ifdef HAVE_IPV6
196 else
197 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
198 #endif
199 }
200 else if (!flags || (flags & F_NXDOMAIN))
201 flags = F_NOERR;
202 }
203 else
204 flags = 0;
205 }
206 }
207 }
208 }
209
210 if (flags == 0 && !(qtype & F_QUERY) &&
211 option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
212 /* don't forward A or AAAA queries for simple names, except the empty name */
213 flags = F_NOERR;
214
215 if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
216 flags = F_NOERR;
217
218 if (flags)
219 {
220 int logflags = 0;
221
222 if (flags == F_NXDOMAIN || flags == F_NOERR)
223 logflags = F_NEG | qtype;
224
225 log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
226 }
227 else if ((*type) & SERV_USE_RESOLV)
228 {
229 *type = 0; /* use normal servers for this domain */
230 *domain = NULL;
231 }
232 return flags;
233 }
234
235 static int forward_query(int udpfd, union mysockaddr *udpaddr,
236 struct all_addr *dst_addr, unsigned int dst_iface,
237 struct dns_header *header, size_t plen, time_t now, struct frec *forward)
238 {
239 char *domain = NULL;
240 int type = 0, norebind = 0;
241 struct all_addr *addrp = NULL;
242 unsigned int crc = questions_crc(header, plen, daemon->namebuff);
243 unsigned int flags = 0;
244 unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
245 struct server *start = NULL;
246
247 /* RFC 4035: sect 4.6 para 2 */
248 header->hb4 &= ~HB4_AD;
249
250 /* may be no servers available. */
251 if (!daemon->servers)
252 forward = NULL;
253 else if (forward || (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, crc)))
254 {
255 /* retry on existing query, send to all available servers */
256 domain = forward->sentto->domain;
257 forward->sentto->failed_queries++;
258 if (!option_bool(OPT_ORDER))
259 {
260 forward->forwardall = 1;
261 daemon->last_server = NULL;
262 }
263 type = forward->sentto->flags & SERV_TYPE;
264 if (!(start = forward->sentto->next))
265 start = daemon->servers; /* at end of list, recycle */
266 header->id = htons(forward->new_id);
267 }
268 else
269 {
270 if (gotname)
271 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
272
273 if (!flags && !(forward = get_new_frec(now, NULL, 0)))
274 /* table full - server failure. */
275 flags = F_NEG;
276
277 if (forward)
278 {
279 forward->source = *udpaddr;
280 forward->dest = *dst_addr;
281 forward->iface = dst_iface;
282 forward->orig_id = ntohs(header->id);
283 forward->new_id = get_id(crc);
284 forward->fd = udpfd;
285 forward->crc = crc;
286 forward->forwardall = 0;
287 forward->flags = 0;
288 if (norebind)
289 forward->flags |= FREC_NOREBIND;
290 if (header->hb4 & HB4_CD)
291 forward->flags |= FREC_CHECKING_DISABLED;
292
293 header->id = htons(forward->new_id);
294
295 /* In strict_order mode, always try servers in the order
296 specified in resolv.conf, if a domain is given
297 always try all the available servers,
298 otherwise, use the one last known to work. */
299
300 if (type == 0)
301 {
302 if (option_bool(OPT_ORDER))
303 start = daemon->servers;
304 else if (!(start = daemon->last_server) ||
305 daemon->forwardcount++ > FORWARD_TEST ||
306 difftime(now, daemon->forwardtime) > FORWARD_TIME)
307 {
308 start = daemon->servers;
309 forward->forwardall = 1;
310 daemon->forwardcount = 0;
311 daemon->forwardtime = now;
312 }
313 }
314 else
315 {
316 start = daemon->servers;
317 if (!option_bool(OPT_ORDER))
318 forward->forwardall = 1;
319 }
320 }
321 }
322
323 /* check for send errors here (no route to host)
324 if we fail to send to all nameservers, send back an error
325 packet straight away (helps modem users when offline) */
326
327 if (!flags && forward)
328 {
329 struct server *firstsentto = start;
330 int forwarded = 0;
331
332 if (option_bool(OPT_ADD_MAC))
333 plen = add_mac(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
334
335 if (option_bool(OPT_CLIENT_SUBNET))
336 {
337 size_t new = add_source_addr(header, plen, ((char *) header) + daemon->packet_buff_sz, &forward->source);
338 if (new != plen)
339 {
340 plen = new;
341 forward->flags |= FREC_HAS_SUBNET;
342 }
343 }
344
345 #ifdef HAVE_DNSSEC
346 if (option_bool(OPT_DNSSEC_VALID))
347 {
348 plen = add_do_bit(header, plen, ((char *) header) + daemon->packet_buff_sz);
349 header->hb4 |= HB4_CD;
350 }
351 #endif
352
353 while (1)
354 {
355 /* only send to servers dealing with our domain.
356 domain may be NULL, in which case server->domain
357 must be NULL also. */
358
359 if (type == (start->flags & SERV_TYPE) &&
360 (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
361 !(start->flags & SERV_LITERAL_ADDRESS))
362 {
363 int fd;
364
365 /* find server socket to use, may need to get random one. */
366 if (start->sfd)
367 fd = start->sfd->fd;
368 else
369 {
370 #ifdef HAVE_IPV6
371 if (start->addr.sa.sa_family == AF_INET6)
372 {
373 if (!forward->rfd6 &&
374 !(forward->rfd6 = allocate_rfd(AF_INET6)))
375 break;
376 daemon->rfd_save = forward->rfd6;
377 fd = forward->rfd6->fd;
378 }
379 else
380 #endif
381 {
382 if (!forward->rfd4 &&
383 !(forward->rfd4 = allocate_rfd(AF_INET)))
384 break;
385 daemon->rfd_save = forward->rfd4;
386 fd = forward->rfd4->fd;
387 }
388
389 #ifdef HAVE_CONNTRACK
390 /* Copy connection mark of incoming query to outgoing connection. */
391 if (option_bool(OPT_CONNTRACK))
392 {
393 unsigned int mark;
394 if (get_incoming_mark(&forward->source, &forward->dest, 0, &mark))
395 setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
396 }
397 #endif
398 }
399
400 if (sendto(fd, (char *)header, plen, 0,
401 &start->addr.sa,
402 sa_len(&start->addr)) == -1)
403 {
404 if (retry_send())
405 continue;
406 }
407 else
408 {
409 /* Keep info in case we want to re-send this packet */
410 daemon->srv_save = start;
411 daemon->packet_len = plen;
412
413 if (!gotname)
414 strcpy(daemon->namebuff, "query");
415 if (start->addr.sa.sa_family == AF_INET)
416 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
417 (struct all_addr *)&start->addr.in.sin_addr, NULL);
418 #ifdef HAVE_IPV6
419 else
420 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
421 (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
422 #endif
423 start->queries++;
424 forwarded = 1;
425 forward->sentto = start;
426 if (!forward->forwardall)
427 break;
428 forward->forwardall++;
429 }
430 }
431
432 if (!(start = start->next))
433 start = daemon->servers;
434
435 if (start == firstsentto)
436 break;
437 }
438
439 if (forwarded)
440 return 1;
441
442 /* could not send on, prepare to return */
443 header->id = htons(forward->orig_id);
444 free_frec(forward); /* cancel */
445 }
446
447 /* could not send on, return empty answer or address if known for whole domain */
448 if (udpfd != -1)
449 {
450 plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
451 send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
452 }
453
454 return 0;
455 }
456
457 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
458 int no_cache, int cache_secure, int check_subnet, union mysockaddr *query_source)
459 {
460 unsigned char *pheader, *sizep;
461 char **sets = 0;
462 int munged = 0, is_sign;
463 size_t plen;
464
465 #ifdef HAVE_IPSET
466 /* Similar algorithm to search_servers. */
467 struct ipsets *ipset_pos;
468 unsigned int namelen = strlen(daemon->namebuff);
469 unsigned int matchlen = 0;
470 for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next)
471 {
472 unsigned int domainlen = strlen(ipset_pos->domain);
473 char *matchstart = daemon->namebuff + namelen - domainlen;
474 if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
475 (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
476 domainlen >= matchlen) {
477 matchlen = domainlen;
478 sets = ipset_pos->sets;
479 }
480 }
481 #endif
482
483 /* If upstream is advertising a larger UDP packet size
484 than we allow, trim it so that we don't get overlarge
485 requests for the client. We can't do this for signed packets. */
486
487 if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)))
488 {
489 if (!is_sign)
490 {
491 unsigned short udpsz;
492 unsigned char *psave = sizep;
493
494 GETSHORT(udpsz, sizep);
495 if (udpsz > daemon->edns_pktsz)
496 PUTSHORT(daemon->edns_pktsz, psave);
497 }
498
499 if (check_subnet && !check_source(header, plen, pheader, query_source))
500 {
501 my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
502 return 0;
503 }
504 }
505
506 /* RFC 4035 sect 4.6 para 3 */
507 if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
508 header->hb4 &= ~HB4_AD;
509
510 #ifdef HAVE_DNSSEC
511 if (option_bool(OPT_DNSSEC_VALID))
512 header->hb4 &= ~HB4_AD;
513
514 if (cache_secure)
515 header->hb4 |= HB4_AD;
516 #endif
517
518 if (OPCODE(header) != QUERY || (RCODE(header) != NOERROR && RCODE(header) != NXDOMAIN))
519 return n;
520
521 /* Complain loudly if the upstream server is non-recursive. */
522 if (!(header->hb4 & HB4_RA) && RCODE(header) == NOERROR && ntohs(header->ancount) == 0 &&
523 server && !(server->flags & SERV_WARNED_RECURSIVE))
524 {
525 prettyprint_addr(&server->addr, daemon->namebuff);
526 my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
527 if (!option_bool(OPT_LOG))
528 server->flags |= SERV_WARNED_RECURSIVE;
529 }
530
531 if (daemon->bogus_addr && RCODE(header) != NXDOMAIN &&
532 check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
533 {
534 munged = 1;
535 SET_RCODE(header, NXDOMAIN);
536 header->hb3 &= ~HB3_AA;
537 }
538 else
539 {
540 if (RCODE(header) == NXDOMAIN &&
541 extract_request(header, n, daemon->namebuff, NULL) &&
542 check_for_local_domain(daemon->namebuff, now))
543 {
544 /* if we forwarded a query for a locally known name (because it was for
545 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
546 since we know that the domain exists, even if upstream doesn't */
547 munged = 1;
548 header->hb3 |= HB3_AA;
549 SET_RCODE(header, NOERROR);
550 }
551
552 if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, no_cache, cache_secure))
553 {
554 my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
555 munged = 1;
556 }
557 }
558
559 /* do this after extract_addresses. Ensure NODATA reply and remove
560 nameserver info. */
561
562 if (munged)
563 {
564 header->ancount = htons(0);
565 header->nscount = htons(0);
566 header->arcount = htons(0);
567 }
568
569 /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
570 sections of the packet. Find the new length here and put back pseudoheader
571 if it was removed. */
572 return resize_packet(header, n, pheader, plen);
573 }
574
575 /* sets new last_server */
576 void reply_query(int fd, int family, time_t now)
577 {
578 /* packet from peer server, extract data for cache, and send to
579 original requester */
580 struct dns_header *header;
581 union mysockaddr serveraddr;
582 struct frec *forward;
583 socklen_t addrlen = sizeof(serveraddr);
584 ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
585 size_t nn;
586 struct server *server;
587
588 /* packet buffer overwritten */
589 daemon->srv_save = NULL;
590
591 /* Determine the address of the server replying so that we can mark that as good */
592 serveraddr.sa.sa_family = family;
593 #ifdef HAVE_IPV6
594 if (serveraddr.sa.sa_family == AF_INET6)
595 serveraddr.in6.sin6_flowinfo = 0;
596 #endif
597
598 /* spoof check: answer must come from known server, */
599 for (server = daemon->servers; server; server = server->next)
600 if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
601 sockaddr_isequal(&server->addr, &serveraddr))
602 break;
603
604 header = (struct dns_header *)daemon->packet;
605
606 if (!server ||
607 n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR) ||
608 !(forward = lookup_frec(ntohs(header->id), questions_crc(header, n, daemon->namebuff))))
609 return;
610
611 if ((RCODE(header) == SERVFAIL || RCODE(header) == REFUSED) &&
612 !option_bool(OPT_ORDER) &&
613 forward->forwardall == 0)
614 /* for broken servers, attempt to send to another one. */
615 {
616 unsigned char *pheader;
617 size_t plen;
618 int is_sign;
619
620 /* recreate query from reply */
621 pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
622 if (!is_sign)
623 {
624 header->ancount = htons(0);
625 header->nscount = htons(0);
626 header->arcount = htons(0);
627 if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
628 {
629 header->hb3 &= ~(HB3_QR | HB3_TC);
630 forward_query(-1, NULL, NULL, 0, header, nn, now, forward);
631 return;
632 }
633 }
634 }
635
636 server = forward->sentto;
637
638 if ((forward->sentto->flags & SERV_TYPE) == 0)
639 {
640 if (RCODE(header) == SERVFAIL || RCODE(header) == REFUSED)
641 server = NULL;
642 else
643 {
644 struct server *last_server;
645
646 /* find good server by address if possible, otherwise assume the last one we sent to */
647 for (last_server = daemon->servers; last_server; last_server = last_server->next)
648 if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
649 sockaddr_isequal(&last_server->addr, &serveraddr))
650 {
651 server = last_server;
652 break;
653 }
654 }
655 if (!option_bool(OPT_ALL_SERVERS))
656 daemon->last_server = server;
657 }
658
659 /* If the answer is an error, keep the forward record in place in case
660 we get a good reply from another server. Kill it when we've
661 had replies from all to avoid filling the forwarding table when
662 everything is broken */
663 if (forward->forwardall == 0 || --forward->forwardall == 1 ||
664 (RCODE(header) != REFUSED && RCODE(header) != SERVFAIL))
665 {
666 int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0;
667
668 if (option_bool(OPT_NO_REBIND))
669 check_rebind = !(forward->flags & FREC_NOREBIND);
670
671 /* Don't cache replies where DNSSEC validation was turned off, either
672 the upstream server told us so, or the original query specified it. */
673 if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
674 no_cache_dnssec = 1;
675
676 #ifdef HAVE_DNSSEC
677 if (option_bool(OPT_DNSSEC_VALID) && !(forward->flags & FREC_CHECKING_DISABLED))
678 {
679 int status;
680
681 /* We've had a reply already, which we're validating. Ignore this duplicate */
682 if (forward->stash)
683 return;
684
685 if (header->hb3 & HB3_TC)
686 {
687 /* Truncated answer can't be validated.
688 The client will retry over TCP, but if this is an answer to a
689 DNSSEC-generated query, we have a problem. Should really re-send
690 over TCP. No-one with any sense will make a DNSKEY or DS RRset
691 exceed 4096, so this may not be a real problem. Just log
692 for now. */
693 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
694 my_syslog(LOG_ERR, _("Reply to DNSSEC query truncated - validation fails."));
695 status = STAT_INSECURE;
696 }
697 else if (forward->flags & FREC_DNSKEY_QUERY)
698 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
699 else if (forward->flags & FREC_DS_QUERY)
700 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
701 else
702 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
703
704 /* Can't validate, as we're missing key data. Put this
705 answer aside, whilst we get that. */
706 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
707 {
708 struct frec *new;
709
710 if ((new = get_new_frec(now, NULL, 1)))
711 {
712 struct frec *next = new->next;
713 *new = *forward; /* copy everything, then overwrite */
714 new->next = next;
715 new->stash = NULL;
716 new->blocking_query = NULL;
717 new->rfd4 = NULL;
718 #ifdef HAVE_IPV6
719 new->rfd6 = NULL;
720 #endif
721 new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY);
722
723 if ((forward->stash = blockdata_alloc((char *)header, n)))
724 {
725 int fd;
726
727 forward->stash_len = n;
728
729 new->dependent = forward; /* to find query awaiting new one. */
730 forward->blocking_query = new; /* for garbage cleaning */
731 /* validate routines leave name of required record in daemon->keyname */
732 if (status == STAT_NEED_KEY)
733 {
734 new->flags |= FREC_DNSKEY_QUERY;
735 nn = dnssec_generate_query(header, ((char *) header) + daemon->packet_buff_sz,
736 daemon->keyname, forward->class, T_DNSKEY, &server->addr);
737 }
738 else if (status == STAT_NEED_DS)
739 {
740 new->flags |= FREC_DS_QUERY;
741 nn = dnssec_generate_query(header,((char *) header) + daemon->packet_buff_sz,
742 daemon->keyname, forward->class, T_DS, &server->addr);
743 }
744 new->crc = questions_crc(header, nn, daemon->namebuff);
745 new->new_id = get_id(new->crc);
746 header->id = htons(new->new_id);
747
748 /* Don't resend this. */
749 daemon->srv_save = NULL;
750
751 if (server->sfd)
752 fd = server->sfd->fd;
753 else
754 {
755 fd = -1;
756 #ifdef HAVE_IPV6
757 if (server->addr.sa.sa_family == AF_INET6)
758 {
759 if (new->rfd6 || (new->rfd6 = allocate_rfd(AF_INET6)))
760 fd = new->rfd6->fd;
761 }
762 else
763 #endif
764 {
765 if (new->rfd4 || (new->rfd4 = allocate_rfd(AF_INET)))
766 fd = new->rfd4->fd;
767 }
768 }
769
770 if (fd != -1)
771 {
772 while (sendto(fd, (char *)header, nn, 0, &server->addr.sa, sa_len(&server->addr)) == -1 && retry_send());
773 server->queries++;
774 }
775 }
776 }
777
778 return;
779 }
780
781 /* Ok, we reached far enough up the chain-of-trust that we can validate something.
782 Now wind back down, pulling back answers which wouldn't previously validate
783 and validate them with the new data. Failure to find needed data here is an internal error.
784 Once we get to the original answer (FREC_DNSSEC_QUERY not set) and it validates,
785 return it to the original requestor. */
786 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
787 {
788 while (forward->dependent)
789 {
790 struct frec *prev;
791
792 if (status == STAT_SECURE)
793 {
794 if (forward->flags & FREC_DNSKEY_QUERY)
795 status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
796 else if (forward->flags & FREC_DS_QUERY)
797 status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
798 }
799
800 prev = forward->dependent;
801 free_frec(forward);
802 forward = prev;
803 forward->blocking_query = NULL; /* already gone */
804 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
805 n = forward->stash_len;
806 }
807
808 /* All DNSKEY and DS records done and in cache, now finally validate original
809 answer, provided last DNSKEY is OK. */
810 if (status == STAT_SECURE)
811 status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class);
812
813 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
814 {
815 my_syslog(LOG_ERR, _("Unexpected missing data for DNSSEC validation"));
816 status = STAT_INSECURE;
817 }
818 }
819
820 log_query(F_KEYTAG | F_SECSTAT, "result", NULL,
821 status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
822
823 no_cache_dnssec = 0;
824
825 if (status == STAT_SECURE)
826 cache_secure = 1;
827 /* TODO return SERVFAIL here */
828 else if (status == STAT_BOGUS)
829 no_cache_dnssec = 1;
830
831 /* restore CD bit to the value in the query */
832 if (forward->flags & FREC_CHECKING_DISABLED)
833 header->hb4 |= HB4_CD;
834 else
835 header->hb4 &= ~HB4_CD;
836 }
837 #endif
838
839 if ((nn = process_reply(header, now, server, (size_t)n, check_rebind, no_cache_dnssec, cache_secure,
840 forward->flags & FREC_HAS_SUBNET, &forward->source)))
841 {
842 header->id = htons(forward->orig_id);
843 header->hb4 |= HB4_RA; /* recursion if available */
844 send_from(forward->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
845 &forward->source, &forward->dest, forward->iface);
846 }
847 free_frec(forward); /* cancel */
848 }
849 }
850
851
852 void receive_query(struct listener *listen, time_t now)
853 {
854 struct dns_header *header = (struct dns_header *)daemon->packet;
855 union mysockaddr source_addr;
856 unsigned short type;
857 struct all_addr dst_addr;
858 struct in_addr netmask, dst_addr_4;
859 size_t m;
860 ssize_t n;
861 int if_index = 0, auth_dns = 0;
862 #ifdef HAVE_AUTH
863 int local_auth = 0;
864 #endif
865 struct iovec iov[1];
866 struct msghdr msg;
867 struct cmsghdr *cmptr;
868 union {
869 struct cmsghdr align; /* this ensures alignment */
870 #ifdef HAVE_IPV6
871 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
872 #endif
873 #if defined(HAVE_LINUX_NETWORK)
874 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
875 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
876 char control[CMSG_SPACE(sizeof(struct in_addr)) +
877 CMSG_SPACE(sizeof(unsigned int))];
878 #elif defined(IP_RECVDSTADDR)
879 char control[CMSG_SPACE(sizeof(struct in_addr)) +
880 CMSG_SPACE(sizeof(struct sockaddr_dl))];
881 #endif
882 } control_u;
883 #ifdef HAVE_IPV6
884 /* Can always get recvd interface for IPv6 */
885 int check_dst = !option_bool(OPT_NOWILD) || listen->family == AF_INET6;
886 #else
887 int check_dst = !option_bool(OPT_NOWILD);
888 #endif
889
890 /* packet buffer overwritten */
891 daemon->srv_save = NULL;
892
893 dst_addr_4.s_addr = 0;
894 netmask.s_addr = 0;
895
896 if (option_bool(OPT_NOWILD) && listen->iface)
897 {
898 auth_dns = listen->iface->dns_auth;
899
900 if (listen->family == AF_INET)
901 {
902 dst_addr_4 = listen->iface->addr.in.sin_addr;
903 netmask = listen->iface->netmask;
904 }
905 }
906
907 iov[0].iov_base = daemon->packet;
908 iov[0].iov_len = daemon->edns_pktsz;
909
910 msg.msg_control = control_u.control;
911 msg.msg_controllen = sizeof(control_u);
912 msg.msg_flags = 0;
913 msg.msg_name = &source_addr;
914 msg.msg_namelen = sizeof(source_addr);
915 msg.msg_iov = iov;
916 msg.msg_iovlen = 1;
917
918 if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
919 return;
920
921 if (n < (int)sizeof(struct dns_header) ||
922 (msg.msg_flags & MSG_TRUNC) ||
923 (header->hb3 & HB3_QR))
924 return;
925
926 source_addr.sa.sa_family = listen->family;
927 #ifdef HAVE_IPV6
928 if (listen->family == AF_INET6)
929 source_addr.in6.sin6_flowinfo = 0;
930 #endif
931
932 if (check_dst)
933 {
934 struct ifreq ifr;
935
936 if (msg.msg_controllen < sizeof(struct cmsghdr))
937 return;
938
939 #if defined(HAVE_LINUX_NETWORK)
940 if (listen->family == AF_INET)
941 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
942 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
943 {
944 union {
945 unsigned char *c;
946 struct in_pktinfo *p;
947 } p;
948 p.c = CMSG_DATA(cmptr);
949 dst_addr_4 = dst_addr.addr.addr4 = p.p->ipi_spec_dst;
950 if_index = p.p->ipi_ifindex;
951 }
952 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
953 if (listen->family == AF_INET)
954 {
955 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
956 {
957 union {
958 unsigned char *c;
959 unsigned int *i;
960 struct in_addr *a;
961 #ifndef HAVE_SOLARIS_NETWORK
962 struct sockaddr_dl *s;
963 #endif
964 } p;
965 p.c = CMSG_DATA(cmptr);
966 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
967 dst_addr_4 = dst_addr.addr.addr4 = *(p.a);
968 else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
969 #ifdef HAVE_SOLARIS_NETWORK
970 if_index = *(p.i);
971 #else
972 if_index = p.s->sdl_index;
973 #endif
974 }
975 }
976 #endif
977
978 #ifdef HAVE_IPV6
979 if (listen->family == AF_INET6)
980 {
981 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
982 if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
983 {
984 union {
985 unsigned char *c;
986 struct in6_pktinfo *p;
987 } p;
988 p.c = CMSG_DATA(cmptr);
989
990 dst_addr.addr.addr6 = p.p->ipi6_addr;
991 if_index = p.p->ipi6_ifindex;
992 }
993 }
994 #endif
995
996 /* enforce available interface configuration */
997
998 if (!indextoname(listen->fd, if_index, ifr.ifr_name))
999 return;
1000
1001 if (!iface_check(listen->family, &dst_addr, ifr.ifr_name, &auth_dns))
1002 {
1003 if (!option_bool(OPT_CLEVERBIND))
1004 enumerate_interfaces(0);
1005 if (!loopback_exception(listen->fd, listen->family, &dst_addr, ifr.ifr_name) &&
1006 !label_exception(if_index, listen->family, &dst_addr))
1007 return;
1008 }
1009
1010 if (listen->family == AF_INET && option_bool(OPT_LOCALISE))
1011 {
1012 struct irec *iface;
1013
1014 /* get the netmask of the interface whch has the address we were sent to.
1015 This is no neccessarily the interface we arrived on. */
1016
1017 for (iface = daemon->interfaces; iface; iface = iface->next)
1018 if (iface->addr.sa.sa_family == AF_INET &&
1019 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1020 break;
1021
1022 /* interface may be new */
1023 if (!iface && !option_bool(OPT_CLEVERBIND))
1024 enumerate_interfaces(0);
1025
1026 for (iface = daemon->interfaces; iface; iface = iface->next)
1027 if (iface->addr.sa.sa_family == AF_INET &&
1028 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1029 break;
1030
1031 /* If we failed, abandon localisation */
1032 if (iface)
1033 netmask = iface->netmask;
1034 else
1035 dst_addr_4.s_addr = 0;
1036 }
1037 }
1038
1039 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
1040 {
1041 char types[20];
1042 #ifdef HAVE_AUTH
1043 struct auth_zone *zone;
1044 #endif
1045
1046 querystr(auth_dns ? "auth" : "query", types, type);
1047
1048 if (listen->family == AF_INET)
1049 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1050 (struct all_addr *)&source_addr.in.sin_addr, types);
1051 #ifdef HAVE_IPV6
1052 else
1053 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1054 (struct all_addr *)&source_addr.in6.sin6_addr, types);
1055 #endif
1056
1057 #ifdef HAVE_AUTH
1058 /* find queries for zones we're authoritative for, and answer them directly */
1059 if (!auth_dns)
1060 for (zone = daemon->auth_zones; zone; zone = zone->next)
1061 if (in_zone(zone, daemon->namebuff, NULL))
1062 {
1063 auth_dns = 1;
1064 local_auth = 1;
1065 break;
1066 }
1067 #endif
1068 }
1069
1070 #ifdef HAVE_AUTH
1071 if (auth_dns)
1072 {
1073 m = answer_auth(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n, now, &source_addr, local_auth);
1074 if (m >= 1)
1075 {
1076 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1077 (char *)header, m, &source_addr, &dst_addr, if_index);
1078 daemon->auth_answer++;
1079 }
1080 }
1081 else
1082 #endif
1083 {
1084 m = answer_request(header, ((char *) header) + daemon->packet_buff_sz, (size_t)n,
1085 dst_addr_4, netmask, now);
1086
1087 if (m >= 1)
1088 {
1089 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1090 (char *)header, m, &source_addr, &dst_addr, if_index);
1091 daemon->local_answer++;
1092 }
1093 else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
1094 header, (size_t)n, now, NULL))
1095 daemon->queries_forwarded++;
1096 else
1097 daemon->local_answer++;
1098 }
1099 }
1100
1101 #ifdef HAVE_DNSSEC
1102 static int tcp_key_recurse(time_t now, int status, int class, char *keyname, struct server *server)
1103 {
1104 /* Recurse up the key heirarchy */
1105 size_t n;
1106 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
1107 unsigned char *payload = &packet[2];
1108 struct dns_header *header = (struct dns_header *)payload;
1109 u16 *length = (u16 *)packet;
1110 int new_status;
1111 unsigned char c1, c2;
1112
1113 n = dnssec_generate_query(header, ((char *) header) + 65536, keyname, class,
1114 status == STAT_NEED_KEY ? T_DNSKEY : T_DS, &server->addr);
1115
1116 *length = htons(n);
1117
1118 if (!read_write(server->tcpfd, packet, n + sizeof(u16), 0) ||
1119 !read_write(server->tcpfd, &c1, 1, 1) ||
1120 !read_write(server->tcpfd, &c2, 1, 1) ||
1121 !read_write(server->tcpfd, payload, (c1 << 8) | c2, 1))
1122 {
1123 close(server->tcpfd);
1124 server->tcpfd = -1;
1125 new_status = STAT_INSECURE;
1126 }
1127 else
1128 {
1129 n = (c1 << 8) | c2;
1130
1131 if (status == STAT_NEED_KEY)
1132 new_status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1133 else
1134 new_status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1135
1136 if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
1137 {
1138 if ((new_status = tcp_key_recurse(now, new_status, class, daemon->keyname, server) == STAT_SECURE))
1139 {
1140 if (status == STAT_NEED_KEY)
1141 new_status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1142 else
1143 new_status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, class);
1144
1145 if (new_status == STAT_NEED_DS || new_status == STAT_NEED_KEY)
1146 {
1147 my_syslog(LOG_ERR, _("Unexpected missing data for DNSSEC validation"));
1148 status = STAT_INSECURE;
1149 }
1150 }
1151 }
1152 }
1153
1154 free(packet);
1155
1156 return new_status;
1157 }
1158 #endif
1159
1160
1161 /* The daemon forks before calling this: it should deal with one connection,
1162 blocking as neccessary, and then return. Note, need to be a bit careful
1163 about resources for debug mode, when the fork is suppressed: that's
1164 done by the caller. */
1165 unsigned char *tcp_request(int confd, time_t now,
1166 union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
1167 {
1168 size_t size = 0;
1169 int norebind = 0;
1170 #ifdef HAVE_AUTH
1171 int local_auth = 0;
1172 #endif
1173 int checking_disabled, check_subnet, no_cache_dnssec = 0, cache_secure = 0;
1174 size_t m;
1175 unsigned short qtype;
1176 unsigned int gotname;
1177 unsigned char c1, c2;
1178 /* Max TCP packet + slop + size */
1179 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
1180 unsigned char *payload = &packet[2];
1181 /* largest field in header is 16-bits, so this is still sufficiently aligned */
1182 struct dns_header *header = (struct dns_header *)payload;
1183 u16 *length = (u16 *)packet;
1184 struct server *last_server;
1185 struct in_addr dst_addr_4;
1186 union mysockaddr peer_addr;
1187 socklen_t peer_len = sizeof(union mysockaddr);
1188
1189 if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
1190 return packet;
1191
1192 while (1)
1193 {
1194 if (!packet ||
1195 !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
1196 !(size = c1 << 8 | c2) ||
1197 !read_write(confd, payload, size, 1))
1198 return packet;
1199
1200 if (size < (int)sizeof(struct dns_header))
1201 continue;
1202
1203 check_subnet = 0;
1204
1205 /* save state of "cd" flag in query */
1206 if ((checking_disabled = header->hb4 & HB4_CD))
1207 no_cache_dnssec = 1;
1208
1209 /* RFC 4035: sect 4.6 para 2 */
1210 header->hb4 &= ~HB4_AD;
1211
1212 if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
1213 {
1214 char types[20];
1215 #ifdef HAVE_AUTH
1216 struct auth_zone *zone;
1217 #endif
1218 querystr(auth_dns ? "auth" : "query", types, qtype);
1219
1220 if (peer_addr.sa.sa_family == AF_INET)
1221 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
1222 (struct all_addr *)&peer_addr.in.sin_addr, types);
1223 #ifdef HAVE_IPV6
1224 else
1225 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
1226 (struct all_addr *)&peer_addr.in6.sin6_addr, types);
1227 #endif
1228
1229 #ifdef HAVE_AUTH
1230 /* find queries for zones we're authoritative for, and answer them directly */
1231 if (!auth_dns)
1232 for (zone = daemon->auth_zones; zone; zone = zone->next)
1233 if (in_zone(zone, daemon->namebuff, NULL))
1234 {
1235 auth_dns = 1;
1236 local_auth = 1;
1237 break;
1238 }
1239 #endif
1240 }
1241
1242 if (local_addr->sa.sa_family == AF_INET)
1243 dst_addr_4 = local_addr->in.sin_addr;
1244 else
1245 dst_addr_4.s_addr = 0;
1246
1247 #ifdef HAVE_AUTH
1248 if (auth_dns)
1249 m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, local_auth);
1250 else
1251 #endif
1252 {
1253 /* m > 0 if answered from cache */
1254 m = answer_request(header, ((char *) header) + 65536, (size_t)size,
1255 dst_addr_4, netmask, now);
1256
1257 /* Do this by steam now we're not in the select() loop */
1258 check_log_writer(NULL);
1259
1260 if (m == 0)
1261 {
1262 unsigned int flags = 0;
1263 struct all_addr *addrp = NULL;
1264 int type = 0;
1265 char *domain = NULL;
1266
1267 if (option_bool(OPT_ADD_MAC))
1268 size = add_mac(header, size, ((char *) header) + 65536, &peer_addr);
1269
1270 if (option_bool(OPT_CLIENT_SUBNET))
1271 {
1272 size_t new = add_source_addr(header, size, ((char *) header) + 65536, &peer_addr);
1273 if (size != new)
1274 {
1275 size = new;
1276 check_subnet = 1;
1277 }
1278 }
1279
1280 if (gotname)
1281 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
1282
1283 if (type != 0 || option_bool(OPT_ORDER) || !daemon->last_server)
1284 last_server = daemon->servers;
1285 else
1286 last_server = daemon->last_server;
1287
1288 if (!flags && last_server)
1289 {
1290 struct server *firstsendto = NULL;
1291 unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
1292
1293 /* Loop round available servers until we succeed in connecting to one.
1294 Note that this code subtley ensures that consecutive queries on this connection
1295 which can go to the same server, do so. */
1296 while (1)
1297 {
1298 if (!firstsendto)
1299 firstsendto = last_server;
1300 else
1301 {
1302 if (!(last_server = last_server->next))
1303 last_server = daemon->servers;
1304
1305 if (last_server == firstsendto)
1306 break;
1307 }
1308
1309 /* server for wrong domain */
1310 if (type != (last_server->flags & SERV_TYPE) ||
1311 (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
1312 continue;
1313
1314 if (last_server->tcpfd == -1)
1315 {
1316 if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
1317 continue;
1318
1319 if ((!local_bind(last_server->tcpfd, &last_server->source_addr, last_server->interface, 1) ||
1320 connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
1321 {
1322 close(last_server->tcpfd);
1323 last_server->tcpfd = -1;
1324 continue;
1325 }
1326
1327 #ifdef HAVE_DNSSEC
1328 if (option_bool(OPT_DNSSEC_VALID))
1329 {
1330 size = add_do_bit(header, size, ((char *) header) + 65536);
1331 header->hb4 |= HB4_CD;
1332 }
1333 #endif
1334
1335 #ifdef HAVE_CONNTRACK
1336 /* Copy connection mark of incoming query to outgoing connection. */
1337 if (option_bool(OPT_CONNTRACK))
1338 {
1339 unsigned int mark;
1340 struct all_addr local;
1341 #ifdef HAVE_IPV6
1342 if (local_addr->sa.sa_family == AF_INET6)
1343 local.addr.addr6 = local_addr->in6.sin6_addr;
1344 else
1345 #endif
1346 local.addr.addr4 = local_addr->in.sin_addr;
1347
1348 if (get_incoming_mark(&peer_addr, &local, 1, &mark))
1349 setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
1350 }
1351 #endif
1352 }
1353
1354 *length = htons(size);
1355
1356 if (!read_write(last_server->tcpfd, packet, size + sizeof(u16), 0) ||
1357 !read_write(last_server->tcpfd, &c1, 1, 1) ||
1358 !read_write(last_server->tcpfd, &c2, 1, 1) ||
1359 !read_write(last_server->tcpfd, payload, (c1 << 8) | c2, 1))
1360 {
1361 close(last_server->tcpfd);
1362 last_server->tcpfd = -1;
1363 continue;
1364 }
1365
1366 m = (c1 << 8) | c2;
1367
1368 if (!gotname)
1369 strcpy(daemon->namebuff, "query");
1370 if (last_server->addr.sa.sa_family == AF_INET)
1371 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
1372 (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
1373 #ifdef HAVE_IPV6
1374 else
1375 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
1376 (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
1377 #endif
1378
1379 #ifdef HAVE_DNSSEC
1380 if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled)
1381 {
1382 int class, status;
1383
1384 status = dnssec_validate_reply(now, header, m, daemon->namebuff, daemon->keyname, &class);
1385
1386 if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
1387 {
1388 if ((status = tcp_key_recurse(now, status, class, daemon->keyname, last_server)) == STAT_SECURE)
1389 status = dnssec_validate_reply(now, header, m, daemon->namebuff, daemon->keyname, &class);
1390 }
1391
1392 log_query(F_KEYTAG | F_SECSTAT, "result", NULL,
1393 status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
1394
1395 if (status == STAT_BOGUS)
1396 no_cache_dnssec = 1;
1397
1398 if (status == STAT_SECURE)
1399 cache_secure = 1;
1400 }
1401 #endif
1402
1403 /* restore CD bit to the value in the query */
1404 if (checking_disabled)
1405 header->hb4 |= HB4_CD;
1406 else
1407 header->hb4 &= ~HB4_CD;
1408
1409 /* There's no point in updating the cache, since this process will exit and
1410 lose the information after a few queries. We make this call for the alias and
1411 bogus-nxdomain side-effects. */
1412 /* If the crc of the question section doesn't match the crc we sent, then
1413 someone might be attempting to insert bogus values into the cache by
1414 sending replies containing questions and bogus answers. */
1415 if (crc == questions_crc(header, (unsigned int)m, daemon->namebuff))
1416 m = process_reply(header, now, last_server, (unsigned int)m,
1417 option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec,
1418 cache_secure, check_subnet, &peer_addr);
1419
1420 break;
1421 }
1422 }
1423
1424 /* In case of local answer or no connections made. */
1425 if (m == 0)
1426 m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
1427 }
1428 }
1429
1430 check_log_writer(NULL);
1431
1432 *length = htons(m);
1433
1434 if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
1435 return packet;
1436 }
1437 }
1438
1439 static struct frec *allocate_frec(time_t now)
1440 {
1441 struct frec *f;
1442
1443 if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
1444 {
1445 f->next = daemon->frec_list;
1446 f->time = now;
1447 f->sentto = NULL;
1448 f->rfd4 = NULL;
1449 f->flags = 0;
1450 #ifdef HAVE_IPV6
1451 f->rfd6 = NULL;
1452 #endif
1453 #ifdef HAVE_DNSSEC
1454 f->blocking_query = NULL;
1455 #endif
1456 daemon->frec_list = f;
1457 }
1458
1459 return f;
1460 }
1461
1462 static struct randfd *allocate_rfd(int family)
1463 {
1464 static int finger = 0;
1465 int i;
1466
1467 /* limit the number of sockets we have open to avoid starvation of
1468 (eg) TFTP. Once we have a reasonable number, randomness should be OK */
1469
1470 for (i = 0; i < RANDOM_SOCKS; i++)
1471 if (daemon->randomsocks[i].refcount == 0)
1472 {
1473 if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
1474 break;
1475
1476 daemon->randomsocks[i].refcount = 1;
1477 daemon->randomsocks[i].family = family;
1478 return &daemon->randomsocks[i];
1479 }
1480
1481 /* No free ones or cannot get new socket, grab an existing one */
1482 for (i = 0; i < RANDOM_SOCKS; i++)
1483 {
1484 int j = (i+finger) % RANDOM_SOCKS;
1485 if (daemon->randomsocks[j].refcount != 0 &&
1486 daemon->randomsocks[j].family == family &&
1487 daemon->randomsocks[j].refcount != 0xffff)
1488 {
1489 finger = j;
1490 daemon->randomsocks[j].refcount++;
1491 return &daemon->randomsocks[j];
1492 }
1493 }
1494
1495 return NULL; /* doom */
1496 }
1497 static void free_frec(struct frec *f)
1498 {
1499 if (f->rfd4 && --(f->rfd4->refcount) == 0)
1500 close(f->rfd4->fd);
1501
1502 f->rfd4 = NULL;
1503 f->sentto = NULL;
1504 f->flags = 0;
1505
1506 #ifdef HAVE_IPV6
1507 if (f->rfd6 && --(f->rfd6->refcount) == 0)
1508 close(f->rfd6->fd);
1509
1510 f->rfd6 = NULL;
1511 #endif
1512
1513 #ifdef HAVE_DNSSEC
1514 if (f->stash)
1515 {
1516 blockdata_free(f->stash);
1517 f->stash = NULL;
1518 }
1519
1520 /* Anything we're waiting on is pointless now, too */
1521 if (f->blocking_query)
1522 free_frec(f->blocking_query);
1523 f->blocking_query = NULL;
1524
1525 #endif
1526 }
1527
1528 /* if wait==NULL return a free or older than TIMEOUT record.
1529 else return *wait zero if one available, or *wait is delay to
1530 when the oldest in-use record will expire. Impose an absolute
1531 limit of 4*TIMEOUT before we wipe things (for random sockets).
1532 If force is set, always return a result, even if we have
1533 to allocate above the limit. */
1534 struct frec *get_new_frec(time_t now, int *wait, int force)
1535 {
1536 struct frec *f, *oldest, *target;
1537 int count;
1538
1539 if (wait)
1540 *wait = 0;
1541
1542 for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next, count++)
1543 if (!f->sentto)
1544 target = f;
1545 else
1546 {
1547 if (difftime(now, f->time) >= 4*TIMEOUT)
1548 {
1549 free_frec(f);
1550 target = f;
1551 }
1552
1553 if (!oldest || difftime(f->time, oldest->time) <= 0)
1554 oldest = f;
1555 }
1556
1557 if (target)
1558 {
1559 target->time = now;
1560 return target;
1561 }
1562
1563 /* can't find empty one, use oldest if there is one
1564 and it's older than timeout */
1565 if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
1566 {
1567 /* keep stuff for twice timeout if we can by allocating a new
1568 record instead */
1569 if (difftime(now, oldest->time) < 2*TIMEOUT &&
1570 count <= daemon->ftabsize &&
1571 (f = allocate_frec(now)))
1572 return f;
1573
1574 if (!wait)
1575 {
1576 free_frec(oldest);
1577 oldest->time = now;
1578 }
1579 return oldest;
1580 }
1581
1582 /* none available, calculate time 'till oldest record expires */
1583 if (!force && count > daemon->ftabsize)
1584 {
1585 static time_t last_log = 0;
1586
1587 if (oldest && wait)
1588 *wait = oldest->time + (time_t)TIMEOUT - now;
1589
1590 if ((int)difftime(now, last_log) > 5)
1591 {
1592 last_log = now;
1593 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
1594 }
1595
1596 return NULL;
1597 }
1598
1599 if (!(f = allocate_frec(now)) && wait)
1600 /* wait one second on malloc failure */
1601 *wait = 1;
1602
1603 return f; /* OK if malloc fails and this is NULL */
1604 }
1605
1606 /* crc is all-ones if not known. */
1607 static struct frec *lookup_frec(unsigned short id, unsigned int crc)
1608 {
1609 struct frec *f;
1610
1611 for(f = daemon->frec_list; f; f = f->next)
1612 if (f->sentto && f->new_id == id &&
1613 (f->crc == crc || crc == 0xffffffff))
1614 return f;
1615
1616 return NULL;
1617 }
1618
1619 static struct frec *lookup_frec_by_sender(unsigned short id,
1620 union mysockaddr *addr,
1621 unsigned int crc)
1622 {
1623 struct frec *f;
1624
1625 for(f = daemon->frec_list; f; f = f->next)
1626 if (f->sentto &&
1627 f->orig_id == id &&
1628 f->crc == crc &&
1629 sockaddr_isequal(&f->source, addr))
1630 return f;
1631
1632 return NULL;
1633 }
1634
1635 /* A server record is going away, remove references to it */
1636 void server_gone(struct server *server)
1637 {
1638 struct frec *f;
1639
1640 for (f = daemon->frec_list; f; f = f->next)
1641 if (f->sentto && f->sentto == server)
1642 free_frec(f);
1643
1644 if (daemon->last_server == server)
1645 daemon->last_server = NULL;
1646
1647 if (daemon->srv_save == server)
1648 daemon->srv_save = NULL;
1649 }
1650
/* Return a random 16-bit id that no in-use forwarding record matching
   crc (as judged by lookup_frec) is currently using as its new_id.
   Keeps drawing ids from rand16() until one is free. */
static unsigned short get_id(unsigned int crc)
{
  unsigned short candidate;

  for (;;)
    {
      candidate = rand16();

      if (!lookup_frec(candidate, crc))
        return candidate;
    }
}
1662
1663
1664
1665
1666