]> git.ipfire.org Git - people/ms/dnsmasq.git/blob - src/forward.c
Merge branch 'master' into dnssec
[people/ms/dnsmasq.git] / src / forward.c
1 /* dnsmasq is Copyright (c) 2000-2013 Simon Kelley
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 dated June, 1991, or
6 (at your option) version 3 dated 29 June, 2007.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include "dnsmasq.h"
18
19 static struct frec *lookup_frec(unsigned short id, unsigned int crc);
20 static struct frec *lookup_frec_by_sender(unsigned short id,
21 union mysockaddr *addr,
22 unsigned int crc);
23 static unsigned short get_id(unsigned int crc);
24 static void free_frec(struct frec *f);
25 static struct randfd *allocate_rfd(int family);
26
27 /* Send a UDP packet with its source address set as "source"
28 unless nowild is true, when we just send it with the kernel default */
29 int send_from(int fd, int nowild, char *packet, size_t len,
30 union mysockaddr *to, struct all_addr *source,
31 unsigned int iface)
32 {
33 struct msghdr msg;
34 struct iovec iov[1];
35 union {
36 struct cmsghdr align; /* this ensures alignment */
37 #if defined(HAVE_LINUX_NETWORK)
38 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39 #elif defined(IP_SENDSRCADDR)
40 char control[CMSG_SPACE(sizeof(struct in_addr))];
41 #endif
42 #ifdef HAVE_IPV6
43 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44 #endif
45 } control_u;
46
47 iov[0].iov_base = packet;
48 iov[0].iov_len = len;
49
50 msg.msg_control = NULL;
51 msg.msg_controllen = 0;
52 msg.msg_flags = 0;
53 msg.msg_name = to;
54 msg.msg_namelen = sa_len(to);
55 msg.msg_iov = iov;
56 msg.msg_iovlen = 1;
57
58 if (!nowild)
59 {
60 struct cmsghdr *cmptr;
61 msg.msg_control = &control_u;
62 msg.msg_controllen = sizeof(control_u);
63 cmptr = CMSG_FIRSTHDR(&msg);
64
65 if (to->sa.sa_family == AF_INET)
66 {
67 #if defined(HAVE_LINUX_NETWORK)
68 struct in_pktinfo p;
69 p.ipi_ifindex = 0;
70 p.ipi_spec_dst = source->addr.addr4;
71 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
72 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
73 cmptr->cmsg_level = IPPROTO_IP;
74 cmptr->cmsg_type = IP_PKTINFO;
75 #elif defined(IP_SENDSRCADDR)
76 memcpy(CMSG_DATA(cmptr), &(source->addr.addr4), sizeof(source->addr.addr4));
77 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78 cmptr->cmsg_level = IPPROTO_IP;
79 cmptr->cmsg_type = IP_SENDSRCADDR;
80 #endif
81 }
82 else
83 #ifdef HAVE_IPV6
84 {
85 struct in6_pktinfo p;
86 p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87 p.ipi6_addr = source->addr.addr6;
88 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
89 msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
90 cmptr->cmsg_type = daemon->v6pktinfo;
91 cmptr->cmsg_level = IPPROTO_IPV6;
92 }
93 #else
94 (void)iface; /* eliminate warning */
95 #endif
96 }
97
98 while (sendmsg(fd, &msg, 0) == -1)
99 {
100 if (retry_send())
101 continue;
102
103 /* If interface is still in DAD, EINVAL results - ignore that. */
104 if (errno == EINVAL)
105 break;
106
107 my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
108 return 0;
109 }
110
111 return 1;
112 }
113
114 static unsigned int search_servers(time_t now, struct all_addr **addrpp,
115 unsigned int qtype, char *qdomain, int *type, char **domain, int *norebind)
116
117 {
118 /* If the query ends in the domain in one of our servers, set
119 domain to point to that name. We find the largest match to allow both
120 domain.org and sub.domain.org to exist. */
121
122 unsigned int namelen = strlen(qdomain);
123 unsigned int matchlen = 0;
124 struct server *serv;
125 unsigned int flags = 0;
126
127 for (serv = daemon->servers; serv; serv=serv->next)
128 /* domain matches take priority over NODOTS matches */
129 if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
130 {
131 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
132 *type = SERV_FOR_NODOTS;
133 if (serv->flags & SERV_NO_ADDR)
134 flags = F_NXDOMAIN;
135 else if (serv->flags & SERV_LITERAL_ADDRESS)
136 {
137 if (sflag & qtype)
138 {
139 flags = sflag;
140 if (serv->addr.sa.sa_family == AF_INET)
141 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
142 #ifdef HAVE_IPV6
143 else
144 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
145 #endif
146 }
147 else if (!flags || (flags & F_NXDOMAIN))
148 flags = F_NOERR;
149 }
150 }
151 else if (serv->flags & SERV_HAS_DOMAIN)
152 {
153 unsigned int domainlen = strlen(serv->domain);
154 char *matchstart = qdomain + namelen - domainlen;
155 if (namelen >= domainlen &&
156 hostname_isequal(matchstart, serv->domain) &&
157 (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
158 {
159 if (serv->flags & SERV_NO_REBIND)
160 *norebind = 1;
161 else
162 {
163 unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
164 /* implement priority rules for --address and --server for same domain.
165 --address wins if the address is for the correct AF
166 --server wins otherwise. */
167 if (domainlen != 0 && domainlen == matchlen)
168 {
169 if ((serv->flags & SERV_LITERAL_ADDRESS))
170 {
171 if (!(sflag & qtype) && flags == 0)
172 continue;
173 }
174 else
175 {
176 if (flags & (F_IPV4 | F_IPV6))
177 continue;
178 }
179 }
180
181 if (domainlen >= matchlen)
182 {
183 *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND);
184 *domain = serv->domain;
185 matchlen = domainlen;
186 if (serv->flags & SERV_NO_ADDR)
187 flags = F_NXDOMAIN;
188 else if (serv->flags & SERV_LITERAL_ADDRESS)
189 {
190 if (sflag & qtype)
191 {
192 flags = sflag;
193 if (serv->addr.sa.sa_family == AF_INET)
194 *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
195 #ifdef HAVE_IPV6
196 else
197 *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
198 #endif
199 }
200 else if (!flags || (flags & F_NXDOMAIN))
201 flags = F_NOERR;
202 }
203 else
204 flags = 0;
205 }
206 }
207 }
208 }
209
210 if (flags == 0 && !(qtype & F_QUERY) &&
211 option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
212 /* don't forward A or AAAA queries for simple names, except the empty name */
213 flags = F_NOERR;
214
215 if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
216 flags = F_NOERR;
217
218 if (flags)
219 {
220 int logflags = 0;
221
222 if (flags == F_NXDOMAIN || flags == F_NOERR)
223 logflags = F_NEG | qtype;
224
225 log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
226 }
227 else if ((*type) & SERV_USE_RESOLV)
228 {
229 *type = 0; /* use normal servers for this domain */
230 *domain = NULL;
231 }
232 return flags;
233 }
234
235 static int forward_query(int udpfd, union mysockaddr *udpaddr,
236 struct all_addr *dst_addr, unsigned int dst_iface,
237 struct dns_header *header, size_t plen, time_t now, struct frec *forward)
238 {
239 char *domain = NULL;
240 int type = 0, norebind = 0;
241 struct all_addr *addrp = NULL;
242 unsigned int crc = questions_crc(header, plen, daemon->namebuff);
243 unsigned int flags = 0;
244 unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
245 struct server *start = NULL;
246
247 /* RFC 4035: sect 4.6 para 2 */
248 header->hb4 &= ~HB4_AD;
249
250 /* may be no servers available. */
251 if (!daemon->servers)
252 forward = NULL;
253 else if (forward || (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, crc)))
254 {
255 /* retry on existing query, send to all available servers */
256 domain = forward->sentto->domain;
257 forward->sentto->failed_queries++;
258 if (!option_bool(OPT_ORDER))
259 {
260 forward->forwardall = 1;
261 daemon->last_server = NULL;
262 }
263 type = forward->sentto->flags & SERV_TYPE;
264 if (!(start = forward->sentto->next))
265 start = daemon->servers; /* at end of list, recycle */
266 header->id = htons(forward->new_id);
267 }
268 else
269 {
270 if (gotname)
271 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
272
273 if (!flags && !(forward = get_new_frec(now, NULL)))
274 /* table full - server failure. */
275 flags = F_NEG;
276
277 if (forward)
278 {
279 forward->source = *udpaddr;
280 forward->dest = *dst_addr;
281 forward->iface = dst_iface;
282 forward->orig_id = ntohs(header->id);
283 forward->new_id = get_id(crc);
284 forward->fd = udpfd;
285 forward->crc = crc;
286 forward->forwardall = 0;
287 forward->flags = 0;
288 if (norebind)
289 forward->flags |= FREC_NOREBIND;
290 if (header->hb4 & HB4_CD)
291 forward->flags |= FREC_CHECKING_DISABLED;
292
293 header->id = htons(forward->new_id);
294
295 /* In strict_order mode, always try servers in the order
296 specified in resolv.conf, if a domain is given
297 always try all the available servers,
298 otherwise, use the one last known to work. */
299
300 if (type == 0)
301 {
302 if (option_bool(OPT_ORDER))
303 start = daemon->servers;
304 else if (!(start = daemon->last_server) ||
305 daemon->forwardcount++ > FORWARD_TEST ||
306 difftime(now, daemon->forwardtime) > FORWARD_TIME)
307 {
308 start = daemon->servers;
309 forward->forwardall = 1;
310 daemon->forwardcount = 0;
311 daemon->forwardtime = now;
312 }
313 }
314 else
315 {
316 start = daemon->servers;
317 if (!option_bool(OPT_ORDER))
318 forward->forwardall = 1;
319 }
320 }
321 }
322
323 /* check for send errors here (no route to host)
324 if we fail to send to all nameservers, send back an error
325 packet straight away (helps modem users when offline) */
326
327 if (!flags && forward)
328 {
329 struct server *firstsentto = start;
330 int forwarded = 0;
331
332 if (option_bool(OPT_ADD_MAC))
333 plen = add_mac(header, plen, ((char *) header) + PACKETSZ, &forward->source);
334
335 if (option_bool(OPT_CLIENT_SUBNET))
336 {
337 size_t new = add_source_addr(header, plen, ((char *) header) + PACKETSZ, &forward->source);
338 if (new != plen)
339 {
340 plen = new;
341 forward->flags |= FREC_HAS_SUBNET;
342 }
343 }
344
345 while (1)
346 {
347 /* only send to servers dealing with our domain.
348 domain may be NULL, in which case server->domain
349 must be NULL also. */
350
351 if (type == (start->flags & SERV_TYPE) &&
352 (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
353 !(start->flags & SERV_LITERAL_ADDRESS))
354 {
355 int fd;
356
357 /* find server socket to use, may need to get random one. */
358 if (start->sfd)
359 fd = start->sfd->fd;
360 else
361 {
362 #ifdef HAVE_IPV6
363 if (start->addr.sa.sa_family == AF_INET6)
364 {
365 if (!forward->rfd6 &&
366 !(forward->rfd6 = allocate_rfd(AF_INET6)))
367 break;
368 daemon->rfd_save = forward->rfd6;
369 fd = forward->rfd6->fd;
370 }
371 else
372 #endif
373 {
374 if (!forward->rfd4 &&
375 !(forward->rfd4 = allocate_rfd(AF_INET)))
376 break;
377 daemon->rfd_save = forward->rfd4;
378 fd = forward->rfd4->fd;
379 }
380
381 #ifdef HAVE_CONNTRACK
382 /* Copy connection mark of incoming query to outgoing connection. */
383 if (option_bool(OPT_CONNTRACK))
384 {
385 unsigned int mark;
386 if (get_incoming_mark(&forward->source, &forward->dest, 0, &mark))
387 setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
388 }
389 #endif
390 }
391
392 if (sendto(fd, (char *)header, plen, 0,
393 &start->addr.sa,
394 sa_len(&start->addr)) == -1)
395 {
396 if (retry_send())
397 continue;
398 }
399 else
400 {
401 /* Keep info in case we want to re-send this packet */
402 daemon->srv_save = start;
403 daemon->packet_len = plen;
404
405 if (!gotname)
406 strcpy(daemon->namebuff, "query");
407 if (start->addr.sa.sa_family == AF_INET)
408 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
409 (struct all_addr *)&start->addr.in.sin_addr, NULL);
410 #ifdef HAVE_IPV6
411 else
412 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
413 (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
414 #endif
415 start->queries++;
416 forwarded = 1;
417 forward->sentto = start;
418 if (!forward->forwardall)
419 break;
420 forward->forwardall++;
421 }
422 }
423
424 if (!(start = start->next))
425 start = daemon->servers;
426
427 if (start == firstsentto)
428 break;
429 }
430
431 if (forwarded)
432 return 1;
433
434 /* could not send on, prepare to return */
435 header->id = htons(forward->orig_id);
436 free_frec(forward); /* cancel */
437 }
438
439 /* could not send on, return empty answer or address if known for whole domain */
440 if (udpfd != -1)
441 {
442 plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
443 send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
444 }
445
446 return 0;
447 }
448
449 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
450 int checking_disabled, int check_subnet, union mysockaddr *query_source)
451 {
452 unsigned char *pheader, *sizep;
453 char **sets = 0;
454 int munged = 0, is_sign;
455 size_t plen;
456
457 #ifdef HAVE_IPSET
458 /* Similar algorithm to search_servers. */
459 struct ipsets *ipset_pos;
460 unsigned int namelen = strlen(daemon->namebuff);
461 unsigned int matchlen = 0;
462 for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next)
463 {
464 unsigned int domainlen = strlen(ipset_pos->domain);
465 char *matchstart = daemon->namebuff + namelen - domainlen;
466 if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
467 (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
468 domainlen >= matchlen) {
469 matchlen = domainlen;
470 sets = ipset_pos->sets;
471 }
472 }
473 #endif
474
475 /* If upstream is advertising a larger UDP packet size
476 than we allow, trim it so that we don't get overlarge
477 requests for the client. We can't do this for signed packets. */
478
479 if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)))
480 {
481 if (!is_sign)
482 {
483 unsigned short udpsz;
484 unsigned char *psave = sizep;
485
486 GETSHORT(udpsz, sizep);
487 if (udpsz > daemon->edns_pktsz)
488 PUTSHORT(daemon->edns_pktsz, psave);
489 }
490
491 if (check_subnet && !check_source(header, plen, pheader, query_source))
492 {
493 my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
494 return 0;
495 }
496 }
497
498
499 /* RFC 4035 sect 4.6 para 3 */
500 if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
501 header->hb4 &= ~HB4_AD;
502
503 if (OPCODE(header) != QUERY || (RCODE(header) != NOERROR && RCODE(header) != NXDOMAIN))
504 return n;
505
506 /* Complain loudly if the upstream server is non-recursive. */
507 if (!(header->hb4 & HB4_RA) && RCODE(header) == NOERROR && ntohs(header->ancount) == 0 &&
508 server && !(server->flags & SERV_WARNED_RECURSIVE))
509 {
510 prettyprint_addr(&server->addr, daemon->namebuff);
511 my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
512 if (!option_bool(OPT_LOG))
513 server->flags |= SERV_WARNED_RECURSIVE;
514 }
515
516 #ifdef HAVE_DNSSEC
517 printf("validate\n");
518 dnssec_validate(header, n);
519 #endif
520
521 if (daemon->bogus_addr && RCODE(header) != NXDOMAIN &&
522 check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
523 {
524 munged = 1;
525 SET_RCODE(header, NXDOMAIN);
526 header->hb3 &= ~HB3_AA;
527 }
528 else
529 {
530 if (RCODE(header) == NXDOMAIN &&
531 extract_request(header, n, daemon->namebuff, NULL) &&
532 check_for_local_domain(daemon->namebuff, now))
533 {
534 /* if we forwarded a query for a locally known name (because it was for
535 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
536 since we know that the domain exists, even if upstream doesn't */
537 munged = 1;
538 header->hb3 |= HB3_AA;
539 SET_RCODE(header, NOERROR);
540 }
541
542 if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, checking_disabled))
543 {
544 my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
545 munged = 1;
546 }
547 }
548
549 /* do this after extract_addresses. Ensure NODATA reply and remove
550 nameserver info. */
551
552 if (munged)
553 {
554 header->ancount = htons(0);
555 header->nscount = htons(0);
556 header->arcount = htons(0);
557 }
558
559 /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
560 sections of the packet. Find the new length here and put back pseudoheader
561 if it was removed. */
562 return resize_packet(header, n, pheader, plen);
563 }
564
565 /* sets new last_server */
566 void reply_query(int fd, int family, time_t now)
567 {
568 /* packet from peer server, extract data for cache, and send to
569 original requester */
570 struct dns_header *header;
571 union mysockaddr serveraddr;
572 struct frec *forward;
573 socklen_t addrlen = sizeof(serveraddr);
574 ssize_t n = recvfrom(fd, daemon->packet, daemon->edns_pktsz, 0, &serveraddr.sa, &addrlen);
575 size_t nn;
576 struct server *server;
577
578 /* packet buffer overwritten */
579 daemon->srv_save = NULL;
580
581 /* Determine the address of the server replying so that we can mark that as good */
582 serveraddr.sa.sa_family = family;
583 #ifdef HAVE_IPV6
584 if (serveraddr.sa.sa_family == AF_INET6)
585 serveraddr.in6.sin6_flowinfo = 0;
586 #endif
587
588 /* spoof check: answer must come from known server, */
589 for (server = daemon->servers; server; server = server->next)
590 if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
591 sockaddr_isequal(&server->addr, &serveraddr))
592 break;
593
594 header = (struct dns_header *)daemon->packet;
595
596 if (!server ||
597 n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR) ||
598 !(forward = lookup_frec(ntohs(header->id), questions_crc(header, n, daemon->namebuff))))
599 return;
600
601 server = forward->sentto;
602
603 if ((RCODE(header) == SERVFAIL || RCODE(header) == REFUSED) &&
604 !option_bool(OPT_ORDER) &&
605 forward->forwardall == 0)
606 /* for broken servers, attempt to send to another one. */
607 {
608 unsigned char *pheader;
609 size_t plen;
610 int is_sign;
611
612 /* recreate query from reply */
613 pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
614 if (!is_sign)
615 {
616 header->ancount = htons(0);
617 header->nscount = htons(0);
618 header->arcount = htons(0);
619 if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
620 {
621 header->hb3 &= ~(HB3_QR | HB3_TC);
622 forward_query(-1, NULL, NULL, 0, header, nn, now, forward);
623 return;
624 }
625 }
626 }
627
628 if ((forward->sentto->flags & SERV_TYPE) == 0)
629 {
630 if (RCODE(header) == SERVFAIL || RCODE(header) == REFUSED)
631 server = NULL;
632 else
633 {
634 struct server *last_server;
635
636 /* find good server by address if possible, otherwise assume the last one we sent to */
637 for (last_server = daemon->servers; last_server; last_server = last_server->next)
638 if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
639 sockaddr_isequal(&last_server->addr, &serveraddr))
640 {
641 server = last_server;
642 break;
643 }
644 }
645 if (!option_bool(OPT_ALL_SERVERS))
646 daemon->last_server = server;
647 }
648
649 /* If the answer is an error, keep the forward record in place in case
650 we get a good reply from another server. Kill it when we've
651 had replies from all to avoid filling the forwarding table when
652 everything is broken */
653 if (forward->forwardall == 0 || --forward->forwardall == 1 ||
654 (RCODE(header) != REFUSED && RCODE(header) != SERVFAIL))
655 {
656 int check_rebind = !(forward->flags & FREC_NOREBIND);
657
658 if (!option_bool(OPT_NO_REBIND))
659 check_rebind = 0;
660
661 if ((nn = process_reply(header, now, server, (size_t)n, check_rebind, forward->flags & FREC_CHECKING_DISABLED,
662 forward->flags & FREC_HAS_SUBNET, &forward->source)))
663 {
664 header->id = htons(forward->orig_id);
665 header->hb4 |= HB4_RA; /* recursion if available */
666 send_from(forward->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
667 &forward->source, &forward->dest, forward->iface);
668 }
669 free_frec(forward); /* cancel */
670 }
671 }
672
673
674 void receive_query(struct listener *listen, time_t now)
675 {
676 struct dns_header *header = (struct dns_header *)daemon->packet;
677 union mysockaddr source_addr;
678 unsigned short type;
679 struct all_addr dst_addr;
680 struct in_addr netmask, dst_addr_4;
681 size_t m;
682 ssize_t n;
683 int if_index = 0;
684 int local_auth = 0, auth_dns = 0;
685 struct iovec iov[1];
686 struct msghdr msg;
687 struct cmsghdr *cmptr;
688 union {
689 struct cmsghdr align; /* this ensures alignment */
690 #ifdef HAVE_IPV6
691 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
692 #endif
693 #if defined(HAVE_LINUX_NETWORK)
694 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
695 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
696 char control[CMSG_SPACE(sizeof(struct in_addr)) +
697 CMSG_SPACE(sizeof(unsigned int))];
698 #elif defined(IP_RECVDSTADDR)
699 char control[CMSG_SPACE(sizeof(struct in_addr)) +
700 CMSG_SPACE(sizeof(struct sockaddr_dl))];
701 #endif
702 } control_u;
703
704 /* packet buffer overwritten */
705 daemon->srv_save = NULL;
706
707 dst_addr_4.s_addr = 0;
708 netmask.s_addr = 0;
709
710 if (option_bool(OPT_NOWILD) && listen->iface)
711 {
712 auth_dns = listen->iface->dns_auth;
713
714 if (listen->family == AF_INET)
715 {
716 dst_addr_4 = listen->iface->addr.in.sin_addr;
717 netmask = listen->iface->netmask;
718 }
719 }
720
721 iov[0].iov_base = daemon->packet;
722 iov[0].iov_len = daemon->edns_pktsz;
723
724 msg.msg_control = control_u.control;
725 msg.msg_controllen = sizeof(control_u);
726 msg.msg_flags = 0;
727 msg.msg_name = &source_addr;
728 msg.msg_namelen = sizeof(source_addr);
729 msg.msg_iov = iov;
730 msg.msg_iovlen = 1;
731
732 if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
733 return;
734
735 if (n < (int)sizeof(struct dns_header) ||
736 (msg.msg_flags & MSG_TRUNC) ||
737 (header->hb3 & HB3_QR))
738 return;
739
740 source_addr.sa.sa_family = listen->family;
741 #ifdef HAVE_IPV6
742 if (listen->family == AF_INET6)
743 source_addr.in6.sin6_flowinfo = 0;
744 #endif
745
746 if (!option_bool(OPT_NOWILD))
747 {
748 struct ifreq ifr;
749
750 if (msg.msg_controllen < sizeof(struct cmsghdr))
751 return;
752
753 #if defined(HAVE_LINUX_NETWORK)
754 if (listen->family == AF_INET)
755 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
756 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
757 {
758 union {
759 unsigned char *c;
760 struct in_pktinfo *p;
761 } p;
762 p.c = CMSG_DATA(cmptr);
763 dst_addr_4 = dst_addr.addr.addr4 = p.p->ipi_spec_dst;
764 if_index = p.p->ipi_ifindex;
765 }
766 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
767 if (listen->family == AF_INET)
768 {
769 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
770 {
771 union {
772 unsigned char *c;
773 unsigned int *i;
774 struct in_addr *a;
775 #ifndef HAVE_SOLARIS_NETWORK
776 struct sockaddr_dl *s;
777 #endif
778 } p;
779 p.c = CMSG_DATA(cmptr);
780 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
781 dst_addr_4 = dst_addr.addr.addr4 = *(p.a);
782 else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
783 #ifdef HAVE_SOLARIS_NETWORK
784 if_index = *(p.i);
785 #else
786 if_index = p.s->sdl_index;
787 #endif
788 }
789 }
790 #endif
791
792 #ifdef HAVE_IPV6
793 if (listen->family == AF_INET6)
794 {
795 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
796 if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
797 {
798 union {
799 unsigned char *c;
800 struct in6_pktinfo *p;
801 } p;
802 p.c = CMSG_DATA(cmptr);
803
804 dst_addr.addr.addr6 = p.p->ipi6_addr;
805 if_index = p.p->ipi6_ifindex;
806 }
807 }
808 #endif
809
810 /* enforce available interface configuration */
811
812 if (!indextoname(listen->fd, if_index, ifr.ifr_name))
813 return;
814
815 if (!iface_check(listen->family, &dst_addr, ifr.ifr_name, &auth_dns))
816 {
817 if (!option_bool(OPT_CLEVERBIND))
818 enumerate_interfaces(0);
819 if (!loopback_exception(listen->fd, listen->family, &dst_addr, ifr.ifr_name) &&
820 !label_exception(if_index, listen->family, &dst_addr))
821 return;
822 }
823
824 if (listen->family == AF_INET && option_bool(OPT_LOCALISE))
825 {
826 struct irec *iface;
827
828 /* get the netmask of the interface whch has the address we were sent to.
829 This is no neccessarily the interface we arrived on. */
830
831 for (iface = daemon->interfaces; iface; iface = iface->next)
832 if (iface->addr.sa.sa_family == AF_INET &&
833 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
834 break;
835
836 /* interface may be new */
837 if (!iface && !option_bool(OPT_CLEVERBIND))
838 enumerate_interfaces(0);
839
840 for (iface = daemon->interfaces; iface; iface = iface->next)
841 if (iface->addr.sa.sa_family == AF_INET &&
842 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
843 break;
844
845 /* If we failed, abandon localisation */
846 if (iface)
847 netmask = iface->netmask;
848 else
849 dst_addr_4.s_addr = 0;
850 }
851 }
852
853 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
854 {
855 char types[20];
856 #ifdef HAVE_AUTH
857 struct auth_zone *zone;
858 #endif
859
860 querystr(auth_dns ? "auth" : "query", types, type);
861
862 if (listen->family == AF_INET)
863 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
864 (struct all_addr *)&source_addr.in.sin_addr, types);
865 #ifdef HAVE_IPV6
866 else
867 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
868 (struct all_addr *)&source_addr.in6.sin6_addr, types);
869 #endif
870
871 #ifdef HAVE_AUTH
872 /* find queries for zones we're authoritative for, and answer them directly */
873 if (!auth_dns)
874 for (zone = daemon->auth_zones; zone; zone = zone->next)
875 if (in_zone(zone, daemon->namebuff, NULL))
876 {
877 auth_dns = 1;
878 local_auth = 1;
879 break;
880 }
881 #endif
882 }
883
884 #ifdef HAVE_AUTH
885 if (auth_dns)
886 {
887 m = answer_auth(header, ((char *) header) + PACKETSZ, (size_t)n, now, &source_addr, local_auth);
888 if (m >= 1)
889 {
890 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
891 (char *)header, m, &source_addr, &dst_addr, if_index);
892 daemon->auth_answer++;
893 }
894 }
895 else
896 #endif
897 {
898 m = answer_request(header, ((char *) header) + PACKETSZ, (size_t)n,
899 dst_addr_4, netmask, now);
900
901 if (m >= 1)
902 {
903 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
904 (char *)header, m, &source_addr, &dst_addr, if_index);
905 daemon->local_answer++;
906 }
907 else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
908 header, (size_t)n, now, NULL))
909 daemon->queries_forwarded++;
910 else
911 daemon->local_answer++;
912 }
913 }
914
915 /* The daemon forks before calling this: it should deal with one connection,
916 blocking as necessary, and then return. Note, need to be a bit careful
917 about resources for debug mode, when the fork is suppressed: that's
918 done by the caller. */
919 unsigned char *tcp_request(int confd, time_t now,
920 union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
921 {
922 size_t size = 0;
923 int norebind = 0;
924 int local_auth = 0;
925 int checking_disabled, check_subnet;
926 size_t m;
927 unsigned short qtype;
928 unsigned int gotname;
929 unsigned char c1, c2;
930 /* Max TCP packet + slop + size */
931 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
932 unsigned char *payload = &packet[2];
933 /* largest field in header is 16-bits, so this is still sufficiently aligned */
934 struct dns_header *header = (struct dns_header *)payload;
935 u16 *length = (u16 *)packet;
936 struct server *last_server;
937 struct in_addr dst_addr_4;
938 union mysockaddr peer_addr;
939 socklen_t peer_len = sizeof(union mysockaddr);
940
941 if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
942 return packet;
943
944 while (1)
945 {
946 if (!packet ||
947 !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
948 !(size = c1 << 8 | c2) ||
949 !read_write(confd, payload, size, 1))
950 return packet;
951
952 if (size < (int)sizeof(struct dns_header))
953 continue;
954
955 check_subnet = 0;
956
957 /* save state of "cd" flag in query */
958 checking_disabled = header->hb4 & HB4_CD;
959
960 /* RFC 4035: sect 4.6 para 2 */
961 header->hb4 &= ~HB4_AD;
962
963 if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
964 {
965 char types[20];
966 #ifdef HAVE_AUTH
967 struct auth_zone *zone;
968 #endif
969 querystr(auth_dns ? "auth" : "query", types, qtype);
970
971 if (peer_addr.sa.sa_family == AF_INET)
972 log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
973 (struct all_addr *)&peer_addr.in.sin_addr, types);
974 #ifdef HAVE_IPV6
975 else
976 log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
977 (struct all_addr *)&peer_addr.in6.sin6_addr, types);
978 #endif
979
980 #ifdef HAVE_AUTH
981 /* find queries for zones we're authoritative for, and answer them directly */
982 if (!auth_dns)
983 for (zone = daemon->auth_zones; zone; zone = zone->next)
984 if (in_zone(zone, daemon->namebuff, NULL))
985 {
986 auth_dns = 1;
987 local_auth = 1;
988 break;
989 }
990 #endif
991 }
992
993 if (local_addr->sa.sa_family == AF_INET)
994 dst_addr_4 = local_addr->in.sin_addr;
995 else
996 dst_addr_4.s_addr = 0;
997
998 #ifdef HAVE_AUTH
999 if (auth_dns)
1000 m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, local_auth);
1001 else
1002 #endif
1003 {
1004 /* m > 0 if answered from cache */
1005 m = answer_request(header, ((char *) header) + 65536, (size_t)size,
1006 dst_addr_4, netmask, now);
1007
1008 /* Do this by steam now we're not in the select() loop */
1009 check_log_writer(NULL);
1010
1011 if (m == 0)
1012 {
1013 unsigned int flags = 0;
1014 struct all_addr *addrp = NULL;
1015 int type = 0;
1016 char *domain = NULL;
1017
1018 if (option_bool(OPT_ADD_MAC))
1019 size = add_mac(header, size, ((char *) header) + 65536, &peer_addr);
1020
1021 if (option_bool(OPT_CLIENT_SUBNET))
1022 {
1023 size_t new = add_source_addr(header, size, ((char *) header) + 65536, &peer_addr);
1024 if (size != new)
1025 {
1026 size = new;
1027 check_subnet = 1;
1028 }
1029 }
1030
1031 if (gotname)
1032 flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
1033
1034 if (type != 0 || option_bool(OPT_ORDER) || !daemon->last_server)
1035 last_server = daemon->servers;
1036 else
1037 last_server = daemon->last_server;
1038
1039 if (!flags && last_server)
1040 {
1041 struct server *firstsendto = NULL;
1042 unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
1043
1044 /* Loop round available servers until we succeed in connecting to one.
1045 Note that this code subtley ensures that consecutive queries on this connection
1046 which can go to the same server, do so. */
1047 while (1)
1048 {
1049 if (!firstsendto)
1050 firstsendto = last_server;
1051 else
1052 {
1053 if (!(last_server = last_server->next))
1054 last_server = daemon->servers;
1055
1056 if (last_server == firstsendto)
1057 break;
1058 }
1059
1060 /* server for wrong domain */
1061 if (type != (last_server->flags & SERV_TYPE) ||
1062 (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
1063 continue;
1064
1065 if (last_server->tcpfd == -1)
1066 {
1067 if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
1068 continue;
1069
1070 if ((!local_bind(last_server->tcpfd, &last_server->source_addr, last_server->interface, 1) ||
1071 connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
1072 {
1073 close(last_server->tcpfd);
1074 last_server->tcpfd = -1;
1075 continue;
1076 }
1077
1078 #ifdef HAVE_CONNTRACK
1079 /* Copy connection mark of incoming query to outgoing connection. */
1080 if (option_bool(OPT_CONNTRACK))
1081 {
1082 unsigned int mark;
1083 struct all_addr local;
1084 #ifdef HAVE_IPV6
1085 if (local_addr->sa.sa_family == AF_INET6)
1086 local.addr.addr6 = local_addr->in6.sin6_addr;
1087 else
1088 #endif
1089 local.addr.addr4 = local_addr->in.sin_addr;
1090
1091 if (get_incoming_mark(&peer_addr, &local, 1, &mark))
1092 setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
1093 }
1094 #endif
1095 }
1096
1097 *length = htons(size);
1098
1099 if (!read_write(last_server->tcpfd, packet, size + sizeof(u16), 0) ||
1100 !read_write(last_server->tcpfd, &c1, 1, 1) ||
1101 !read_write(last_server->tcpfd, &c2, 1, 1))
1102 {
1103 close(last_server->tcpfd);
1104 last_server->tcpfd = -1;
1105 continue;
1106 }
1107
1108 m = (c1 << 8) | c2;
1109 if (!read_write(last_server->tcpfd, payload, m, 1))
1110 return packet;
1111
1112 if (!gotname)
1113 strcpy(daemon->namebuff, "query");
1114 if (last_server->addr.sa.sa_family == AF_INET)
1115 log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
1116 (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
1117 #ifdef HAVE_IPV6
1118 else
1119 log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
1120 (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
1121 #endif
1122
1123 /* There's no point in updating the cache, since this process will exit and
1124 lose the information after a few queries. We make this call for the alias and
1125 bogus-nxdomain side-effects. */
1126 /* If the crc of the question section doesn't match the crc we sent, then
1127 someone might be attempting to insert bogus values into the cache by
1128 sending replies containing questions and bogus answers. */
1129 if (crc == questions_crc(header, (unsigned int)m, daemon->namebuff))
1130 m = process_reply(header, now, last_server, (unsigned int)m,
1131 option_bool(OPT_NO_REBIND) && !norebind, checking_disabled,
1132 check_subnet, &peer_addr);
1133
1134 break;
1135 }
1136 }
1137
1138 /* In case of local answer or no connections made. */
1139 if (m == 0)
1140 m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
1141 }
1142 }
1143
1144 check_log_writer(NULL);
1145
1146 *length = htons(m);
1147
1148 if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
1149 return packet;
1150 }
1151 }
1152
1153 static struct frec *allocate_frec(time_t now)
1154 {
1155 struct frec *f;
1156
1157 if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
1158 {
1159 f->next = daemon->frec_list;
1160 f->time = now;
1161 f->sentto = NULL;
1162 f->rfd4 = NULL;
1163 f->flags = 0;
1164 #ifdef HAVE_IPV6
1165 f->rfd6 = NULL;
1166 #endif
1167 daemon->frec_list = f;
1168 }
1169
1170 return f;
1171 }
1172
1173 static struct randfd *allocate_rfd(int family)
1174 {
1175 static int finger = 0;
1176 int i;
1177
1178 /* limit the number of sockets we have open to avoid starvation of
1179 (eg) TFTP. Once we have a reasonable number, randomness should be OK */
1180
1181 for (i = 0; i < RANDOM_SOCKS; i++)
1182 if (daemon->randomsocks[i].refcount == 0)
1183 {
1184 if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
1185 break;
1186
1187 daemon->randomsocks[i].refcount = 1;
1188 daemon->randomsocks[i].family = family;
1189 return &daemon->randomsocks[i];
1190 }
1191
1192 /* No free ones or cannot get new socket, grab an existing one */
1193 for (i = 0; i < RANDOM_SOCKS; i++)
1194 {
1195 int j = (i+finger) % RANDOM_SOCKS;
1196 if (daemon->randomsocks[j].refcount != 0 &&
1197 daemon->randomsocks[j].family == family &&
1198 daemon->randomsocks[j].refcount != 0xffff)
1199 {
1200 finger = j;
1201 daemon->randomsocks[j].refcount++;
1202 return &daemon->randomsocks[j];
1203 }
1204 }
1205
1206 return NULL; /* doom */
1207 }
1208
1209 static void free_frec(struct frec *f)
1210 {
1211 if (f->rfd4 && --(f->rfd4->refcount) == 0)
1212 close(f->rfd4->fd);
1213
1214 f->rfd4 = NULL;
1215 f->sentto = NULL;
1216 f->flags = 0;
1217
1218 #ifdef HAVE_IPV6
1219 if (f->rfd6 && --(f->rfd6->refcount) == 0)
1220 close(f->rfd6->fd);
1221
1222 f->rfd6 = NULL;
1223 #endif
1224 }
1225
1226 /* if wait==NULL return a free or older than TIMEOUT record.
1227 else return *wait zero if one available, or *wait is delay to
1228 when the oldest in-use record will expire. Impose an absolute
1229 limit of 4*TIMEOUT before we wipe things (for random sockets) */
1230 struct frec *get_new_frec(time_t now, int *wait)
1231 {
1232 struct frec *f, *oldest, *target;
1233 int count;
1234
1235 if (wait)
1236 *wait = 0;
1237
1238 for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next, count++)
1239 if (!f->sentto)
1240 target = f;
1241 else
1242 {
1243 if (difftime(now, f->time) >= 4*TIMEOUT)
1244 {
1245 free_frec(f);
1246 target = f;
1247 }
1248
1249 if (!oldest || difftime(f->time, oldest->time) <= 0)
1250 oldest = f;
1251 }
1252
1253 if (target)
1254 {
1255 target->time = now;
1256 return target;
1257 }
1258
1259 /* can't find empty one, use oldest if there is one
1260 and it's older than timeout */
1261 if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
1262 {
1263 /* keep stuff for twice timeout if we can by allocating a new
1264 record instead */
1265 if (difftime(now, oldest->time) < 2*TIMEOUT &&
1266 count <= daemon->ftabsize &&
1267 (f = allocate_frec(now)))
1268 return f;
1269
1270 if (!wait)
1271 {
1272 free_frec(oldest);
1273 oldest->time = now;
1274 }
1275 return oldest;
1276 }
1277
1278 /* none available, calculate time 'till oldest record expires */
1279 if (count > daemon->ftabsize)
1280 {
1281 static time_t last_log = 0;
1282
1283 if (oldest && wait)
1284 *wait = oldest->time + (time_t)TIMEOUT - now;
1285
1286 if ((int)difftime(now, last_log) > 5)
1287 {
1288 last_log = now;
1289 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
1290 }
1291
1292 return NULL;
1293 }
1294
1295 if (!(f = allocate_frec(now)) && wait)
1296 /* wait one second on malloc failure */
1297 *wait = 1;
1298
1299 return f; /* OK if malloc fails and this is NULL */
1300 }
1301
1302 /* crc is all-ones if not known. */
1303 static struct frec *lookup_frec(unsigned short id, unsigned int crc)
1304 {
1305 struct frec *f;
1306
1307 for(f = daemon->frec_list; f; f = f->next)
1308 if (f->sentto && f->new_id == id &&
1309 (f->crc == crc || crc == 0xffffffff))
1310 return f;
1311
1312 return NULL;
1313 }
1314
1315 static struct frec *lookup_frec_by_sender(unsigned short id,
1316 union mysockaddr *addr,
1317 unsigned int crc)
1318 {
1319 struct frec *f;
1320
1321 for(f = daemon->frec_list; f; f = f->next)
1322 if (f->sentto &&
1323 f->orig_id == id &&
1324 f->crc == crc &&
1325 sockaddr_isequal(&f->source, addr))
1326 return f;
1327
1328 return NULL;
1329 }
1330
1331 /* A server record is going away, remove references to it */
1332 void server_gone(struct server *server)
1333 {
1334 struct frec *f;
1335
1336 for (f = daemon->frec_list; f; f = f->next)
1337 if (f->sentto && f->sentto == server)
1338 free_frec(f);
1339
1340 if (daemon->last_server == server)
1341 daemon->last_server = NULL;
1342
1343 if (daemon->srv_save == server)
1344 daemon->srv_save = NULL;
1345 }
1346
/* Return a random id that is unique: keep drawing values from rand16()
   until lookup_frec() finds no in-use record for that id and crc. */
static unsigned short get_id(unsigned int crc)
{
  unsigned short candidate;

  for (;;)
    {
      candidate = rand16();

      if (!lookup_frec(candidate, crc))
        return candidate;
    }
}
1358
1359
1360
1361
1362