]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/syncres.cc
Merge pull request #1968 from pieterlexis/port-faq
[thirdparty/pdns.git] / pdns / syncres.cc
CommitLineData
86c152f2
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
30b13ef7 3 Copyright (C) 2003 - 2014 PowerDNS.COM BV
86c152f2
BH
4
5 This program is free software; you can redistribute it and/or modify
36c5ee42
BH
6 it under the terms of the GNU General Public License version 2 as published
7 by the Free Software Foundation
86c152f2 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
86c152f2
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
86c152f2 21*/
caa6eefa 22
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include "utility.hh"
#include "syncres.hh"
#include <iostream>
#include <map>
#include <algorithm>
#include <set>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <utility>
#include <deque>
#include <unistd.h>
#include "logger.hh"
#include "misc.hh"
#include "arguments.hh"
#include "lwres.hh"
#include "recursor_cache.hh"
#include "dnsparser.hh"
#include "dns_random.hh"
#include "lock.hh"
#include "cachecleaner.hh"
eefd15f9 45
ac0e821b 46__thread SyncRes::StaticStorage* t_sstorage;
bb4bdbaf 47
a9af3782 48unsigned int SyncRes::s_maxnegttl;
c3e753c7 49unsigned int SyncRes::s_maxcachettl;
1051f8a9
BH
50unsigned int SyncRes::s_packetcachettl;
51unsigned int SyncRes::s_packetcacheservfailttl;
628e2c7b
PA
52unsigned int SyncRes::s_serverdownmaxfails;
53unsigned int SyncRes::s_serverdownthrottletime;
75ba907b 54uint64_t SyncRes::s_queries;
55uint64_t SyncRes::s_outgoingtimeouts;
56uint64_t SyncRes::s_outqueries;
57uint64_t SyncRes::s_tcpoutqueries;
58uint64_t SyncRes::s_throttledqueries;
59uint64_t SyncRes::s_dontqueries;
60uint64_t SyncRes::s_nodelegated;
61uint64_t SyncRes::s_unreachables;
aadceba8 62unsigned int SyncRes::s_minimumTTL;
996c89cc 63bool SyncRes::s_doIPv6;
1051f8a9 64bool SyncRes::s_nopacketcache;
173d790e 65unsigned int SyncRes::s_maxqperq;
a9af3782 66string SyncRes::s_serverID;
77499b05 67SyncRes::LogMode SyncRes::s_lm;
c836dc19 68
77499b05 69#define LOG(x) if(d_lm == Log) { L <<Logger::Warning << x; } else if(d_lm == Store) { d_trace << x; }
728485ca 70
840c10ec 71bool SyncRes::s_noEDNSPing;
4bfae16d 72bool SyncRes::s_noEDNS;
3de83124 73
ac0e821b 74SyncRes::SyncRes(const struct timeval& now) : d_outqueries(0), d_tcpoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0),
232f0877
CH
75 d_now(now),
76 d_cacheonly(false), d_nocache(false), d_doEDNS0(false), d_lm(s_lm)
77
ac0e821b
BH
78{
79 if(!t_sstorage) {
80 t_sstorage = new StaticStorage();
81 }
82}
83
728485ca 84/** everything begins here - this is the entry point just after receiving a packet */
a9af3782 85int SyncRes::beginResolve(const string &qname, const QType &qtype, uint16_t qclass, vector<DNSResourceRecord>&ret)
728485ca 86{
c836dc19 87 s_queries++;
693dbe65
BH
88
89 if( (qtype.getCode() == QType::AXFR))
90 return -1;
91
ec6480f3
BH
92 if( (qtype.getCode()==QType::PTR && pdns_iequals(qname, "1.0.0.127.in-addr.arpa.")) ||
93 (qtype.getCode()==QType::A && qname.length()==10 && pdns_iequals(qname, "localhost."))) {
31ad43ab
BH
94 ret.clear();
95 DNSResourceRecord rr;
96 rr.qname=qname;
97 rr.qtype=qtype;
703761cc 98 rr.qclass=QClass::IN;
31ad43ab
BH
99 rr.ttl=86400;
100 if(qtype.getCode()==QType::PTR)
101 rr.content="localhost.";
102 else
103 rr.content="127.0.0.1";
104 ret.push_back(rr);
105 return 0;
106 }
107
703761cc 108 if(qclass==QClass::CHAOS && qtype.getCode()==QType::TXT &&
ec6480f3 109 (pdns_iequals(qname, "version.bind.") || pdns_iequals(qname, "id.server.") || pdns_iequals(qname, "version.pdns.") )
a9af3782
BH
110 ) {
111 ret.clear();
112 DNSResourceRecord rr;
113 rr.qname=qname;
114 rr.qtype=qtype;
115 rr.qclass=qclass;
116 rr.ttl=86400;
ec6480f3 117 if(pdns_iequals(qname,"version.bind.") || pdns_iequals(qname,"version.pdns."))
a9af3782
BH
118 rr.content="\""+::arg()["version-string"]+"\"";
119 else
120 rr.content="\""+s_serverID+"\"";
121 ret.push_back(rr);
122 return 0;
123 }
9a97cc5c 124
703761cc
KM
125 if(qclass==QClass::ANY)
126 qclass=QClass::IN;
127 else if(qclass!=QClass::IN)
9a97cc5c
BH
128 return -1;
129
31ad43ab 130 set<GetBestNSAnswer> beenthere;
809fe23f 131 int res=doResolve(qname, qtype, ret, 0, beenthere);
728485ca
BH
132 return res;
133}
afbe2787 134
ab5c053d 135//! This is the 'out of band resolver', in other words, the authoritative server
e93c956b
BH
136bool SyncRes::doOOBResolve(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int& res)
137{
5605c067 138 string prefix;
77499b05 139 if(doLog()) {
5605c067
BH
140 prefix=d_prefix;
141 prefix.append(depth, ' ');
142 }
143
77499b05 144 LOG(prefix<<qname<<": checking auth storage for '"<<qname<<"|"<<qtype.getName()<<"'"<<endl);
5605c067
BH
145 string authdomain(qname);
146
147 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
49a699c4 148 if(iter==t_sstorage->domainmap->end()) {
77499b05 149 LOG(prefix<<qname<<": auth storage has no zone for this query!"<<endl);
5605c067
BH
150 return false;
151 }
77499b05 152 LOG(prefix<<qname<<": auth storage has data, zone='"<<authdomain<<"'"<<endl);
5605c067
BH
153 pair<AuthDomain::records_t::const_iterator, AuthDomain::records_t::const_iterator> range;
154
155 range=iter->second.d_records.equal_range(tie(qname)); // partial lookup
7bddb139 156
5605c067
BH
157 ret.clear();
158 AuthDomain::records_t::const_iterator ziter;
9e9844e2 159 bool somedata=false;
5605c067 160 for(ziter=range.first; ziter!=range.second; ++ziter) {
9e9844e2 161 somedata=true;
9bc8c14c 162 if(qtype.getCode()==QType::ANY || ziter->qtype==qtype || ziter->qtype.getCode()==QType::CNAME) // let rest of nameserver do the legwork on this one
5605c067
BH
163 ret.push_back(*ziter);
164 }
9bc8c14c 165 if(!ret.empty()) {
77499b05 166 LOG(prefix<<qname<<": exact match in zone '"<<authdomain<<"'"<<endl);
9bc8c14c
BH
167 res=0;
168 return true;
5605c067 169 }
9e9844e2 170 if(somedata) {
77499b05 171 LOG(prefix<<qname<<": found record in '"<<authdomain<<"', but nothing of the right type, sending SOA"<<endl);
41ea0e50 172 ziter=iter->second.d_records.find(boost::make_tuple(authdomain, QType(QType::SOA)));
9e9844e2
BH
173 if(ziter!=iter->second.d_records.end()) {
174 DNSResourceRecord rr=*ziter;
175 rr.d_place=DNSResourceRecord::AUTHORITY;
176 ret.push_back(rr);
177 }
178 else
77499b05 179 LOG(prefix<<qname<<": can't find SOA record '"<<authdomain<<"' in our zone!"<<endl);
9e9844e2
BH
180 res=RCode::NoError;
181 return true;
182 }
5605c067 183
77499b05 184 LOG(prefix<<qname<<": nothing found so far in '"<<authdomain<<"', trying wildcards"<<endl);
0d1e259a
BH
185 string wcarddomain(qname);
186 while(!pdns_iequals(wcarddomain, iter->first) && chopOffDotted(wcarddomain)) {
77499b05 187 LOG(prefix<<qname<<": trying '*."+wcarddomain+"' in "<<authdomain<<endl);
41ea0e50 188 range=iter->second.d_records.equal_range(boost::make_tuple("*."+wcarddomain));
0d1e259a
BH
189 if(range.first==range.second)
190 continue;
191
192 for(ziter=range.first; ziter!=range.second; ++ziter) {
193 DNSResourceRecord rr=*ziter;
194 if(rr.qtype == qtype || qtype.getCode() == QType::ANY) {
195 rr.qname = qname;
196 rr.d_place=DNSResourceRecord::ANSWER;
197 ret.push_back(rr);
198 }
199 }
77499b05 200 LOG(prefix<<qname<<": in '"<<authdomain<<"', had wildcard match on '*."+wcarddomain+"'"<<endl);
0d1e259a
BH
201 res=RCode::NoError;
202 return true;
203 }
204
5605c067
BH
205 string nsdomain(qname);
206
ec6480f3 207 while(chopOffDotted(nsdomain) && !pdns_iequals(nsdomain, iter->first)) {
41ea0e50 208 range=iter->second.d_records.equal_range(boost::make_tuple(nsdomain,QType(QType::NS)));
5605c067
BH
209 if(range.first==range.second)
210 continue;
211
212 for(ziter=range.first; ziter!=range.second; ++ziter) {
213 DNSResourceRecord rr=*ziter;
214 rr.d_place=DNSResourceRecord::AUTHORITY;
215 ret.push_back(rr);
216 }
217 }
7bddb139 218 if(ret.empty()) {
77499b05 219 LOG(prefix<<qname<<": no NS match in zone '"<<authdomain<<"' either, handing out SOA"<<endl);
41ea0e50 220 ziter=iter->second.d_records.find(boost::make_tuple(authdomain, QType(QType::SOA)));
5605c067
BH
221 if(ziter!=iter->second.d_records.end()) {
222 DNSResourceRecord rr=*ziter;
223 rr.d_place=DNSResourceRecord::AUTHORITY;
224 ret.push_back(rr);
225 }
226 else
77499b05 227 LOG(prefix<<qname<<": can't find SOA record '"<<authdomain<<"' in our zone!"<<endl);
5605c067
BH
228 res=RCode::NXDomain;
229 }
230 else
231 res=0;
232
9e9844e2 233 return true;
e93c956b
BH
234}
235
ff1872cf
BH
236void SyncRes::doEDNSDumpAndClose(int fd)
237{
238 FILE* fp=fdopen(fd, "w");
239 fprintf(fp,"IP Address\tMode\tMode last updated at\n");
bb4bdbaf 240
49a699c4 241 for(ednsstatus_t::const_iterator iter = t_sstorage->ednsstatus.begin(); iter != t_sstorage->ednsstatus.end(); ++iter) {
ff1872cf
BH
242 fprintf(fp, "%s\t%d\t%s", iter->first.toString().c_str(), (int)iter->second.mode, ctime(&iter->second.modeSetAt));
243 }
bb4bdbaf 244
ff1872cf
BH
245 fclose(fp);
246}
247
c1d73d94 248int SyncRes::asyncresolveWrapper(const ComboAddress& ip, const string& domain, int type, bool doTCP, bool sendRDQuery, struct timeval* now, LWResult* res)
81883dcc
BH
249{
250 /* what is your QUEST?
251 the goal is to get as many remotes as possible on the highest level of hipness: EDNS PING responders.
252 The levels are:
253
254 -1) CONFIRMEDPINGER: Confirmed pinger!
255 0) UNKNOWN Unknown state
256 1) EDNSNOPING: Honors EDNS0 if no PING is included
257 2) EDNSPINGOK: Ignores EDNS0+PING, but does generate EDNS0 response
258 3) EDNSIGNORANT: Ignores EDNS0+PING, gives replies without EDNS0 nor PING
259 4) NOEDNS: Generates FORMERR on EDNS queries
260
261 Everybody starts out assumed to be '0'.
262 If '-1', send out EDNS0+Ping
263 If we get a FormErr, ignore
4957a608
BH
264 If we get a incorrect PING, ignore
265 If we get no PING, ignore
81883dcc
BH
266 If '0', send out EDNS0+Ping
267 If we get a pure EDNS response, you are downgraded to '2'.
268 If you FORMERR us, go to '1',
269 If no EDNS in response, go to '3' - 3 and 0 are really identical, except confirmed
4957a608 270 If with correct PING, upgrade to -1
81883dcc
BH
271 If '1', send out EDNS0, no PING
272 If FORMERR, downgrade to 4
273 If '2', keep on including EDNS0+PING, just don't expect PING to be correct
274 If PING correct, move to '0', and cheer in the log file!
275 If '3', keep on including EDNS0+PING, see what happens
276 Same behaviour as 0
277 If '4', send bare queries
278 */
279
4bfae16d
BH
280 if(s_noEDNS) {
281 g_stats.noEdnsOutQueries++;
282 return asyncresolve(ip, domain, type, doTCP, sendRDQuery, 0, now, res);
283 }
284
bb4bdbaf 285 SyncRes::EDNSStatus* ednsstatus;
49a699c4 286 ednsstatus = &t_sstorage->ednsstatus[ip];
81883dcc 287
bb4bdbaf
BH
288 if(ednsstatus->modeSetAt && ednsstatus->modeSetAt + 3600 < d_now.tv_sec) {
289 *ednsstatus=SyncRes::EDNSStatus();
77499b05 290 // cerr<<"Resetting EDNS Status for "<<ip.toString()<<endl);
81883dcc
BH
291 }
292
bb4bdbaf
BH
293 if(s_noEDNSPing && ednsstatus->mode == EDNSStatus::UNKNOWN)
294 ednsstatus->mode = EDNSStatus::EDNSNOPING;
840c10ec 295
bb4bdbaf 296 SyncRes::EDNSStatus::EDNSMode& mode=ednsstatus->mode;
81883dcc
BH
297 SyncRes::EDNSStatus::EDNSMode oldmode = mode;
298 int EDNSLevel=0;
299
300 int ret;
ff1872cf 301 for(int tries = 0; tries < 3; ++tries) {
77499b05 302 // cerr<<"Remote '"<<ip.toString()<<"' currently in mode "<<mode<<endl);
81883dcc
BH
303
304 if(mode==EDNSStatus::CONFIRMEDPINGER || mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSPINGOK || mode==EDNSStatus::EDNSIGNORANT)
305 EDNSLevel = 2;
306 else if(mode==EDNSStatus::EDNSNOPING) {
307 EDNSLevel = 1;
308 g_stats.noPingOutQueries++;
309 }
310 else if(mode==EDNSStatus::NOEDNS) {
311 g_stats.noEdnsOutQueries++;
312 EDNSLevel = 0;
313 }
314
c1d73d94 315 ret=asyncresolve(ip, domain, type, doTCP, sendRDQuery, EDNSLevel, now, res);
81883dcc 316 if(ret == 0 || ret < 0) {
77499b05 317 // cerr<<"Transport error or timeout (ret="<<ret<<"), no change in mode"<<endl);
81883dcc
BH
318 return ret;
319 }
320
321 if(mode== EDNSStatus::CONFIRMEDPINGER) { // confirmed pinger!
322 if(!res->d_pingCorrect) {
4957a608 323 L<<Logger::Error<<"Confirmed EDNS-PING enabled host "<<ip.toString()<<" did not send back correct ping"<<endl;
232f0877 324 // perhaps lower some kind of count here, don't want to punnish a downgrader too long!
4957a608
BH
325 ret = 0;
326 res->d_rcode = RCode::ServFail;
327 g_stats.ednsPingMismatches++;
81883dcc
BH
328 }
329 else {
4957a608
BH
330 g_stats.ednsPingMatches++;
331 ednsstatus->modeSetAt=d_now.tv_sec; // only the very best mode self-perpetuates
81883dcc
BH
332 }
333 }
334 else if(mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSPINGOK || mode == EDNSStatus::EDNSIGNORANT ) {
335 if(res->d_rcode == RCode::FormErr) {
232f0877 336 // cerr<<"Downgrading to EDNSNOPING because of FORMERR!"<<endl);
4957a608
BH
337 mode = EDNSStatus::EDNSNOPING;
338 continue;
98b58a72
BH
339 }
340 else if(mode==EDNSStatus::UNKNOWN && (res->d_rcode == RCode::Refused || res->d_rcode == RCode::NotImp) ) { // this "fixes" F5
232f0877 341 // cerr<<"Downgrading an unknown status to EDNSNOPING because of RCODE="<<res->d_rcode<<endl;
4957a608
BH
342 mode = EDNSStatus::EDNSNOPING;
343 continue;
81883dcc
BH
344 }
345 else if(!res->d_pingCorrect && res->d_haveEDNS)
4957a608 346 mode = EDNSStatus::EDNSPINGOK;
81883dcc 347 else if(res->d_pingCorrect) {
4957a608
BH
348 L<<Logger::Warning<<"We welcome "<<ip.toString()<<" to the land of EDNS-PING!"<<endl;
349 mode = EDNSStatus::CONFIRMEDPINGER;
350 g_stats.ednsPingMatches++;
81883dcc
BH
351 }
352 else if(!res->d_haveEDNS) {
4957a608
BH
353 if(mode != EDNSStatus::EDNSIGNORANT) {
354 mode = EDNSStatus::EDNSIGNORANT;
232f0877 355 // cerr<<"We find that "<<ip.toString()<<" is an EDNS-ignorer, moving to mode 3"<<endl);
4957a608 356 }
81883dcc
BH
357 }
358 }
359 else if(mode==EDNSStatus::EDNSNOPING) {
360 if(res->d_rcode == RCode::FormErr) {
232f0877 361 // cerr<<"Downgrading to mode 4, FORMERR!"<<endl);
4957a608
BH
362 mode = EDNSStatus::NOEDNS;
363 continue;
81883dcc
BH
364 }
365 }
366 else if(mode==EDNSStatus::EDNSPINGOK) {
367 if(res->d_pingCorrect) {
4957a608
BH
368 // an upgrade!
369 L<<Logger::Warning<<"We welcome "<<ip.toString()<<" to the land of EDNS-PING!"<<endl;
370 mode = EDNSStatus::CONFIRMEDPINGER;
81883dcc
BH
371 }
372 }
373 if(oldmode != mode)
bb4bdbaf 374 ednsstatus->modeSetAt=d_now.tv_sec;
77499b05 375 // cerr<<"Result: ret="<<ret<<", EDNS-level: "<<EDNSLevel<<", haveEDNS: "<<res->d_haveEDNS<<", EDNS-PING correct: "<<res->d_pingCorrect<<", new mode: "<<mode<<endl);
81883dcc
BH
376
377 return ret;
378 }
379 return ret;
380}
381
7b35aa49 382int SyncRes::doResolve(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, set<GetBestNSAnswer>& beenthere)
afbe2787 383{
ded77b10 384 string prefix;
77499b05 385 if(doLog()) {
ded77b10
BH
386 prefix=d_prefix;
387 prefix.append(depth, ' ');
388 }
728485ca 389
f4df5e89 390 int res=0;
5605c067 391 if(!(d_nocache && qtype.getCode()==QType::NS && qname==".")) {
115d07ad 392 if(d_cacheonly) { // very limited OOB support
263f6a5a 393 LWResult lwr;
77499b05 394 LOG(prefix<<qname<<": Recursion not requested for '"<<qname<<"|"<<qtype.getName()<<"', peeking at auth/forward zones"<<endl);
115d07ad
BH
395 string authname(qname);
396 domainmap_t::const_iterator iter=getBestAuthZone(&authname);
49a699c4 397 if(iter != t_sstorage->domainmap->end()) {
4957a608
BH
398 const vector<ComboAddress>& servers = iter->second.d_servers;
399 if(servers.empty()) {
400 ret.clear();
401 doOOBResolve(qname, qtype, ret, depth, res);
402 return res;
403 }
404 else {
405 const ComboAddress remoteIP = servers.front();
77499b05 406 LOG(prefix<<qname<<": forwarding query to hardcoded nameserver '"<< remoteIP.toStringWithPort()<<"' for zone '"<<authname<<"'"<<endl);
4957a608
BH
407
408 res=asyncresolveWrapper(remoteIP, qname, qtype.getCode(), false, false, &d_now, &lwr);
409 // filter out the good stuff from lwr.result()
410
411 for(LWResult::res_t::const_iterator i=lwr.d_result.begin();i!=lwr.d_result.end();++i) {
412 if(i->d_place == DNSResourceRecord::ANSWER)
413 ret.push_back(*i);
414 }
415 return res;
416 }
115d07ad
BH
417 }
418 }
419
c836dc19
BH
420 if(doCNAMECacheCheck(qname,qtype,ret,depth,res)) // will reroute us if needed
421 return res;
422
423 if(doCacheCheck(qname,qtype,ret,depth,res)) // we done
424 return res;
425 }
afbe2787 426
115d07ad 427 if(d_cacheonly)
c836dc19 428 return 0;
115d07ad 429
77499b05 430 LOG(prefix<<qname<<": No cache hit for '"<<qname<<"|"<<qtype.getName()<<"', trying to find an appropriate NS record"<<endl);
728485ca
BH
431
432 string subdomain(qname);
433
7738a23f 434 set<string, CIStringCompare> nsset;
7305df82 435 bool flawedNSSet=false;
97df07f8
PD
436
437 // the two retries allow getBestNSNamesFromCache&co to reprime the root
438 // hints, in case they ever go missing
bdf40704 439 for(int tries=0;tries<2 && nsset.empty();++tries) {
7305df82 440 subdomain=getBestNSNamesFromCache(subdomain, nsset, &flawedNSSet, depth, beenthere); // pass beenthere to both occasions
bdf40704
BH
441 }
442
7305df82 443 if(!(res=doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere)))
728485ca
BH
444 return 0;
445
77499b05 446 LOG(prefix<<qname<<": failed (res="<<res<<")"<<endl);
20177d1d 447 return res<0 ? RCode::ServFail : res;
afbe2787
BH
448}
449
#if 0
// for testing purposes
/** Ordering predicate that puts addresses with the larger address-family value first.
    NOTE(review): this yields "IPv6 first" only where AF_INET6 > AF_INET - confirm for the target platform.
    The comparison is strict, so the predicate is usable with the standard sort algorithms. */
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return a.sin4.sin_family > b.sin4.sin_family;
}
#endif
fdf05fd4 457
21f0f88b 458/** This function explicitly goes out for A or AAAA addresses
996c89cc 459*/
d96e88da 460vector<ComboAddress> SyncRes::getAddrs(const string &qname, int depth, set<GetBestNSAnswer>& beenthere)
75b49099 461{
bfea0d0b
BH
462 typedef vector<DNSResourceRecord> res_t;
463 res_t res;
75b49099 464
996c89cc
BH
465 typedef vector<ComboAddress> ret_t;
466 ret_t ret;
75b49099 467
d96e88da 468 QType type;
76c01aec 469 for(int j=1-s_doIPv6; j<2+s_doIPv6; j++)
d96e88da 470 {
76c01aec
PD
471 bool done=false;
472 // j=0: ANY
473 // j=1: A
474 // j=2: AAAA
475
476 switch(j) {
477 case 0:
478 type = QType::ANY;
479 break;
480 case 1:
481 type = QType::A;
482 break;
483 case 2:
484 type = QType::AAAA;
485 break;
486 }
d96e88da
PD
487
488 if(!doResolve(qname, type, res,depth+1,beenthere) && !res.empty()) { // this consults cache, OR goes out
489 for(res_t::const_iterator i=res.begin(); i!= res.end(); ++i) {
490 if(i->qtype.getCode()==QType::A || i->qtype.getCode()==QType::AAAA) {
76c01aec
PD
491 ret.push_back(ComboAddress(i->content, 53));
492 if(!j) done=true;
d96e88da 493 }
42724edf 494 }
f4df5e89 495 }
76c01aec 496 if(done) break;
bfea0d0b 497 }
21f0f88b 498
996c89cc 499 if(ret.size() > 1) {
51e2144e 500 random_shuffle(ret.begin(), ret.end(), dns_random);
996c89cc 501
ae4d8cf1 502 // move 'best' address for this nameserver name up front
5ea6f7de 503 nsspeeds_t::iterator best = t_sstorage->nsSpeeds.find(qname);
996c89cc 504
49a699c4 505 if(best != t_sstorage->nsSpeeds.end())
ae4d8cf1 506 for(ret_t::iterator i=ret.begin(); i != ret.end(); ++i) {
4957a608
BH
507 if(*i==best->second.d_best) { // got the fastest one
508 if(i!=ret.begin()) {
509 *i=*ret.begin();
510 *ret.begin()=best->second.d_best;
511 }
512 break;
513 }
996c89cc
BH
514 }
515 }
fdf05fd4 516
728485ca 517 return ret;
75b49099
BH
518}
519
7305df82 520void SyncRes::getBestNSFromCache(const string &qname, set<DNSResourceRecord>&bestns, bool* flawedNSSet, int depth, set<GetBestNSAnswer>& beenthere)
86c152f2 521{
ded77b10 522 string prefix, subdomain(qname);
77499b05 523 if(doLog()) {
ded77b10
BH
524 prefix=d_prefix;
525 prefix.append(depth, ' ');
526 }
75b49099
BH
527 bestns.clear();
528
75b49099 529 do {
77499b05 530 LOG(prefix<<qname<<": Checking if we have NS in cache for '"<<subdomain<<"'"<<endl);
7738a23f 531 set<DNSResourceRecord> ns;
7305df82 532 *flawedNSSet = false;
49a699c4 533 if(t_RC->get(d_now.tv_sec, subdomain, QType(QType::NS), &ns) > 0) {
7bf26383 534 for(set<DNSResourceRecord>::const_iterator k=ns.begin();k!=ns.end();++k) {
4957a608
BH
535 if(k->ttl > (unsigned int)d_now.tv_sec ) {
536 set<DNSResourceRecord> aset;
537
538 DNSResourceRecord rr=*k;
539 rr.content=k->content;
49a699c4 540 if(!dottedEndsOn(rr.content, subdomain) || t_RC->get(d_now.tv_sec, rr.content, s_doIPv6 ? QType(QType::ADDR) : QType(QType::A),
232f0877 541 doLog() ? &aset : 0) > 5) {
4957a608 542 bestns.insert(rr);
77499b05
BH
543 LOG(prefix<<qname<<": NS (with ip, or non-glue) in cache for '"<<subdomain<<"' -> '"<<rr.content<<"'"<<endl);
544 LOG(prefix<<qname<<": within bailiwick: "<<dottedEndsOn(rr.content, subdomain));
4957a608 545 if(!aset.empty()) {
77499b05 546 LOG(", in cache, ttl="<<(unsigned int)(((time_t)aset.begin()->ttl- d_now.tv_sec ))<<endl);
4957a608
BH
547 }
548 else {
77499b05 549 LOG(", not in cache / did not look at cache"<<endl);
4957a608
BH
550 }
551 }
552 else {
553 *flawedNSSet=true;
77499b05 554 LOG(prefix<<qname<<": NS in cache for '"<<subdomain<<"', but needs glue ("<<k->content<<") which we miss or is expired"<<endl);
4957a608
BH
555 }
556 }
afbe2787 557 }
75b49099 558 if(!bestns.empty()) {
4957a608
BH
559 GetBestNSAnswer answer;
560 answer.qname=qname; answer.bestns=bestns;
561 if(beenthere.count(answer)) {
8a1ff051 562 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' but part of LOOP (already seen "<<answer.qname<<")! Trying less specific NS"<<endl);
77499b05
BH
563 if(doLog())
564 for( set<GetBestNSAnswer>::const_iterator j=beenthere.begin();j!=beenthere.end();++j) {
565 LOG(prefix<<qname<<": beenthere: "<<j->qname<<" ("<<(unsigned int)j->bestns.size()<<")"<<endl);
566 }
4957a608
BH
567 bestns.clear();
568 }
569 else {
570 beenthere.insert(answer);
77499b05 571 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' (flawedNSSet="<<*flawedNSSet<<")"<<endl);
4957a608
BH
572 return;
573 }
75b49099 574 }
afbe2787 575 }
77499b05 576 LOG(prefix<<qname<<": no valid/useful NS in cache for '"<<subdomain<<"'"<<endl);
97df07f8 577 if(subdomain==".") { primeHints(); }
7738a23f 578 }while(chopOffDotted(subdomain));
75b49099
BH
579}
580
5605c067
BH
581SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(string* qname)
582{
583 SyncRes::domainmap_t::const_iterator ret;
584 do {
49a699c4
BH
585 ret=t_sstorage->domainmap->find(*qname);
586 if(ret!=t_sstorage->domainmap->end())
5605c067
BH
587 break;
588 }while(chopOffDotted(*qname));
589 return ret;
590}
288f4aa9 591
7bf26383 592/** doesn't actually do the work, leaves that to getBestNSFromCache */
7305df82 593string SyncRes::getBestNSNamesFromCache(const string &qname, set<string, CIStringCompare>& nsset, bool* flawedNSSet, int depth, set<GetBestNSAnswer>&beenthere)
75b49099
BH
594{
595 string subdomain(qname);
5605c067
BH
596 string authdomain(qname);
597
598 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
49a699c4 599 if(iter!=t_sstorage->domainmap->end()) {
2e5ae2b2
BH
600 if( iter->second.d_servers.empty() )
601 nsset.insert(string()); // this gets picked up in doResolveAt, if empty it means "we are auth", otherwise it denotes a forward
602 else {
603 for(vector<ComboAddress>::const_iterator server=iter->second.d_servers.begin(); server != iter->second.d_servers.end(); ++server)
4957a608 604 nsset.insert((iter->second.d_rdForward ? "+" : "-") + server->toStringWithPort()); // add a '+' if the rd bit should be set
2e5ae2b2
BH
605 }
606
5605c067
BH
607 return authdomain;
608 }
609
75b49099 610 set<DNSResourceRecord> bestns;
7305df82 611 getBestNSFromCache(subdomain, bestns, flawedNSSet, depth, beenthere);
75b49099
BH
612
613 for(set<DNSResourceRecord>::const_iterator k=bestns.begin();k!=bestns.end();++k) {
614 nsset.insert(k->content);
e93c956b
BH
615 if(k==bestns.begin())
616 subdomain=k->qname;
86c152f2 617 }
75b49099 618 return subdomain;
afbe2787
BH
619}
620
7b35aa49 621bool SyncRes::doCNAMECacheCheck(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int &res)
afbe2787 622{
ded77b10 623 string prefix;
77499b05 624 if(doLog()) {
ded77b10
BH
625 prefix=d_prefix;
626 prefix.append(depth, ' ');
627 }
36f5e3db 628
40de2910 629 if((depth>9 && d_outqueries>10 && d_throttledqueries>5) || depth > 15) {
5bc8686b 630 LOG(prefix<<qname<<": recursing (CNAME or other indirection) too deep, depth="<<depth<<endl);
c6644fc5
BH
631 res=RCode::ServFail;
632 return true;
633 }
36f5e3db 634
77499b05 635 LOG(prefix<<qname<<": Looking for CNAME cache hit of '"<<(qname+"|CNAME")<<"'"<<endl);
7bf26383 636 set<DNSResourceRecord> cset;
49a699c4 637 if(t_RC->get(d_now.tv_sec, qname,QType(QType::CNAME),&cset) > 0) {
36c5ee42 638
7bf26383 639 for(set<DNSResourceRecord>::const_iterator j=cset.begin();j!=cset.end();++j) {
d6d5dea7 640 if(j->ttl>(unsigned int) d_now.tv_sec) {
77499b05 641 LOG(prefix<<qname<<": Found cache CNAME hit for '"<< (qname+"|CNAME") <<"' to '"<<j->content<<"'"<<endl);
4957a608
BH
642 DNSResourceRecord rr=*j;
643 rr.ttl-=d_now.tv_sec;
644 ret.push_back(rr);
645 if(!(qtype==QType(QType::CNAME))) { // perhaps they really wanted a CNAME!
646 set<GetBestNSAnswer>beenthere;
647 res=doResolve(j->content, qtype, ret, depth+1, beenthere);
648 }
649 else
650 res=0;
651 return true;
ac539791
BH
652 }
653 }
afbe2787 654 }
77499b05 655 LOG(prefix<<qname<<": No CNAME cache hit of '"<< (qname+"|CNAME") <<"' found"<<endl);
75b49099
BH
656 return false;
657}
658
bb4bdbaf
BH
659
660
661
7b35aa49 662bool SyncRes::doCacheCheck(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int &res)
75b49099 663{
fd8bc993 664 bool giveNegative=false;
ded77b10 665
be718669 666 string prefix;
77499b05 667 if(doLog()) {
ded77b10
BH
668 prefix=d_prefix;
669 prefix.append(depth, ' ');
670 }
afbe2787 671
288f4aa9
BH
672 string sqname(qname);
673 QType sqt(qtype);
092f210a 674 uint32_t sttl=0;
f4df5e89 675 // cout<<"Lookup for '"<<qname<<"|"<<qtype.getName()<<"'\n";
bb4bdbaf 676
49a699c4
BH
677 pair<negcache_t::const_iterator, negcache_t::const_iterator> range=t_sstorage->negcache.equal_range(tie(qname));
678 negcache_t::iterator ni;
679 for(ni=range.first; ni != range.second; ni++) {
680 // we have something
681 if(ni->d_qtype.getCode() == 0 || ni->d_qtype == qtype) {
682 res=0;
683 if((uint32_t)d_now.tv_sec < ni->d_ttd) {
684 sttl=ni->d_ttd - d_now.tv_sec;
685 if(ni->d_qtype.getCode()) {
77499b05 686 LOG(prefix<<qname<<": "<<qtype.getName()<<" is negatively cached via '"<<ni->d_qname<<"' for another "<<sttl<<" seconds"<<endl);
49a699c4 687 res = RCode::NoError;
4957a608
BH
688 }
689 else {
77499b05 690 LOG(prefix<<qname<<": Entire record '"<<qname<<"', is negatively cached via '"<<ni->d_qname<<"' for another "<<sttl<<" seconds"<<endl);
49a699c4 691 res= RCode::NXDomain;
4957a608 692 }
49a699c4
BH
693 giveNegative=true;
694 sqname=ni->d_qname;
695 sqt=QType::SOA;
c2567ad1 696 moveCacheItemToBack(t_sstorage->negcache, ni);
49a699c4
BH
697 break;
698 }
699 else {
77499b05 700 LOG(prefix<<qname<<": Entire record '"<<qname<<"' or type was negatively cached, but entry expired"<<endl);
c2567ad1 701 moveCacheItemToFront(t_sstorage->negcache, ni);
38e22b5a 702 }
fd8bc993
BH
703 }
704 }
20177d1d 705
7bf26383 706 set<DNSResourceRecord> cset;
75b49099 707 bool found=false, expired=false;
be718669 708
49a699c4 709 if(t_RC->get(d_now.tv_sec, sqname, sqt, &cset) > 0) {
77499b05 710 LOG(prefix<<sqname<<": Found cache hit for "<<sqt.getName()<<": ");
7bf26383 711 for(set<DNSResourceRecord>::const_iterator j=cset.begin();j!=cset.end();++j) {
77499b05 712 LOG(j->content);
d6d5dea7 713 if(j->ttl>(unsigned int) d_now.tv_sec) {
4957a608
BH
714 DNSResourceRecord rr=*j;
715 rr.ttl-=d_now.tv_sec;
716 if(giveNegative) {
717 rr.d_place=DNSResourceRecord::AUTHORITY;
718 rr.ttl=sttl;
719 }
720 ret.push_back(rr);
77499b05 721 LOG("[ttl="<<rr.ttl<<"] ");
4957a608 722 found=true;
ac539791 723 }
75b49099 724 else {
77499b05 725 LOG("[expired] ");
4957a608 726 expired=true;
75b49099 727 }
afbe2787 728 }
ac539791 729
77499b05 730 LOG(endl);
f4df5e89 731 if(found && !expired) {
f15a5b2a 732 if(!giveNegative)
4957a608 733 res=0;
75b49099 734 return true;
f4df5e89 735 }
75b49099 736 else
77499b05 737 LOG(prefix<<qname<<": cache had only stale entries"<<endl);
afbe2787 738 }
f4df5e89 739
75b49099
BH
740 return false;
741}
afbe2787 742
7b35aa49 743bool SyncRes::moreSpecificThan(const string& a, const string &b)
75b49099 744{
7738a23f
BH
745 static string dot(".");
746 int counta=(a!=dot), countb=(b!=dot);
afbe2787 747
728485ca
BH
748 for(string::size_type n=0;n<a.size();++n)
749 if(a[n]=='.')
750 counta++;
751 for(string::size_type n=0;n<b.size();++n)
752 if(b[n]=='.')
753 countb++;
754 return counta>countb;
afbe2787
BH
755}
756
/** Ordering predicate for names, based on a table of numbers: a name with a lower
    number sorts before a name with a higher one.
    The table is held by reference and is looked up with operator[], so a name that
    is not in it yet gets added with value 0. */
struct speedOrder
{
  speedOrder(map<string,double> &speeds) : d_speeds(speeds) {}

  bool operator()(const string &a, const string &b) const
  {
    const double lhs = d_speeds[a];
    const double rhs = d_speeds[b];
    return lhs < rhs;
  }

  map<string, double>& d_speeds;
};
766
5ea6f7de 767inline vector<string> SyncRes::shuffleInSpeedOrder(set<string, CIStringCompare> &tnameservers, const string &prefix)
afbe2787 768{
5ea6f7de
PD
769 vector<string> rnameservers;
770 rnameservers.reserve(tnameservers.size());
21f0f88b 771 BOOST_FOREACH(const string& str, tnameservers) {
5ea6f7de 772 rnameservers.push_back(str);
21f0f88b 773 }
5ea6f7de
PD
774 map<string, double> speeds;
775 BOOST_FOREACH(const string& val, rnameservers) {
79b8cdcc 776 double speed;
21f0f88b
BH
777 speed=t_sstorage->nsSpeeds[val].get(&d_now);
778 speeds[val]=speed;
eefd15f9 779 }
51e2144e 780 random_shuffle(rnameservers.begin(),rnameservers.end(), dns_random);
996c89cc
BH
781 speedOrder so(speeds);
782 stable_sort(rnameservers.begin(),rnameservers.end(), so);
eefd15f9 783
77499b05
BH
784 if(doLog()) {
785 LOG(prefix<<"Nameservers: ");
5ea6f7de 786 for(vector<string>::const_iterator i=rnameservers.begin();i!=rnameservers.end();++i) {
d8d0bb8f 787 if(i!=rnameservers.begin()) {
77499b05
BH
788 LOG(", ");
789 if(!((i-rnameservers.begin())%3)) {
790 LOG(endl<<prefix<<" ");
791 }
d8d0bb8f 792 }
5ea6f7de 793 LOG(*i<<"(" << (boost::format("%0.2f") % (speeds[*i]/1000.0)).str() <<"ms)");
d8d0bb8f 794 }
77499b05 795 LOG(endl);
d8d0bb8f 796 }
728485ca 797 return rnameservers;
afbe2787
BH
798}
799
7738a23f
BH
800struct TCacheComp
801{
802 bool operator()(const pair<string, QType>& a, const pair<string, QType>& b) const
803 {
ec6480f3 804 if(pdns_ilexicographical_compare(a.first, b.first))
7738a23f 805 return true;
ec6480f3 806 if(pdns_ilexicographical_compare(b.first, a.first))
7738a23f 807 return false;
edb1c9ee 808
7738a23f
BH
809 return a.second < b.second;
810 }
811};
812
bf7e4a70
BH
813static bool magicAddrMatch(const QType& query, const QType& answer)
814{
815 if(query.getCode() != QType::ADDR)
816 return false;
817 return answer.getCode() == QType::A || answer.getCode() == QType::AAAA;
818}
7738a23f 819
ac539791 820/** returns -1 in case of no results, rcode otherwise */
7305df82 821int SyncRes::doResolveAt(set<string, CIStringCompare> nameservers, string auth, bool flawedNSSet, const string &qname, const QType &qtype,
232f0877
CH
822 vector<DNSResourceRecord>&ret,
823 int depth, set<GetBestNSAnswer>&beenthere)
86c152f2 824{
ded77b10 825 string prefix;
77499b05 826 if(doLog()) {
ded77b10
BH
827 prefix=d_prefix;
828 prefix.append(depth, ' ');
829 }
86c152f2 830
77499b05 831 LOG(prefix<<qname<<": Cache consultations done, have "<<(unsigned int)nameservers.size()<<" NS to contact"<<endl);
afbe2787
BH
832
833 for(;;) { // we may get more specific nameservers
5ea6f7de 834 vector<string > rnameservers = shuffleInSpeedOrder(nameservers, doLog() ? (prefix+qname+": ") : string() );
21f0f88b 835
5ea6f7de 836 for(vector<string >::const_iterator tns=rnameservers.begin();;++tns) {
728485ca 837 if(tns==rnameservers.end()) {
77499b05 838 LOG(prefix<<qname<<": Failed to resolve via any of the "<<(unsigned int)rnameservers.size()<<" offered NS at level '"<<auth<<"'"<<endl);
4957a608 839 if(auth!="." && flawedNSSet) {
77499b05 840 LOG(prefix<<qname<<": Ageing nameservers for level '"<<auth<<"', next query might succeed"<<endl);
49a699c4 841 if(t_RC->doAgeCache(d_now.tv_sec, auth, QType::NS, 10))
4957a608
BH
842 g_stats.nsSetInvalidations++;
843 }
844 return -1;
afbe2787 845 }
21f0f88b 846 // this line needs to identify the 'self-resolving' behaviour, but we get it wrong now
5ea6f7de 847 if(pdns_iequals(qname, *tns) && qtype.getCode()==QType::A && rnameservers.size() > (unsigned)(1+1*s_doIPv6)) {
77499b05 848 LOG(prefix<<qname<<": Not using NS to resolve itself!"<<endl);
4957a608 849 continue;
20177d1d 850 }
5605c067 851
996c89cc 852 typedef vector<ComboAddress> remoteIPs_t;
5605c067 853 remoteIPs_t remoteIPs;
bfea0d0b 854 remoteIPs_t::const_iterator remoteIP;
5c633640 855 bool doTCP=false;
5605c067 856 int resolveret;
1c21f389 857 bool pierceDontQuery=false;
c1d73d94 858 bool sendRDQuery=false;
263f6a5a 859 LWResult lwr;
5ea6f7de 860 if(tns->empty()) {
77499b05 861 LOG(prefix<<qname<<": Domain is out-of-band"<<endl);
4957a608
BH
862 doOOBResolve(qname, qtype, lwr.d_result, depth, lwr.d_rcode);
863 lwr.d_tcbit=false;
864 lwr.d_aabit=true;
5605c067
BH
865 }
866 else {
5ea6f7de 867 LOG(prefix<<qname<<": Trying to resolve NS '"<<*tns<< "' ("<<1+tns-rnameservers.begin()<<"/"<<(unsigned int)rnameservers.size()<<")"<<endl);
4957a608 868
5ea6f7de 869 if(!isCanonical(*tns)) {
77499b05 870 LOG(prefix<<qname<<": Domain has hardcoded nameserver(s)"<<endl);
4957a608 871
5ea6f7de
PD
872 string txtAddr = *tns;
873 if(!tns->empty()) {
4957a608
BH
874 sendRDQuery = txtAddr[0] == '+';
875 txtAddr=txtAddr.c_str()+1;
876 }
877 ComboAddress addr=parseIPAndPort(txtAddr, 53);
878
879 remoteIPs.push_back(addr);
880 pierceDontQuery=true;
881 }
882 else {
54e58e44 883 remoteIPs=getAddrs(*tns, depth+2, beenthere);
4957a608
BH
884 pierceDontQuery=false;
885 }
886
887 if(remoteIPs.empty()) {
5ea6f7de 888 LOG(prefix<<qname<<": Failed to get IP for NS "<<*tns<<", trying next if available"<<endl);
4957a608
BH
889 flawedNSSet=true;
890 continue;
891 }
892 else {
21f0f88b 893
5ea6f7de 894 LOG(prefix<<qname<<": Resolved '"+auth+"' NS "<<*tns<<" to: ");
4957a608 895 for(remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
77499b05
BH
896 if(remoteIP != remoteIPs.begin()) {
897 LOG(", ");
898 }
899 LOG(remoteIP->toString());
4957a608 900 }
77499b05 901 LOG(endl);
4957a608
BH
902
903 }
904
905 for(remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
77499b05 906 LOG(prefix<<qname<<": Trying IP "<< remoteIP->toStringWithPort() <<", asking '"<<qname<<"|"<<qtype.getName()<<"'"<<endl);
4957a608
BH
907 extern NetmaskGroup* g_dontQuery;
908
41ea0e50 909 if(t_sstorage->throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(*remoteIP, "", 0))) {
628e2c7b
PA
910 LOG(prefix<<qname<<": server throttled "<<endl);
911 s_throttledqueries++; d_throttledqueries++;
912 continue;
913 }
41ea0e50 914 else if(t_sstorage->throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()))) {
77499b05 915 LOG(prefix<<qname<<": query throttled "<<endl);
4957a608
BH
916 s_throttledqueries++; d_throttledqueries++;
917 continue;
918 }
919 else if(!pierceDontQuery && g_dontQuery && g_dontQuery->match(&*remoteIP)) {
77499b05 920 LOG(prefix<<qname<<": not sending query to " << remoteIP->toString() << ", blocked by 'dont-query' setting" << endl);
66e0b6ea 921 s_dontqueries++;
4957a608
BH
922 continue;
923 }
924 else {
925 s_outqueries++; d_outqueries++;
173d790e 926 if(d_outqueries > s_maxqperq) throw ImmediateServFailException("more than "+lexical_cast<string>(s_maxqperq)+" (max-qperq) queries sent while resolving "+qname);
4957a608
BH
927 TryTCP:
928 if(doTCP) {
77499b05 929 LOG(prefix<<qname<<": using TCP with "<< remoteIP->toStringWithPort() <<endl);
4957a608
BH
930 s_tcpoutqueries++; d_tcpoutqueries++;
931 }
932
21f0f88b 933 resolveret=asyncresolveWrapper(*remoteIP, qname, qtype.getCode(),
232f0877 934 doTCP, sendRDQuery, &d_now, &lwr); // <- we go out on the wire!
30b13ef7 935
936 if(resolveret != 1) {
4957a608 937 if(resolveret==0) {
863ca18d 938 LOG(prefix<<qname<<": timeout resolving after "<<lwr.d_usec/1000.0<<"msec "<< (doTCP ? "over TCP" : "")<<endl);
232f0877
CH
939 d_timeouts++;
940 s_outgoingtimeouts++;
4957a608
BH
941 }
942 else if(resolveret==-2) {
232f0877
CH
943 LOG(prefix<<qname<<": hit a local resource limit resolving"<< (doTCP ? " over TCP" : "")<<", probable error: "<<stringerror()<<endl);
944 g_stats.resourceLimits++;
4957a608
BH
945 }
946 else {
232f0877
CH
947 s_unreachables++; d_unreachables++;
948 LOG(prefix<<qname<<": error resolving"<< (doTCP ? " over TCP" : "") <<", possible error: "<<strerror(errno)<< endl);
4957a608 949 }
30b13ef7 950
4957a608 951 if(resolveret!=-2) { // don't account for resource limits, they are our own fault
30b13ef7 952 t_sstorage->nsSpeeds[*tns].submit(*remoteIP, 1000000, &d_now); // 1 sec
953
954 // code below makes sure we don't filter COM or the root
955 if (s_serverdownmaxfails > 0 && (auth.find('.')+1 != auth.size()) && t_sstorage->fails.incr(*remoteIP) >= s_serverdownmaxfails) {
628e2c7b 956 LOG(prefix<<qname<<": Max fails reached resolving on "<< remoteIP->toString() <<". Going full throttle for 1 minute" <<endl);
41ea0e50 957 t_sstorage->throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, "", 0), s_serverdownthrottletime, 10000); // mark server as down
628e2c7b 958 } else if(resolveret==-1)
41ea0e50 959 t_sstorage->throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()), 60, 100); // unreachable, 1 minute or 100 queries
232f0877 960 else
41ea0e50 961 t_sstorage->throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()), 10, 5); // timeout
4957a608
BH
962 }
963 continue;
964 }
352b4183
PD
965
966 if(lwr.d_rcode==RCode::ServFail || lwr.d_rcode==RCode::Refused) {
5ea6f7de 967 LOG(prefix<<qname<<": "<<*tns<<" returned a "<< (lwr.d_rcode==RCode::ServFail ? "ServFail" : "Refused") << ", trying sibling IP or NS"<<endl);
41ea0e50 968 t_sstorage->throttle.throttle(d_now.tv_sec,boost::make_tuple(*remoteIP, qname, qtype.getCode()),60,3); // servfail or refused
352b4183
PD
969 continue;
970 }
4957a608 971
628e2c7b
PA
972 if(s_serverdownmaxfails > 0)
973 t_sstorage->fails.clear(*remoteIP);
974
4957a608
BH
975 break; // this IP address worked!
976 wasLame:; // well, it didn't
5ea6f7de 977 LOG(prefix<<qname<<": status=NS "<<*tns<<" ("<< remoteIP->toString() <<") is lame for '"<<auth<<"', trying sibling IP or NS"<<endl);
41ea0e50 978 t_sstorage->throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()), 60, 100); // lame
4957a608
BH
979 }
980 }
981
982 if(remoteIP == remoteIPs.end()) // we tried all IP addresses, none worked
983 continue;
984
985 if(lwr.d_tcbit) {
986 if(!doTCP) {
987 doTCP=true;
77499b05 988 LOG(prefix<<qname<<": truncated bit set, retrying via TCP"<<endl);
4957a608
BH
989 goto TryTCP;
990 }
77499b05 991 LOG(prefix<<qname<<": truncated bit set, over TCP?"<<endl);
4957a608
BH
992 return RCode::ServFail;
993 }
994
1ef28707 995 LOG(prefix<<qname<<": Got "<<(unsigned int)lwr.d_result.size()<<" answers from "<<*tns<<" ("<< remoteIP->toString() <<"), rcode="<<lwr.d_rcode<<" ("<<RCode::to_s(lwr.d_rcode)<<"), aa="<<lwr.d_aabit<<", in "<<lwr.d_usec/1000<<"ms"<<endl);
4957a608
BH
996
997 /* // for you IPv6 fanatics :-)
998 if(remoteIP->sin4.sin_family==AF_INET6)
999 lwr.d_usec/=3;
1000 */
232f0877 1001 // cout<<"msec: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
4957a608 1002
49a699c4 1003 t_sstorage->nsSpeeds[*tns].submit(*remoteIP, lwr.d_usec, &d_now);
20177d1d 1004 }
20177d1d 1005
aadceba8 1006 if(s_minimumTTL) {
1007 for(LWResult::res_t::iterator i=lwr.d_result.begin();i != lwr.d_result.end();++i) {
1008 i->ttl = max(i->ttl, s_minimumTTL);
1009 }
1010 }
1011
7738a23f 1012 typedef map<pair<string, QType>, set<DNSResourceRecord>, TCacheComp > tcache_t;
1ac4e536
BH
1013 tcache_t tcache;
1014
728485ca 1015 // reap all answers from this packet that are acceptable
c3e753c7 1016 for(LWResult::res_t::iterator i=lwr.d_result.begin();i != lwr.d_result.end();++i) {
4957a608 1017 if(i->qtype.getCode() == QType::OPT) {
77499b05 1018 LOG(prefix<<qname<<": skipping OPT answer '"<<i->qname<<"' from '"<<auth<<"' nameservers" <<endl);
4957a608
BH
1019 continue;
1020 }
77499b05 1021 LOG(prefix<<qname<<": accept answer '"<<i->qname<<"|"<<i->qtype.getName()<<"|"<<i->content<<"' from '"<<auth<<"' nameservers? ");
4957a608 1022 if(i->qtype.getCode()==QType::ANY) {
77499b05 1023 LOG("NO! - we don't accept 'ANY' data"<<endl);
4957a608
BH
1024 continue;
1025 }
1026
1027 if(dottedEndsOn(i->qname, auth)) {
1028 if(lwr.d_aabit && lwr.d_rcode==RCode::NoError && i->d_place==DNSResourceRecord::ANSWER && ::arg().contains("delegation-only",auth)) {
77499b05 1029 LOG("NO! Is from delegation-only zone"<<endl);
4957a608
BH
1030 s_nodelegated++;
1031 return RCode::NXDomain;
1032 }
1033 else {
77499b05 1034 LOG("YES!"<<endl);
4957a608
BH
1035
1036 i->ttl=min(s_maxcachettl, i->ttl);
1037
1038 DNSResourceRecord rr=*i;
1039 rr.d_place=DNSResourceRecord::ANSWER;
1040
1041 rr.ttl += d_now.tv_sec;
1042
1043 if(rr.qtype.getCode() == QType::NS) // people fiddle with the case
1044 rr.content=toLower(rr.content); // this must stay! (the cache can't be case-insensitive on the RHS of records)
10d31f4e 1045
4957a608
BH
1046 tcache[make_pair(i->qname,i->qtype)].insert(rr);
1047 }
232f0877 1048 }
4957a608 1049 else
77499b05 1050 LOG("NO!"<<endl);
86c152f2 1051 }
728485ca
BH
1052
1053 // supplant
60b859b9 1054 for(tcache_t::iterator i=tcache.begin();i!=tcache.end();++i) {
4957a608 1055 if(i->second.size() > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
331c187c 1056 uint32_t lowestTTL=std::numeric_limits<uint32_t>::max();
4957a608
BH
1057 for(tcache_t::value_type::second_type::iterator j=i->second.begin(); j != i->second.end(); ++j)
1058 lowestTTL=min(lowestTTL, j->ttl);
1059
1060 for(tcache_t::value_type::second_type::iterator j=i->second.begin(); j != i->second.end(); ++j)
1061 ((tcache_t::value_type::second_type::value_type*)&(*j))->ttl=lowestTTL;
1062 }
1063
49a699c4 1064 t_RC->replace(d_now.tv_sec, i->first.first, i->first.second, i->second, lwr.d_aabit);
288f4aa9 1065 }
7738a23f 1066 set<string, CIStringCompare> nsset;
77499b05 1067 LOG(prefix<<qname<<": determining status after receiving this packet"<<endl);
728485ca 1068
20177d1d 1069 bool done=false, realreferral=false, negindic=false;
c6644fc5 1070 string newauth, soaname, newtarget;
728485ca 1071
596aa4a1 1072 for(LWResult::res_t::iterator i=lwr.d_result.begin();i!=lwr.d_result.end();++i) {
501ef49c
BH
1073 if(i->d_place==DNSResourceRecord::AUTHORITY && i->qtype.getCode()==QType::SOA &&
1074 lwr.d_rcode==RCode::NXDomain && dottedEndsOn(qname,i->qname) && dottedEndsOn(i->qname, auth)) {
77499b05 1075 LOG(prefix<<qname<<": got negative caching indication for RECORD '"<<qname+"' (accept="<<dottedEndsOn(i->qname, auth)<<"), newtarget='"<<newtarget<<"'"<<endl);
501ef49c 1076
596aa4a1 1077 i->ttl = min(i->ttl, s_maxnegttl);
fa363535
PD
1078 if(!newtarget.length()) // only add a SOA if we're not going anywhere after this
1079 ret.push_back(*i);
4957a608
BH
1080
1081 NegCacheEntry ne;
1082
1083 ne.d_qname=i->qname;
596aa4a1 1084
478348a0 1085 ne.d_ttd=d_now.tv_sec + i->ttl;
232f0877 1086
4957a608
BH
1087 ne.d_name=qname;
1088 ne.d_qtype=QType(0); // this encodes 'whole record'
1089
49a699c4
BH
1090 replacing_insert(t_sstorage->negcache, ne);
1091
4957a608
BH
1092 negindic=true;
1093 }
1094 else if(i->d_place==DNSResourceRecord::ANSWER && pdns_iequals(i->qname, qname) && i->qtype.getCode()==QType::CNAME && (!(qtype==QType(QType::CNAME)))) {
1095 ret.push_back(*i);
1096 newtarget=i->content;
1097 }
7c696097 1098 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
4957a608 1099 else if(i->d_place==DNSResourceRecord::ANSWER && pdns_iequals(i->qname, qname) &&
232f0877
CH
1100 (
1101 i->qtype==qtype || (lwr.d_aabit && (qtype==QType(QType::ANY) || magicAddrMatch(qtype, i->qtype) ) ) || sendRDQuery
1102 )
4957a608
BH
1103 )
1104 {
1105
77499b05 1106 LOG(prefix<<qname<<": answer is in: resolved to '"<< i->content<<"|"<<i->qtype.getName()<<"'"<<endl);
4957a608
BH
1107
1108 done=true;
1109 ret.push_back(*i);
1110 }
1111 else if(i->d_place==DNSResourceRecord::AUTHORITY && dottedEndsOn(qname,i->qname) && i->qtype.getCode()==QType::NS) {
1112 if(moreSpecificThan(i->qname,auth)) {
1113 newauth=i->qname;
77499b05 1114 LOG(prefix<<qname<<": got NS record '"<<i->qname<<"' -> '"<<i->content<<"'"<<endl);
4957a608
BH
1115 realreferral=true;
1116 }
1117 else
77499b05 1118 LOG(prefix<<qname<<": got upwards/level NS record '"<<i->qname<<"' -> '"<<i->content<<"', had '"<<auth<<"'"<<endl);
4957a608
BH
1119 nsset.insert(i->content);
1120 }
1121 else if(!done && i->d_place==DNSResourceRecord::AUTHORITY && dottedEndsOn(qname,i->qname) && i->qtype.getCode()==QType::SOA &&
1122 lwr.d_rcode==RCode::NoError) {
b4fbe592 1123 LOG(prefix<<qname<<": got negative caching indication for '"<< (qname+"|"+qtype.getName()+"'") <<endl);
4957a608 1124
bac969f0 1125 if(!newtarget.empty()) {
77499b05 1126 LOG(prefix<<qname<<": Hang on! Got a redirect to '"<<newtarget<<"' already"<<endl);
bac969f0
BH
1127 }
1128 else {
478348a0 1129 i-> ttl = min(s_maxnegttl, i->ttl);
bac969f0
BH
1130 ret.push_back(*i);
1131 NegCacheEntry ne;
1132 ne.d_qname=i->qname;
596aa4a1 1133 ne.d_ttd=d_now.tv_sec + i->ttl;
bac969f0
BH
1134 ne.d_name=qname;
1135 ne.d_qtype=qtype;
1136 if(qtype.getCode()) { // prevents us from blacking out a whole domain
1137 replacing_insert(t_sstorage->negcache, ne);
1138 }
1139 negindic=true;
4957a608 1140 }
4957a608 1141 }
86c152f2 1142 }
86c152f2 1143
728485ca 1144 if(done){
77499b05 1145 LOG(prefix<<qname<<": status=got results, this level of recursion done"<<endl);
4957a608 1146 return 0;
ac539791 1147 }
c6644fc5 1148 if(!newtarget.empty()) {
4957a608 1149 if(pdns_iequals(newtarget,qname)) {
77499b05 1150 LOG(prefix<<qname<<": status=got a CNAME referral to self, returning SERVFAIL"<<endl);
4957a608
BH
1151 return RCode::ServFail;
1152 }
1153 if(depth > 10) {
77499b05 1154 LOG(prefix<<qname<<": status=got a CNAME referral, but recursing too deep, returning SERVFAIL"<<endl);
4957a608
BH
1155 return RCode::ServFail;
1156 }
77499b05 1157 LOG(prefix<<qname<<": status=got a CNAME referral, starting over with "<<newtarget<<endl);
4957a608
BH
1158
1159 set<GetBestNSAnswer> beenthere2;
1160 return doResolve(newtarget, qtype, ret, depth + 1, beenthere2);
c6644fc5 1161 }
331c187c 1162 if(lwr.d_rcode==RCode::NXDomain) {
77499b05 1163 LOG(prefix<<qname<<": status=NXDOMAIN, we are done "<<(negindic ? "(have negative SOA)" : "")<<endl);
331c187c
BH
1164 return RCode::NXDomain;
1165 }
6ffd6bad 1166 if(nsset.empty() && !lwr.d_rcode && (negindic || lwr.d_aabit)) {
f5cbaeca 1167 LOG(prefix<<qname<<": status=noerror, other types may exist, but we are done "<<(negindic ? "(have negative SOA) " : "")<<(lwr.d_aabit ? "(have aa bit) " : "")<<endl);
4957a608 1168 return 0;
caa6eefa 1169 }
728485ca 1170 else if(realreferral) {
77499b05 1171 LOG(prefix<<qname<<": status=did not resolve, got "<<(unsigned int)nsset.size()<<" NS, looping to them"<<endl);
4957a608
BH
1172 auth=newauth;
1173 nameservers=nsset;
1174 break;
728485ca 1175 }
5ea6f7de 1176 else if(isCanonical(*tns)) { // means: not OOB (I think)
4957a608 1177 goto wasLame;
86c152f2
BH
1178 }
1179 }
86c152f2 1180 }
ac539791 1181 return -1;
86c152f2
BH
1182}
1183
7b35aa49 1184void SyncRes::addAuthorityRecords(const string& qname, vector<DNSResourceRecord>& ret, int depth)
86c152f2 1185{
288f4aa9 1186 set<DNSResourceRecord> bestns;
7305df82
BH
1187 set<GetBestNSAnswer> beenthere;
1188 bool dontcare;
1189 getBestNSFromCache(qname, bestns, &dontcare, depth, beenthere);
36c5ee42 1190
288f4aa9
BH
1191 for(set<DNSResourceRecord>::const_iterator k=bestns.begin();k!=bestns.end();++k) {
1192 DNSResourceRecord ns=*k;
1193 ns.d_place=DNSResourceRecord::AUTHORITY;
d6d5dea7 1194 ns.ttl-=d_now.tv_sec;
288f4aa9 1195 ret.push_back(ns);
86c152f2
BH
1196 }
1197}
bd53ea9d 1198
8ce79a22 1199// used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_ercursor.cc
bd53ea9d
PD
1200int directResolve(const std::string& qname, const QType& qtype, int qclass, vector<DNSResourceRecord>& ret)
1201{
1202 struct timeval now;
1203 gettimeofday(&now, 0);
1204
1205 SyncRes sr(now);
1206
1207 int res = sr.beginResolve(qname, QType(qtype), qclass, ret);
bd53ea9d
PD
1208 return res;
1209}