]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/syncres.cc
Merge pull request #7951 from pieterlexis/update-boost-m4
[thirdparty/pdns.git] / pdns / syncres.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "arguments.hh"
27 #include "cachecleaner.hh"
28 #include "dns_random.hh"
29 #include "dnsparser.hh"
30 #include "dnsrecords.hh"
31 #include "ednssubnet.hh"
32 #include "logger.hh"
33 #include "lua-recursor4.hh"
34 #include "rec-lua-conf.hh"
35 #include "syncres.hh"
36 #include "dnsseckeeper.hh"
37 #include "validate-recursor.hh"
38
// Definitions of the static and thread_local data members of SyncRes, plus
// the free-standing thread_local t_timeouts.
39 thread_local SyncRes::ThreadLocalStorage SyncRes::t_sstorage;
40 thread_local std::unique_ptr<addrringbuf_t> t_timeouts;
41
// Class-wide (static) members held in non-trivial types.
42 std::unordered_set<DNSName> SyncRes::s_delegationOnly;
43 std::unique_ptr<NetmaskGroup> SyncRes::s_dontQuery{nullptr};
44 NetmaskGroup SyncRes::s_ednslocalsubnets;
45 NetmaskGroup SyncRes::s_ednsremotesubnets;
46 SuffixMatchNode SyncRes::s_ednsdomains;
47 EDNSSubnetOpts SyncRes::s_ecsScopeZero;
48 string SyncRes::s_serverID;
49 SyncRes::LogMode SyncRes::s_lm;
// Record types that redirect a lookup to another name: CNAME and DNAME.
50 const std::unordered_set<uint16_t> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};
51
// Numeric limits and TTL settings. Having static storage duration they are
// zero-initialized here; nothing in this part of the file assigns them.
52 unsigned int SyncRes::s_maxnegttl;
53 unsigned int SyncRes::s_maxbogusttl;
54 unsigned int SyncRes::s_maxcachettl;
55 unsigned int SyncRes::s_maxqperq;
56 unsigned int SyncRes::s_maxtotusec;
57 unsigned int SyncRes::s_maxdepth;
58 unsigned int SyncRes::s_minimumTTL;
59 unsigned int SyncRes::s_minimumECSTTL;
60 unsigned int SyncRes::s_packetcachettl;
61 unsigned int SyncRes::s_packetcacheservfailttl;
62 unsigned int SyncRes::s_serverdownmaxfails;
63 unsigned int SyncRes::s_serverdownthrottletime;
64 unsigned int SyncRes::s_ecscachelimitttl;
// Class-wide counters. They are std::atomic, so increments such as
// s_queries++ in beginResolve() are atomic operations.
65 std::atomic<uint64_t> SyncRes::s_authzonequeries;
66 std::atomic<uint64_t> SyncRes::s_queries;
67 std::atomic<uint64_t> SyncRes::s_outgoingtimeouts;
68 std::atomic<uint64_t> SyncRes::s_outgoing4timeouts;
69 std::atomic<uint64_t> SyncRes::s_outgoing6timeouts;
70 std::atomic<uint64_t> SyncRes::s_outqueries;
71 std::atomic<uint64_t> SyncRes::s_tcpoutqueries;
72 std::atomic<uint64_t> SyncRes::s_throttledqueries;
73 std::atomic<uint64_t> SyncRes::s_dontqueries;
74 std::atomic<uint64_t> SyncRes::s_nodelegated;
75 std::atomic<uint64_t> SyncRes::s_unreachables;
76 std::atomic<uint64_t> SyncRes::s_ecsqueries;
77 std::atomic<uint64_t> SyncRes::s_ecsresponses;
// Counters keyed by a uint8_t; presumably the subnet prefix length of the
// ECS response, one map per address family -- TODO confirm against the users.
78 std::map<uint8_t, std::atomic<uint64_t>> SyncRes::s_ecsResponsesBySubnetSize4;
79 std::map<uint8_t, std::atomic<uint64_t>> SyncRes::s_ecsResponsesBySubnetSize6;
80
// ECS limits stored as single bytes (zero-initialized here).
81 uint8_t SyncRes::s_ecsipv4limit;
82 uint8_t SyncRes::s_ecsipv6limit;
83 uint8_t SyncRes::s_ecsipv4cachelimit;
84 uint8_t SyncRes::s_ecsipv6cachelimit;
85
// Boolean switches; all start out false. s_qnameminimization seeds
// d_qNameMinimization in the SyncRes constructor, and s_doIPv6 gates the
// AAAA handling in getAddrs().
86 bool SyncRes::s_doIPv6;
87 bool SyncRes::s_nopacketcache;
88 bool SyncRes::s_rootNXTrust;
89 bool SyncRes::s_noEDNS;
90 bool SyncRes::s_qnameminimization;
91
// Trace helper for SyncRes member functions: with d_lm == Log the message is
// sent to g_log at Warning level, with d_lm == Store it is appended to
// d_trace, and for any other d_lm value nothing is emitted.
// NOTE(review): this expands to a bare if / else-if chain, so it is only safe
// as a complete statement inside braces; an unbraced `if (c) LOG(x); else ...`
// would not parse as intended.
92 #define LOG(x) if(d_lm == Log) { g_log <<Logger::Warning << x; } else if(d_lm == Store) { d_trace << x; }
93
94 static void accountAuthLatency(int usec, int family)
95 {
96 if(family == AF_INET) {
97 if(usec < 1000)
98 g_stats.auth4Answers0_1++;
99 else if(usec < 10000)
100 g_stats.auth4Answers1_10++;
101 else if(usec < 100000)
102 g_stats.auth4Answers10_100++;
103 else if(usec < 1000000)
104 g_stats.auth4Answers100_1000++;
105 else
106 g_stats.auth4AnswersSlow++;
107 } else {
108 if(usec < 1000)
109 g_stats.auth6Answers0_1++;
110 else if(usec < 10000)
111 g_stats.auth6Answers1_10++;
112 else if(usec < 100000)
113 g_stats.auth6Answers10_100++;
114 else if(usec < 1000000)
115 g_stats.auth6Answers100_1000++;
116 else
117 g_stats.auth6AnswersSlow++;
118 }
119
120 }
121
122
123 SyncRes::SyncRes(const struct timeval& now) : d_authzonequeries(0), d_outqueries(0), d_tcpoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0),
124 d_totUsec(0), d_now(now),
125 d_cacheonly(false), d_doDNSSEC(false), d_doEDNS0(false), d_qNameMinimization(s_qnameminimization), d_lm(s_lm)
126
127 {
128 }
129
130 /** everything begins here - this is the entry point just after receiving a packet */
131 int SyncRes::beginResolve(const DNSName &qname, const QType &qtype, uint16_t qclass, vector<DNSRecord>&ret)
132 {
133 vState state = Indeterminate;
134 s_queries++;
135 d_wasVariable=false;
136 d_wasOutOfBand=false;
137
138 if (doSpecialNamesResolve(qname, qtype, qclass, ret)) {
139 d_queryValidationState = Insecure; // this could fool our stats into thinking a validation took place
140 return 0; // so do check before updating counters (we do now)
141 }
142
143 auto qtypeCode = qtype.getCode();
144 /* rfc6895 section 3.1 */
145 if ((qtypeCode >= 128 && qtypeCode <= 254) || qtypeCode == QType::RRSIG || qtypeCode == QType::NSEC3 || qtypeCode == QType::OPT || qtypeCode == 65535) {
146 return -1;
147 }
148
149 if(qclass==QClass::ANY)
150 qclass=QClass::IN;
151 else if(qclass!=QClass::IN)
152 return -1;
153
154 set<GetBestNSAnswer> beenthere;
155 int res=doResolve(qname, qtype, ret, 0, beenthere, state);
156 d_queryValidationState = state;
157
158 if (shouldValidate()) {
159 if (d_queryValidationState != Indeterminate) {
160 g_stats.dnssecValidations++;
161 }
162 increaseDNSSECStateCounter(d_queryValidationState);
163 }
164
165 return res;
166 }
167
168 /*! Handles all special, built-in names
169 * Fills ret with an answer and returns true if it handled the query.
170 *
171 * Handles the following queries (and their ANY variants):
172 *
173 * - localhost. IN A
174 * - localhost. IN AAAA
175 * - 1.0.0.127.in-addr.arpa. IN PTR
176 * - 1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. IN PTR
177 * - version.bind. CH TXT
178 * - version.pdns. CH TXT
179 * - id.server. CH TXT
180 * - trustanchor.server CH TXT
181 * - negativetrustanchor.server CH TXT
182 */
183 bool SyncRes::doSpecialNamesResolve(const DNSName &qname, const QType &qtype, const uint16_t qclass, vector<DNSRecord> &ret)
184 {
185 static const DNSName arpa("1.0.0.127.in-addr.arpa."), ip6_arpa("1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa."),
186 localhost("localhost."), versionbind("version.bind."), idserver("id.server."), versionpdns("version.pdns."), trustanchorserver("trustanchor.server."),
187 negativetrustanchorserver("negativetrustanchor.server.");
188
189 bool handled = false;
190 vector<pair<QType::typeenum, string> > answers;
191
192 if ((qname == arpa || qname == ip6_arpa) &&
193 qclass == QClass::IN) {
194 handled = true;
195 if (qtype == QType::PTR || qtype == QType::ANY)
196 answers.push_back({QType::PTR, "localhost."});
197 }
198
199 if (qname == localhost &&
200 qclass == QClass::IN) {
201 handled = true;
202 if (qtype == QType::A || qtype == QType::ANY)
203 answers.push_back({QType::A, "127.0.0.1"});
204 if (qtype == QType::AAAA || qtype == QType::ANY)
205 answers.push_back({QType::AAAA, "::1"});
206 }
207
208 if ((qname == versionbind || qname == idserver || qname == versionpdns) &&
209 qclass == QClass::CHAOS) {
210 handled = true;
211 if (qtype == QType::TXT || qtype == QType::ANY) {
212 if(qname == versionbind || qname == versionpdns)
213 answers.push_back({QType::TXT, "\""+::arg()["version-string"]+"\""});
214 else if (s_serverID != "disabled")
215 answers.push_back({QType::TXT, "\""+s_serverID+"\""});
216 }
217 }
218
219 if (qname == trustanchorserver && qclass == QClass::CHAOS &&
220 ::arg().mustDo("allow-trust-anchor-query")) {
221 handled = true;
222 if (qtype == QType::TXT || qtype == QType::ANY) {
223 auto luaLocal = g_luaconfs.getLocal();
224 for (auto const &dsAnchor : luaLocal->dsAnchors) {
225 ostringstream ans;
226 ans<<"\"";
227 ans<<dsAnchor.first.toString(); // Explicit toString to have a trailing dot
228 for (auto const &dsRecord : dsAnchor.second) {
229 ans<<" ";
230 ans<<dsRecord.d_tag;
231 }
232 ans << "\"";
233 answers.push_back({QType::TXT, ans.str()});
234 }
235 }
236 }
237
238 if (qname == negativetrustanchorserver && qclass == QClass::CHAOS &&
239 ::arg().mustDo("allow-trust-anchor-query")) {
240 handled = true;
241 if (qtype == QType::TXT || qtype == QType::ANY) {
242 auto luaLocal = g_luaconfs.getLocal();
243 for (auto const &negAnchor : luaLocal->negAnchors) {
244 ostringstream ans;
245 ans<<"\"";
246 ans<<negAnchor.first.toString(); // Explicit toString to have a trailing dot
247 if (negAnchor.second.length())
248 ans<<" "<<negAnchor.second;
249 ans << "\"";
250 answers.push_back({QType::TXT, ans.str()});
251 }
252 }
253 }
254
255 if (handled && !answers.empty()) {
256 ret.clear();
257 d_wasOutOfBand=true;
258
259 DNSRecord dr;
260 dr.d_name = qname;
261 dr.d_place = DNSResourceRecord::ANSWER;
262 dr.d_class = qclass;
263 dr.d_ttl = 86400;
264 for (const auto& ans : answers) {
265 dr.d_type = ans.first;
266 dr.d_content = DNSRecordContent::mastermake(ans.first, qclass, ans.second);
267 ret.push_back(dr);
268 }
269 }
270
271 return handled;
272 }
273
274
275 //! This is the 'out of band resolver', in other words, the authoritative server
276 void SyncRes::AuthDomain::addSOA(std::vector<DNSRecord>& records) const
277 {
278 SyncRes::AuthDomain::records_t::const_iterator ziter = d_records.find(boost::make_tuple(getName(), QType::SOA));
279 if (ziter != d_records.end()) {
280 DNSRecord dr = *ziter;
281 dr.d_place = DNSResourceRecord::AUTHORITY;
282 records.push_back(dr);
283 }
284 else {
285 // cerr<<qname<<": can't find SOA record '"<<getName()<<"' in our zone!"<<endl;
286 }
287 }
288
289 int SyncRes::AuthDomain::getRecords(const DNSName& qname, uint16_t qtype, std::vector<DNSRecord>& records) const
290 {
291 int result = RCode::NoError;
292 records.clear();
293
294 // partial lookup
295 std::pair<records_t::const_iterator,records_t::const_iterator> range = d_records.equal_range(tie(qname));
296
297 SyncRes::AuthDomain::records_t::const_iterator ziter;
298 bool somedata = false;
299
300 for(ziter = range.first; ziter != range.second; ++ziter) {
301 somedata = true;
302
303 if(qtype == QType::ANY || ziter->d_type == qtype || ziter->d_type == QType::CNAME) {
304 // let rest of nameserver do the legwork on this one
305 records.push_back(*ziter);
306 }
307 else if (ziter->d_type == QType::NS && ziter->d_name.countLabels() > getName().countLabels()) {
308 // we hit a delegation point!
309 DNSRecord dr = *ziter;
310 dr.d_place=DNSResourceRecord::AUTHORITY;
311 records.push_back(dr);
312 }
313 }
314
315 if (!records.empty()) {
316 /* We have found an exact match, we're done */
317 // cerr<<qname<<": exact match in zone '"<<getName()<<"'"<<endl;
318 return result;
319 }
320
321 if (somedata) {
322 /* We have records for that name, but not of the wanted qtype */
323 // cerr<<qname<<": found record in '"<<getName()<<"', but nothing of the right type, sending SOA"<<endl;
324 addSOA(records);
325
326 return result;
327 }
328
329 // cerr<<qname<<": nothing found so far in '"<<getName()<<"', trying wildcards"<<endl;
330 DNSName wcarddomain(qname);
331 while(wcarddomain != getName() && wcarddomain.chopOff()) {
332 // cerr<<qname<<": trying '*."<<wcarddomain<<"' in "<<getName()<<endl;
333 range = d_records.equal_range(boost::make_tuple(g_wildcarddnsname + wcarddomain));
334 if (range.first==range.second)
335 continue;
336
337 for(ziter = range.first; ziter != range.second; ++ziter) {
338 DNSRecord dr = *ziter;
339 // if we hit a CNAME, just answer that - rest of recursor will do the needful & follow
340 if(dr.d_type == qtype || qtype == QType::ANY || dr.d_type == QType::CNAME) {
341 dr.d_name = qname;
342 dr.d_place = DNSResourceRecord::ANSWER;
343 records.push_back(dr);
344 }
345 }
346
347 if (records.empty()) {
348 addSOA(records);
349 }
350
351 // cerr<<qname<<": in '"<<getName()<<"', had wildcard match on '*."<<wcarddomain<<"'"<<endl;
352 return result;
353 }
354
355 /* Nothing for this name, no wildcard, let's see if there is some NS */
356 DNSName nsdomain(qname);
357 while (nsdomain.chopOff() && nsdomain != getName()) {
358 range = d_records.equal_range(boost::make_tuple(nsdomain,QType::NS));
359 if(range.first == range.second)
360 continue;
361
362 for(ziter = range.first; ziter != range.second; ++ziter) {
363 DNSRecord dr = *ziter;
364 dr.d_place = DNSResourceRecord::AUTHORITY;
365 records.push_back(dr);
366 }
367 }
368
369 if(records.empty()) {
370 // cerr<<qname<<": no NS match in zone '"<<getName()<<"' either, handing out SOA"<<endl;
371 addSOA(records);
372 result = RCode::NXDomain;
373 }
374
375 return result;
376 }
377
378 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, int& res)
379 {
380 d_authzonequeries++;
381 s_authzonequeries++;
382
383 res = domain.getRecords(qname, qtype.getCode(), ret);
384 return true;
385 }
386
387 bool SyncRes::doOOBResolve(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, int& res)
388 {
389 string prefix;
390 if(doLog()) {
391 prefix=d_prefix;
392 prefix.append(depth, ' ');
393 }
394
395 DNSName authdomain(qname);
396 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
397 if(iter==t_sstorage.domainmap->end() || !iter->second.isAuth()) {
398 LOG(prefix<<qname<<": auth storage has no zone for this query!"<<endl);
399 return false;
400 }
401
402 LOG(prefix<<qname<<": auth storage has data, zone='"<<authdomain<<"'"<<endl);
403 return doOOBResolve(iter->second, qname, qtype, ret, res);
404 }
405
406 uint64_t SyncRes::doEDNSDump(int fd)
407 {
408 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
409 if (!fp) {
410 return 0;
411 }
412 uint64_t count = 0;
413
414 fprintf(fp.get(),"; edns from thread follows\n;\n");
415 for(const auto& eds : t_sstorage.ednsstatus) {
416 count++;
417 fprintf(fp.get(), "%s\t%d\t%s", eds.first.toString().c_str(), (int)eds.second.mode, ctime(&eds.second.modeSetAt));
418 }
419 return count;
420 }
421
422 uint64_t SyncRes::doDumpNSSpeeds(int fd)
423 {
424 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
425 if(!fp)
426 return 0;
427 fprintf(fp.get(), "; nsspeed dump from thread follows\n;\n");
428 uint64_t count=0;
429
430 for(const auto& i : t_sstorage.nsSpeeds)
431 {
432 count++;
433
434 // an <empty> can appear hear in case of authoritative (hosted) zones
435 fprintf(fp.get(), "%s -> ", i.first.toLogString().c_str());
436 for(const auto& j : i.second.d_collection)
437 {
438 // typedef vector<pair<ComboAddress, DecayingEwma> > collection_t;
439 fprintf(fp.get(), "%s/%f ", j.first.toString().c_str(), j.second.peek());
440 }
441 fprintf(fp.get(), "\n");
442 }
443 return count;
444 }
445
446 uint64_t SyncRes::doDumpThrottleMap(int fd)
447 {
448 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
449 if(!fp)
450 return 0;
451 fprintf(fp.get(), "; throttle map dump follows\n");
452 fprintf(fp.get(), "; remote IP\tqname\tqtype\tcount\tttd\n");
453 uint64_t count=0;
454
455 const auto& throttleMap = t_sstorage.throttle.getThrottleMap();
456 for(const auto& i : throttleMap)
457 {
458 count++;
459 // remote IP, dns name, qtype, count, ttd
460 fprintf(fp.get(), "%s\t%s\t%d\t%u\t%s", i.first.get<0>().toString().c_str(), i.first.get<1>().toLogString().c_str(), i.first.get<2>(), i.second.count, ctime(&i.second.ttd));
461 }
462
463 return count;
464 }
465
466 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
467 to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
468 so that if there are RRSIGs for a name, we'll have them.
469
470 However, some hosts simply can't answer questions which ask for DNSSEC. This can manifest itself as:
471 * No answer
472 * FormErr
473 * Nonsense answer
474
475 The cause of "No answer" may be fragmentation, and it is tempting to probe if smaller answers would get through.
476 Another cause of "No answer" may simply be a network condition.
477 Nonsense answers are a clearer indication this host won't be able to do DNSSEC evah.
478
479 Previous implementations have suffered from turning off DNSSEC questions for an authoritative server based on timeouts.
480 A clever idea is to only turn off DNSSEC if we know a domain isn't signed anyhow. The problem with that really
481 clever idea however is that at this point in PowerDNS, we may simply not know that yet. All the DNSSEC thinking happens
482 elsewhere. It may not have happened yet.
483
484 For now this means we can't be clever, but will turn off DNSSEC if you reply with FormError or gibberish.
485 */
486
// Sends one query to `ip` and keeps the per-remote EDNS status up to date.
//
// The remote's EDNS status is looked up in the thread-local
// t_sstorage.ednsstatus table and reset once it is more than 3600 seconds old.
// The query goes out through d_asyncResolve when that is set, otherwise
// through asyncresolve(). A query is re-sent within this call only when the
// remote answered FormErr without EDNS, in which case the remote is downgraded
// to NOEDNS and the query is repeated without EDNS.
//
// Returns the value of the underlying resolve call: negative on a transport
// error, 0 on a timeout (neither updates the timestamp of the EDNS status),
// anything else after a response has been examined.
487 int SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsMANDATORY, const DNSName& domain, const DNSName& auth, int type, bool doTCP, bool sendRDQuery, struct timeval* now, boost::optional<Netmask>& srcmask, LWResult* res, bool* chained) const
488 {
489 /* what is your QUEST?
490 the goal is to get as many remotes as possible on the highest level of EDNS support
491 The levels are:
492
493 0) UNKNOWN Unknown state
494 1) EDNS: Honors EDNS0
495 2) EDNSIGNORANT: Ignores EDNS0, gives replies without EDNS0
496 3) NOEDNS: Generates FORMERR on EDNS queries
497
498 Everybody starts out assumed to be '0'.
499 If '0', send out EDNS0
500 If you FORMERR us, go to '3',
501 If no EDNS in response, go to '2'
502 If '1', send out EDNS0
503 If FORMERR, downgrade to 3
504 If '2', keep on including EDNS0, see what happens
505 Same behaviour as 0
506 If '3', send bare queries
507 */
508
509 SyncRes::EDNSStatus* ednsstatus;
510 ednsstatus = &t_sstorage.ednsstatus[ip]; // does this include port? YES
511
// Forget what we learned about this remote once it is over an hour old.
512 if(ednsstatus->modeSetAt && ednsstatus->modeSetAt + 3600 < d_now.tv_sec) {
513 *ednsstatus=SyncRes::EDNSStatus();
514 // cerr<<"Resetting EDNS Status for "<<ip.toString()<<endl);
515 }
516
// `mode` aliases the stored status, so assignments below update the table;
// `oldmode` remembers the starting value to detect a change at the end.
517 SyncRes::EDNSStatus::EDNSMode& mode=ednsstatus->mode;
518 SyncRes::EDNSStatus::EDNSMode oldmode = mode;
519 int EDNSLevel = 0;
520 auto luaconfsLocal = g_luaconfs.getLocal();
521 ResolveContext ctx;
522 #ifdef HAVE_PROTOBUF
523 ctx.d_initialRequestId = d_initialRequestId;
524 #endif
525 #ifdef HAVE_FSTRM
526 ctx.d_auth = auth;
527 #endif
528
// `ret` is always assigned before use: the loop body runs at least once and
// every path through it either returns or continues after setting `ret`.
529 int ret;
530 for(int tries = 0; tries < 3; ++tries) {
531 // cerr<<"Remote '"<<ip.toString()<<"' currently in mode "<<mode<<endl;
532
// NOEDNS wins even when the caller marked EDNS as mandatory.
533 if(mode==EDNSStatus::NOEDNS) {
534 g_stats.noEdnsOutQueries++;
535 EDNSLevel = 0; // level != mode
536 }
537 else if(ednsMANDATORY || mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSOK || mode==EDNSStatus::EDNSIGNORANT)
538 EDNSLevel = 1;
539
// Work on a copy so that the caller's name is not modified by lowercasing.
540 DNSName sendQname(domain);
541 if (g_lowercaseOutgoing)
542 sendQname.makeUsLowerCase();
543
544 if (d_asyncResolve) {
545 ret = d_asyncResolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, res, chained);
546 }
547 else {
548 ret=asyncresolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
549 }
550 if(ret < 0) {
551 return ret; // transport error, nothing to learn here
552 }
553
554 if(ret == 0) { // timeout, not doing anything with it now
555 return ret;
556 }
557 else if(mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSOK || mode == EDNSStatus::EDNSIGNORANT ) {
// A valid FormErr response without EDNS: downgrade and re-send without EDNS.
// Because the mode is NOEDNS from now on, this branch cannot be taken again,
// so at most one retry happens.
558 if(res->d_validpacket && !res->d_haveEDNS && res->d_rcode == RCode::FormErr) {
559 // cerr<<"Downgrading to NOEDNS because of "<<RCode::to_s(res->d_rcode)<<" for query to "<<ip.toString()<<" for '"<<domain<<"'"<<endl;
560 mode = EDNSStatus::NOEDNS;
561 continue;
562 }
563 else if(!res->d_haveEDNS) {
564 if(mode != EDNSStatus::EDNSIGNORANT) {
565 mode = EDNSStatus::EDNSIGNORANT;
566 // cerr<<"We find that "<<ip.toString()<<" is an EDNS-ignorer for '"<<domain<<"', moving to mode 2"<<endl;
567 }
568 }
569 else {
570 mode = EDNSStatus::EDNSOK;
571 // cerr<<"We find that "<<ip.toString()<<" is EDNS OK!"<<endl;
572 }
573
574 }
// Stamp the status when the mode changed or when it was never stamped before.
575 if(oldmode != mode || !ednsstatus->modeSetAt)
576 ednsstatus->modeSetAt=d_now.tv_sec;
577 // cerr<<"Result: ret="<<ret<<", EDNS-level: "<<EDNSLevel<<", haveEDNS: "<<res->d_haveEDNS<<", new mode: "<<mode<<endl;
578 return ret;
579 }
580 return ret;
581 }
582
// Logging shorthand for doResolve() below: relies on the locals `prefix` and
// `child` being in scope and appends endl itself.
583 #define QLOG(x) LOG(prefix << " child=" << child << ": " << x << endl)
584
// Resolves qname/qtype, using QName minimization when getQNameMinimization()
// says so and plain doResolveNoQNameMinimization() otherwise.
//
// Returns the result of the underlying resolve call, or RCode::ServFail when
// no ancestor nameserver set is found in the cache or the loop runs out.
// `ret` receives the records and `state` the validation state.
585 int SyncRes::doResolve(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, vState& state) {
586
587 if (!getQNameMinimization()) {
588 return doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, state);
589 }
590
591 // The qname minimization algorithm is a simplified version of the one in RFC 7816 (bis).
592 // It could be simplified because the cache maintenance (both positive and negative)
593 // is already done by doResolveNoQNameMinimization().
594 //
595 // Sketch of algorithm:
596 // Check cache
597 // If result found: done
598 // Otherwise determine closest ancestor from cache data
599 // Repeat querying A, adding more labels of the original qname
600 // If we get a delegation continue at ancestor determination
601 // Until we have the full name.
602 //
603 // The algorithm starts with adding a single label per iteration, and
604 // moves to three labels per iteration after three iterations.
605
606 DNSName child;
607 string prefix = d_prefix;
608 prefix.append(depth, ' ');
609 prefix.append(string("QM ") + qname.toString() + "|" + qtype.getName());
610
611 QLOG("doResolve");
612
613 // Look in cache only
// The cache-only flag is switched on just for this probe and restored right
// after; the probe's records go into the scratch vector retq, not into ret.
614 vector<DNSRecord> retq;
615 bool old = setCacheOnly(true);
616 bool fromCache = false;
617 int res = doResolveNoQNameMinimization(qname, qtype, retq, depth + 1, beenthere, state, &fromCache);
618 setCacheOnly(old);
619 if (fromCache) {
620 QLOG("Step0 Found in cache");
621 ret.insert(ret.end(), retq.begin(), retq.end());
622 return res;
623 }
624 QLOG("Step0 Not cached");
625
626 const unsigned int qnamelen = qname.countLabels();
627
// The outer loop has no increment of its own: `i` only advances in the inner
// loop, and a delegation (break below) restarts at step 1 with `i` unchanged.
628 for (unsigned int i = 0; i <= qnamelen; ) {
629
630 // Step 1
631 vector<DNSRecord> bestns;
632 // the two retries allow getBestNSFromCache&co to reprime the root
633 // hints, in case they ever go missing
634 for (int tries = 0; tries < 2 && bestns.empty(); ++tries) {
635 bool flawedNSSet = false;
636 set<GetBestNSAnswer> beenthereIgnored;
637 getBestNSFromCache(qname, qtype, bestns, &flawedNSSet, depth + 1, beenthereIgnored);
638 }
639
640 if (bestns.size() == 0) {
641 // Something terrible is wrong
642 QLOG("Step1 No ancestor found return ServFail");
643 return RCode::ServFail;
644 }
645
646 const DNSName& ancestor(bestns[0].d_name);
647 QLOG("Step1 Ancestor from cache is " << ancestor.toString());
648 child = ancestor;
649
// Next name to try: the ancestor plus 1 label (3 once i exceeds 3), capped at
// the full length of qname.
650 unsigned int targetlen = std::min(child.countLabels() + (i > 3 ? 3 : 1), qnamelen);
651
652 for (; i <= qnamelen; i++) {
653 // Step 2
// Grow child by prepending labels of qname until it has targetlen labels.
654 while (child.countLabels() < targetlen) {
655 child.prependRawLabel(qname.getRawLabel(qnamelen - child.countLabels() - 1));
656 }
657 targetlen += i > 3 ? 3 : 1;
658 targetlen = std::min(targetlen, qnamelen);
659
660 QLOG("Step2 New child");
661
662 // Step 3 resolve
663 if (child == qname) {
664 QLOG("Step3 Going to do final resolve");
665 res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, state);
666 QLOG("Step3 Final resolve: " << RCode::to_s(res) << "/" << ret.size());
667 return res;
668 }
669
670 // Step 4
671 QLOG("Step4 Resolve A for child");
672 retq.resize(0);
// Passing Stop asks the callee to stop at a delegation; it reports having
// done so by changing the value to Stopped.
673 StopAtDelegation stopAtDelegation = Stop;
674 res = doResolveNoQNameMinimization(child, QType::A, retq, depth + 1, beenthere, state, NULL, &stopAtDelegation);
675 QLOG("Step4 Resolve A result is " << RCode::to_s(res) << "/" << retq.size() << "/" << stopAtDelegation);
676 if (stopAtDelegation == Stopped) {
677 QLOG("Delegation seen, continue at step 1");
678 break;
679 }
680 if (res != RCode::NoError) {
681 // Case 5: unexpected answer
682 QLOG("Step5: other rcode, last effort final resolve");
// Note: minimization is switched off on this SyncRes and not switched back on
// here.
683 setQNameMinimization(false);
684 res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, state);
685 QLOG("Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size());
686 return res;
687 }
688 }
689 }
690
691 // Should not be reached
692 QLOG("Max iterations reached, return ServFail");
693 return RCode::ServFail;
694 }
695
696 /*! This function will check the cache and go out to the internet if the answer is not in cache
697 *
698 * \param qname The name we need an answer for
699 * \param qtype The record type we need an answer for
700 * \param ret The vector of DNSRecords we need to fill with the answers
701 * \param depth The recursion depth we are in
702 * \param beenthere Passed on to getBestNSNamesFromCache() and doResolveAt()
703 * \param state Set to Indeterminate on entry, then to the validation state that was determined
704 * \param fromCache tells the caller the result came from the cache, may be nullptr
705 * \param stopAtDelegation if non-nullptr and pointed-to value is Stop requests the callee to stop at a delegation, if so pointed-to value is set to Stopped
706 * \return DNS RCODE or -1 (Error) or -2 (RPZ hit)
707 */
708 int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, vState& state, bool *fromCache, StopAtDelegation *stopAtDelegation)
709 {
710 string prefix;
711 if(doLog()) {
712 prefix=d_prefix;
713 prefix.append(depth, ' ');
714 }
715
716 LOG(prefix<<qname<<": Wants "<< (d_doDNSSEC ? "" : "NO ") << "DNSSEC processing, "<<(d_requireAuthData ? "" : "NO ")<<"auth data in query for "<<qtype.getName()<<endl);
717
718 state = Indeterminate;
719
// Recursion guard: a limit of 0 disables the check.
720 if(s_maxdepth && depth > s_maxdepth)
721 throw ImmediateServFailException("More than "+std::to_string(s_maxdepth)+" (max-recursion-depth) levels of recursion needed while resolving "+qname.toLogString());
722
723 int res=0;
724
725 // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS.
726 if(!(d_updatingRootNS && qtype.getCode()==QType::NS && qname.isRoot())) {
// In cache-only mode a matching auth zone is answered from local data and a
// matching forward zone gets a single query to its first server; both paths
// return from inside this block.
727 if(d_cacheonly) { // very limited OOB support
728 LWResult lwr;
729 LOG(prefix<<qname<<": Recursion not requested for '"<<qname<<"|"<<qtype.getName()<<"', peeking at auth/forward zones"<<endl);
730 DNSName authname(qname);
731 domainmap_t::const_iterator iter=getBestAuthZone(&authname);
732 if(iter != t_sstorage.domainmap->end()) {
733 if(iter->second.isAuth()) {
734 ret.clear();
735 d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, res);
736 if (fromCache)
737 *fromCache = d_wasOutOfBand;
738 return res;
739 }
740 else {
// NOTE(review): front() assumes the forward zone has at least one server
// configured; it is undefined on an empty vector -- confirm that the zone
// loader guarantees this.
741 const vector<ComboAddress>& servers = iter->second.d_servers;
742 const ComboAddress remoteIP = servers.front();
743 LOG(prefix<<qname<<": forwarding query to hardcoded nameserver '"<< remoteIP.toStringWithPort()<<"' for zone '"<<authname<<"'"<<endl);
744
745 boost::optional<Netmask> nm;
746 bool chained = false;
747 res=asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, authname, qtype.getCode(), false, false, &d_now, nm, &lwr, &chained);
748
749 d_totUsec += lwr.d_usec;
750 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
// Reported as "from cache" whether or not the forwarder actually answered.
751 if (fromCache)
752 *fromCache = true;
753
754 // filter out the good stuff from lwr.result()
// asyncresolveWrapper() returns 0 on a timeout and a negative value on a
// transport error; 1 is presumably "got a response" -- TODO confirm.
755 if (res == 1) {
756 for(const auto& rec : lwr.d_records) {
757 if(rec.d_place == DNSResourceRecord::ANSWER)
758 ret.push_back(rec);
759 }
760 return 0;
761 }
762 else {
763 return RCode::ServFail;
764 }
765 }
766 }
767 }
768
// Classify the best matching configured zone, if any; the flags are passed to
// the two cache checks below.
769 DNSName authname(qname);
770 bool wasForwardedOrAuthZone = false;
771 bool wasAuthZone = false;
772 bool wasForwardRecurse = false;
773 domainmap_t::const_iterator iter = getBestAuthZone(&authname);
774 if(iter != t_sstorage.domainmap->end()) {
775 const auto& domain = iter->second;
776 wasForwardedOrAuthZone = true;
777
778 if (domain.isAuth()) {
779 wasAuthZone = true;
780 } else if (domain.shouldRecurse()) {
781 wasForwardRecurse = true;
782 }
783 }
784
// Note: unlike the doCacheCheck() path below, a hit here does not set
// *fromCache.
785 if(!d_skipCNAMECheck && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
786 d_wasOutOfBand = wasAuthZone;
787 return res;
788 }
789
790 if(doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, res, state)) {
791 // we done
792 d_wasOutOfBand = wasAuthZone;
793 if (fromCache)
794 *fromCache = true;
795 return res;
796 }
797 }
798
// Cache-only and nothing usable found: report success without touching ret
// or *fromCache.
799 if(d_cacheonly)
800 return 0;
801
802 LOG(prefix<<qname<<": No cache hit for '"<<qname<<"|"<<qtype.getName()<<"', trying to find an appropriate NS record"<<endl);
803
804 DNSName subdomain(qname);
805 if(qtype == QType::DS) subdomain.chopOff();
806
807 NsSet nsset;
808 bool flawedNSSet=false;
809
810 /* we use subdomain here instead of qname because for DS queries we only care about the state of the parent zone */
811 computeZoneCuts(subdomain, g_rootdnsname, depth);
812
813 // the two retries allow getBestNSNamesFromCache&co to reprime the root
814 // hints, in case they ever go missing
815 for(int tries=0;tries<2 && nsset.empty();++tries) {
816 subdomain=getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, beenthere); // pass beenthere to both occasions
817 }
818
819 state = getValidationStatus(qname, false);
820
821 LOG(prefix<<qname<<": initial validation status for "<<qname<<" is "<<vStates[state]<<endl);
822
823 if(!(res=doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation)))
824 return 0;
825
826 LOG(prefix<<qname<<": failed (res="<<res<<")"<<endl);
827
// -2 is handed back unchanged; every other negative result becomes ServFail
// and positive results are returned as they are.
828 if (res == -2)
829 return res;
830
831 return res<0 ? RCode::ServFail : res;
832 }
832
#if 0
// for testing purposes
/** Comparator that orders addresses with the larger address-family value
 *  first. The intent is to put IPv6 ahead of IPv4; this relies on AF_INET6
 *  being numerically greater than AF_INET on the target platform.
 *
 *  The previous version compared `a` with itself, so it always returned true
 *  and could not be used as an ordering. A strict comparison is used here
 *  because sort comparators must return false for equivalent elements.
 */
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return a.sin4.sin_family > b.sin4.sin_family;
}
#endif
840
841 struct speedOrderCA
842 {
843 speedOrderCA(std::map<ComboAddress,double>& speeds): d_speeds(speeds) {}
844 bool operator()(const ComboAddress& a, const ComboAddress& b) const
845 {
846 return d_speeds[a] < d_speeds[b];
847 }
848 std::map<ComboAddress, double>& d_speeds;
849 };
850
851 /** This function explicitly goes out for A or AAAA addresses
852 */
// Collects the addresses (port 53) of the host `qname`, fastest first.
//
// A records are resolved first. With s_doIPv6 set, AAAA records are resolved
// only when no A record was found; otherwise AAAA records are merely taken
// from the record cache. While resolving, the cache-only flag is set to
// `cacheOnly` and auth-data / DNSSEC-validation requirements are switched off;
// all three are restored afterwards.
// NOTE(review): the restore is not exception safe -- if doResolve() throws,
// the flags keep their temporary values.
//
// Side effect: the nsSpeeds entry for qname is purged of addresses that are
// no longer in the result.
853 vector<ComboAddress> SyncRes::getAddrs(const DNSName &qname, unsigned int depth, set<GetBestNSAnswer>& beenthere, bool cacheOnly)
854 {
855 typedef vector<DNSRecord> res_t;
856 typedef vector<ComboAddress> ret_t;
857 ret_t ret;
858
859 bool oldCacheOnly = setCacheOnly(cacheOnly);
860 bool oldRequireAuthData = d_requireAuthData;
861 bool oldValidationRequested = d_DNSSECValidationRequested;
862 d_requireAuthData = false;
863 d_DNSSECValidationRequested = false;
864
865 vState newState = Indeterminate;
866 res_t resv4;
867 // If IPv4 ever becomes second class, we should revisit this
868 if (doResolve(qname, QType::A, resv4, depth+1, beenthere, newState) == 0) { // this consults cache, OR goes out
869 for (auto const &i : resv4) {
870 if (i.d_type == QType::A) {
871 if (auto rec = getRR<ARecordContent>(i)) {
872 ret.push_back(rec->getCA(53));
873 }
874 }
875 }
876 }
877 if (s_doIPv6) {
878 if (ret.empty()) {
879 // We did not find IPv4 addresses, try to get IPv6 ones
880 newState = Indeterminate;
881 res_t resv6;
882 if (doResolve(qname, QType::AAAA, resv6, depth+1, beenthere, newState) == 0) { // this consults cache, OR goes out
883 for (const auto &i : resv6) {
884 if (i.d_type == QType::AAAA) {
885 if (auto rec = getRR<AAAARecordContent>(i))
886 ret.push_back(rec->getCA(53));
887 }
888 }
889 }
890 } else {
891 // We have some IPv4 records, don't bother with going out to get IPv6, but do consult the cache
892 // Once IPv6 adoption matters, this needs to be revisited
893 res_t cset;
894 if (t_RC->get(d_now.tv_sec, qname, QType(QType::AAAA), false, &cset, d_cacheRemote) > 0) {
895 for (const auto &i : cset) {
// d_ttl is compared against the current time, so cache entries presumably
// carry an absolute expiry time here -- TODO confirm against the cache.
896 if (i.d_ttl > (unsigned int)d_now.tv_sec ) {
897 if (auto rec = getRR<AAAARecordContent>(i)) {
898 ret.push_back(rec->getCA(53));
899 }
900 }
901 }
902 }
903 }
904 }
905
906 d_requireAuthData = oldRequireAuthData;
907 d_DNSSECValidationRequested = oldValidationRequested;
908 setCacheOnly(oldCacheOnly);
909
910 /* we need to remove from the nsSpeeds collection the existing IPs
911 for this nameserver that are no longer in the set, even if there
912 is only one or none at all in the current set.
913 */
// `speeds` gets an entry for every address in ret, so the comparator and the
// log output below always find the addresses they look up.
914 map<ComboAddress, double> speeds;
915 auto& collection = t_sstorage.nsSpeeds[qname].d_collection;
916 for(const auto& val: ret) {
917 speeds[val] = collection[val].get(&d_now);
918 }
919
920 t_sstorage.nsSpeeds[qname].purge(speeds);
921
922 if(ret.size() > 1) {
// Shuffle first, then stable-sort by speed: addresses with equal speed end up
// in random relative order.
// NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
// C++17; std::shuffle with an explicit generator is the replacement.
923 random_shuffle(ret.begin(), ret.end());
924 speedOrderCA so(speeds);
925 stable_sort(ret.begin(), ret.end(), so);
926
927 if(doLog()) {
928 string prefix=d_prefix;
929 prefix.append(depth, ' ');
930 LOG(prefix<<"Nameserver "<<qname<<" IPs: ");
931 bool first = true;
932 for(const auto& addr : ret) {
933 if (first) {
934 first = false;
935 }
936 else {
937 LOG(", ");
938 }
939 LOG((addr.toString())<<"(" << (boost::format("%0.2f") % (speeds[addr]/1000.0)).str() <<"ms)");
940 }
941 LOG(endl);
942 }
943 }
944
945 return ret;
946 }
947
948 void SyncRes::getBestNSFromCache(const DNSName &qname, const QType& qtype, vector<DNSRecord>& bestns, bool* flawedNSSet, unsigned int depth, set<GetBestNSAnswer>& beenthere)
949 {
950 string prefix;
951 DNSName subdomain(qname);
952 if(doLog()) {
953 prefix=d_prefix;
954 prefix.append(depth, ' ');
955 }
956 bestns.clear();
957 bool brokeloop;
958 do {
959 brokeloop=false;
960 LOG(prefix<<qname<<": Checking if we have NS in cache for '"<<subdomain<<"'"<<endl);
961 vector<DNSRecord> ns;
962 *flawedNSSet = false;
963
964 if(t_RC->get(d_now.tv_sec, subdomain, QType(QType::NS), false, &ns, d_cacheRemote) > 0) {
965 bestns.reserve(ns.size());
966
967 for(auto k=ns.cbegin();k!=ns.cend(); ++k) {
968 if(k->d_ttl > (unsigned int)d_now.tv_sec ) {
969 vector<DNSRecord> aset;
970
971 const DNSRecord& dr=*k;
972 auto nrr = getRR<NSRecordContent>(dr);
973 if(nrr && (!nrr->getNS().isPartOf(subdomain) || t_RC->get(d_now.tv_sec, nrr->getNS(), s_doIPv6 ? QType(QType::ADDR) : QType(QType::A),
974 false, doLog() ? &aset : 0, d_cacheRemote) > 5)) {
975 bestns.push_back(dr);
976 LOG(prefix<<qname<<": NS (with ip, or non-glue) in cache for '"<<subdomain<<"' -> '"<<nrr->getNS()<<"'"<<endl);
977 LOG(prefix<<qname<<": within bailiwick: "<< nrr->getNS().isPartOf(subdomain));
978 if(!aset.empty()) {
979 LOG(", in cache, ttl="<<(unsigned int)(((time_t)aset.begin()->d_ttl- d_now.tv_sec ))<<endl);
980 }
981 else {
982 LOG(", not in cache / did not look at cache"<<endl);
983 }
984 }
985 else {
986 *flawedNSSet=true;
987 LOG(prefix<<qname<<": NS in cache for '"<<subdomain<<"', but needs glue ("<<nrr->getNS()<<") which we miss or is expired"<<endl);
988 }
989 }
990 }
991
992 if(!bestns.empty()) {
993 GetBestNSAnswer answer;
994 answer.qname=qname;
995 answer.qtype=qtype.getCode();
996 for(const auto& dr : bestns) {
997 if (auto nsContent = getRR<NSRecordContent>(dr)) {
998 answer.bestns.insert(make_pair(dr.d_name, nsContent->getNS()));
999 }
1000 }
1001
1002 auto insertionPair = beenthere.insert(std::move(answer));
1003 if(!insertionPair.second) {
1004 brokeloop=true;
1005 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' but part of LOOP (already seen "<<answer.qname<<")! Trying less specific NS"<<endl);
1006 ;
1007 if(doLog())
1008 for( set<GetBestNSAnswer>::const_iterator j=beenthere.begin();j!=beenthere.end();++j) {
1009 bool neo = (j == insertionPair.first);
1010 LOG(prefix<<qname<<": beenthere"<<(neo?"*":"")<<": "<<j->qname<<"|"<<DNSRecordContent::NumberToType(j->qtype)<<" ("<<(unsigned int)j->bestns.size()<<")"<<endl);
1011 }
1012 bestns.clear();
1013 }
1014 else {
1015 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' (flawedNSSet="<<*flawedNSSet<<")"<<endl);
1016 return;
1017 }
1018 }
1019 }
1020 LOG(prefix<<qname<<": no valid/useful NS in cache for '"<<subdomain<<"'"<<endl);
1021
1022 if(subdomain.isRoot() && !brokeloop) {
1023 // We lost the root NS records
1024 primeHints();
1025 LOG(prefix<<qname<<": reprimed the root"<<endl);
1026 /* let's prevent an infinite loop */
1027 if (!d_updatingRootNS) {
1028 getRootNS(d_now, d_asyncResolve);
1029 }
1030 }
1031 } while(subdomain.chopOff());
1032 }
1033
1034 SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(DNSName* qname) const
1035 {
1036 if (t_sstorage.domainmap->empty()) {
1037 return t_sstorage.domainmap->end();
1038 }
1039
1040 SyncRes::domainmap_t::const_iterator ret;
1041 do {
1042 ret=t_sstorage.domainmap->find(*qname);
1043 if(ret!=t_sstorage.domainmap->end())
1044 break;
1045 }while(qname->chopOff());
1046 return ret;
1047 }
1048
1049 /** doesn't actually do the work, leaves that to getBestNSFromCache */
1050 DNSName SyncRes::getBestNSNamesFromCache(const DNSName &qname, const QType& qtype, NsSet& nsset, bool* flawedNSSet, unsigned int depth, set<GetBestNSAnswer>&beenthere)
1051 {
1052 DNSName authdomain(qname);
1053
1054 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
1055 if(iter!=t_sstorage.domainmap->end()) {
1056 if( iter->second.isAuth() )
1057 // this gets picked up in doResolveAt, the empty DNSName, combined with the
1058 // empty vector means 'we are auth for this zone'
1059 nsset.insert({DNSName(), {{}, false}});
1060 else {
1061 // Again, picked up in doResolveAt. An empty DNSName, combined with a
1062 // non-empty vector of ComboAddresses means 'this is a forwarded domain'
1063 // This is actually picked up in retrieveAddressesForNS called from doResolveAt.
1064 nsset.insert({DNSName(), {iter->second.d_servers, iter->second.shouldRecurse() }});
1065 }
1066 return authdomain;
1067 }
1068
1069 DNSName subdomain(qname);
1070 vector<DNSRecord> bestns;
1071 getBestNSFromCache(subdomain, qtype, bestns, flawedNSSet, depth, beenthere);
1072
1073 for(auto k=bestns.cbegin() ; k != bestns.cend(); ++k) {
1074 // The actual resolver code will not even look at the ComboAddress or bool
1075 const auto nsContent = getRR<NSRecordContent>(*k);
1076 if (nsContent) {
1077 nsset.insert({nsContent->getNS(), {{}, false}});
1078 if(k==bestns.cbegin())
1079 subdomain=k->d_name;
1080 }
1081 }
1082 return subdomain;
1083 }
1084
1085 void SyncRes::updateValidationStatusInCache(const DNSName &qname, const QType& qt, bool aa, vState newState) const
1086 {
1087 if (newState == Bogus) {
1088 t_RC->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, aa, newState, s_maxbogusttl + d_now.tv_sec);
1089 }
1090 else {
1091 t_RC->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, aa, newState, boost::none);
1092 }
1093 }
1094
1095 bool SyncRes::doCNAMECacheCheck(const DNSName &qname, const QType &qtype, vector<DNSRecord>& ret, unsigned int depth, int &res, vState& state, bool wasAuthZone, bool wasForwardRecurse)
1096 {
1097 string prefix;
1098 if(doLog()) {
1099 prefix=d_prefix;
1100 prefix.append(depth, ' ');
1101 }
1102
1103 if((depth>9 && d_outqueries>10 && d_throttledqueries>5) || depth > 15) {
1104 LOG(prefix<<qname<<": recursing (CNAME or other indirection) too deep, depth="<<depth<<endl);
1105 res=RCode::ServFail;
1106 return true;
1107 }
1108
1109 vector<DNSRecord> cset;
1110 vector<std::shared_ptr<RRSIGRecordContent>> signatures;
1111 vector<std::shared_ptr<DNSRecord>> authorityRecs;
1112 bool wasAuth;
1113 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
1114 DNSName foundName;
1115 QType foundQT = QType(0); // 0 == QTYPE::ENT
1116
1117 LOG(prefix<<qname<<": Looking for CNAME cache hit of '"<<qname<<"|CNAME"<<"'"<<endl);
1118 /* we don't require auth data for forward-recurse lookups */
1119 if (t_RC->get(d_now.tv_sec, qname, QType(QType::CNAME), !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &state, &wasAuth) > 0) {
1120 foundName = qname;
1121 foundQT = QType(QType::CNAME);
1122 }
1123
1124 if (foundName.empty() && qname != g_rootdnsname) {
1125 // look for a DNAME cache hit
1126 auto labels = qname.getRawLabels();
1127 DNSName dnameName(g_rootdnsname);
1128
1129 LOG(prefix<<qname<<": Looking for DNAME cache hit of '"<<qname<<"|DNAME' or its ancestors"<<endl);
1130 do {
1131 dnameName.prependRawLabel(labels.back());
1132 labels.pop_back();
1133 if (dnameName == qname && qtype != QType::DNAME) { // The client does not want a DNAME, but we've reached the QNAME already. So there is no match
1134 break;
1135 }
1136 if (t_RC->get(d_now.tv_sec, dnameName, QType(QType::DNAME), !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &state, &wasAuth) > 0) {
1137 foundName = dnameName;
1138 foundQT = QType(QType::DNAME);
1139 break;
1140 }
1141 } while(!labels.empty());
1142 }
1143
1144 if (foundName.empty()) {
1145 LOG(prefix<<qname<<": No CNAME or DNAME cache hit of '"<< qname <<"' found"<<endl);
1146 return false;
1147 }
1148
1149 for(auto const &record : cset) {
1150 if (record.d_class != QClass::IN) {
1151 continue;
1152 }
1153
1154 if(record.d_ttl > (unsigned int) d_now.tv_sec) {
1155
1156 if (!wasAuthZone && shouldValidate() && (wasAuth || wasForwardRecurse) && state == Indeterminate && d_requireAuthData) {
1157 /* This means we couldn't figure out the state when this entry was cached,
1158 most likely because we hadn't computed the zone cuts yet. */
1159 /* make sure they are computed before validating */
1160 DNSName subdomain(foundName);
1161 /* if we are retrieving a DS, we only care about the state of the parent zone */
1162 if(qtype == QType::DS)
1163 subdomain.chopOff();
1164
1165 computeZoneCuts(subdomain, g_rootdnsname, depth);
1166
1167 vState recordState = getValidationStatus(foundName, false);
1168 if (recordState == Secure) {
1169 LOG(prefix<<qname<<": got Indeterminate state from the "<<foundQT.getName()<<" cache, validating.."<<endl);
1170 state = SyncRes::validateRecordsWithSigs(depth, foundName, foundQT, foundName, cset, signatures);
1171 if (state != Indeterminate) {
1172 LOG(prefix<<qname<<": got Indeterminate state from the CNAME cache, new validation result is "<<vStates[state]<<endl);
1173 if (state == Bogus) {
1174 capTTL = s_maxbogusttl;
1175 }
1176 updateValidationStatusInCache(foundName, foundQT, wasAuth, state);
1177 }
1178 }
1179 }
1180
1181 LOG(prefix<<qname<<": Found cache "<<foundQT.getName()<<" hit for '"<< foundName << "|"<<foundQT.getName()<<"' to '"<<record.d_content->getZoneRepresentation()<<"', validation state is "<<vStates[state]<<endl);
1182
1183 DNSRecord dr = record;
1184 dr.d_ttl -= d_now.tv_sec;
1185 dr.d_ttl = std::min(dr.d_ttl, capTTL);
1186 const uint32_t ttl = dr.d_ttl;
1187 ret.reserve(ret.size() + 2 + signatures.size() + authorityRecs.size());
1188 ret.push_back(dr);
1189
1190 for(const auto& signature : signatures) {
1191 DNSRecord sigdr;
1192 sigdr.d_type=QType::RRSIG;
1193 sigdr.d_name=foundName;
1194 sigdr.d_ttl=ttl;
1195 sigdr.d_content=signature;
1196 sigdr.d_place=DNSResourceRecord::ANSWER;
1197 sigdr.d_class=QClass::IN;
1198 ret.push_back(sigdr);
1199 }
1200
1201 for(const auto& rec : authorityRecs) {
1202 DNSRecord authDR(*rec);
1203 authDR.d_ttl=ttl;
1204 ret.push_back(authDR);
1205 }
1206
1207 DNSName newTarget;
1208 if (foundQT == QType::DNAME) {
1209 if (qtype == QType::DNAME && qname == foundName) { // client wanted the DNAME, no need to synthesize a CNAME
1210 res = 0;
1211 return true;
1212 }
1213 // Synthesize a CNAME
1214 auto dnameRR = getRR<DNAMERecordContent>(record);
1215 if (dnameRR == nullptr) {
1216 throw ImmediateServFailException("Unable to get record content for "+foundName.toLogString()+"|DNAME cache entry");
1217 }
1218 const auto& dnameSuffix = dnameRR->getTarget();
1219 DNSName targetPrefix = qname.makeRelative(foundName);
1220 try {
1221 dr.d_type = QType::CNAME;
1222 dr.d_name = targetPrefix + foundName;
1223 newTarget = targetPrefix + dnameSuffix;
1224 dr.d_content = std::make_shared<CNAMERecordContent>(CNAMERecordContent(newTarget));
1225 ret.push_back(dr);
1226 } catch (const std::exception &e) {
1227 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
1228 // But this is consistent with processRecords
1229 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + foundName.toLogString() +
1230 "', DNAME target: '" + dnameSuffix.toLogString() + "', substituted name: '" +
1231 targetPrefix.toLogString() + "." + dnameSuffix.toLogString() +
1232 "' : " + e.what());
1233 }
1234
1235 LOG(prefix<<qname<<": Synthesized "<<dr.d_name<<"|CNAME "<<newTarget<<endl);
1236 }
1237
1238 if(qtype == QType::CNAME) { // perhaps they really wanted a CNAME!
1239 res = 0;
1240 return true;
1241 }
1242
1243 // We have a DNAME _or_ CNAME cache hit and the client wants something else than those two.
1244 // Let's find the answer!
1245 if (foundQT == QType::CNAME) {
1246 const auto cnameContent = getRR<CNAMERecordContent>(record);
1247 if (cnameContent == nullptr) {
1248 throw ImmediateServFailException("Unable to get record content for "+foundName.toLogString()+"|CNAME cache entry");
1249 }
1250 newTarget = cnameContent->getTarget();
1251 }
1252
1253 set<GetBestNSAnswer>beenthere;
1254 vState cnameState = Indeterminate;
1255 res = doResolve(newTarget, qtype, ret, depth+1, beenthere, cnameState);
1256 LOG(prefix<<qname<<": updating validation state for response to "<<qname<<" from "<<vStates[state]<<" with the state from the DNAME/CNAME quest: "<<vStates[cnameState]<<endl);
1257 updateValidationState(state, cnameState);
1258
1259 return true;
1260 }
1261 }
1262 throw ImmediateServFailException("Could not determine whether or not there was a CNAME or DNAME in cache for '" + qname.toLogString() + "'");
1263 }
1264
1265 namespace {
1266 struct CacheEntry
1267 {
1268 vector<DNSRecord> records;
1269 vector<shared_ptr<RRSIGRecordContent>> signatures;
1270 uint32_t signaturesTTL{std::numeric_limits<uint32_t>::max()};
1271 };
1272 struct CacheKey
1273 {
1274 DNSName name;
1275 uint16_t type;
1276 DNSResourceRecord::Place place;
1277 bool operator<(const CacheKey& rhs) const {
1278 return tie(type, place, name) < tie(rhs.type, rhs.place, rhs.name);
1279 }
1280 };
1281 typedef map<CacheKey, CacheEntry> tcache_t;
1282 }
1283
1284 static void reapRecordsFromNegCacheEntryForValidation(tcache_t& tcache, const vector<DNSRecord>& records)
1285 {
1286 for (const auto& rec : records) {
1287 if (rec.d_type == QType::RRSIG) {
1288 auto rrsig = getRR<RRSIGRecordContent>(rec);
1289 if (rrsig) {
1290 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
1291 }
1292 } else {
1293 tcache[{rec.d_name,rec.d_type,rec.d_place}].records.push_back(rec);
1294 }
1295 }
1296 }
1297
1298 /*!
1299 * Convience function to push the records from records into ret with a new TTL
1300 *
1301 * \param records DNSRecords that need to go into ret
1302 * \param ttl The new TTL for these records
1303 * \param ret The vector of DNSRecords that should contian the records with the modified TTL
1304 */
1305 static void addTTLModifiedRecords(const vector<DNSRecord>& records, const uint32_t ttl, vector<DNSRecord>& ret) {
1306 for (const auto& rec : records) {
1307 DNSRecord r(rec);
1308 r.d_ttl = ttl;
1309 ret.push_back(r);
1310 }
1311 }
1312
1313 void SyncRes::computeNegCacheValidationStatus(const NegCache::NegCacheEntry* ne, const DNSName& qname, const QType& qtype, const int res, vState& state, unsigned int depth)
1314 {
1315 DNSName subdomain(qname);
1316 /* if we are retrieving a DS, we only care about the state of the parent zone */
1317 if(qtype == QType::DS)
1318 subdomain.chopOff();
1319
1320 computeZoneCuts(subdomain, g_rootdnsname, depth);
1321
1322 tcache_t tcache;
1323 reapRecordsFromNegCacheEntryForValidation(tcache, ne->authoritySOA.records);
1324 reapRecordsFromNegCacheEntryForValidation(tcache, ne->authoritySOA.signatures);
1325 reapRecordsFromNegCacheEntryForValidation(tcache, ne->DNSSECRecords.records);
1326 reapRecordsFromNegCacheEntryForValidation(tcache, ne->DNSSECRecords.signatures);
1327
1328 for (const auto& entry : tcache) {
1329 // this happens when we did store signatures, but passed on the records themselves
1330 if (entry.second.records.empty()) {
1331 continue;
1332 }
1333
1334 const DNSName& owner = entry.first.name;
1335
1336 vState recordState = getValidationStatus(owner, false);
1337 if (state == Indeterminate) {
1338 state = recordState;
1339 }
1340
1341 if (recordState == Secure) {
1342 recordState = SyncRes::validateRecordsWithSigs(depth, qname, qtype, owner, entry.second.records, entry.second.signatures);
1343 }
1344
1345 if (recordState != Indeterminate && recordState != state) {
1346 updateValidationState(state, recordState);
1347 if (state != Secure) {
1348 break;
1349 }
1350 }
1351 }
1352
1353 if (state == Secure) {
1354 vState neValidationState = ne->d_validationState;
1355 dState expectedState = res == RCode::NXDomain ? NXDOMAIN : NXQTYPE;
1356 dState denialState = getDenialValidationState(*ne, state, expectedState, false);
1357 updateDenialValidationState(neValidationState, ne->d_name, state, denialState, expectedState, qtype == QType::DS);
1358 }
1359 if (state != Indeterminate) {
1360 /* validation succeeded, let's update the cache entry so we don't have to validate again */
1361 boost::optional<uint32_t> capTTD = boost::none;
1362 if (state == Bogus) {
1363 capTTD = d_now.tv_sec + s_maxbogusttl;
1364 }
1365 t_sstorage.negcache.updateValidationStatus(ne->d_name, ne->d_qtype, state, capTTD);
1366 }
1367 }
1368
1369 bool SyncRes::doCacheCheck(const DNSName &qname, const DNSName& authname, bool wasForwardedOrAuthZone, bool wasAuthZone, bool wasForwardRecurse, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, int &res, vState& state)
1370 {
1371 bool giveNegative=false;
1372
1373 string prefix;
1374 if(doLog()) {
1375 prefix=d_prefix;
1376 prefix.append(depth, ' ');
1377 }
1378
1379 // sqname and sqtype are used contain 'higher' names if we have them (e.g. powerdns.com|SOA when we find a negative entry for doesnotexists.powerdns.com|A)
1380 DNSName sqname(qname);
1381 QType sqt(qtype);
1382 uint32_t sttl=0;
1383 // cout<<"Lookup for '"<<qname<<"|"<<qtype.getName()<<"' -> "<<getLastLabel(qname)<<endl;
1384 vState cachedState;
1385 const NegCache::NegCacheEntry* ne = nullptr;
1386
1387 if(s_rootNXTrust &&
1388 t_sstorage.negcache.getRootNXTrust(qname, d_now, &ne) &&
1389 ne->d_auth.isRoot() &&
1390 !(wasForwardedOrAuthZone && !authname.isRoot())) { // when forwarding, the root may only neg-cache if it was forwarded to.
1391 sttl = ne->d_ttd - d_now.tv_sec;
1392 LOG(prefix<<qname<<": Entire name '"<<qname<<"', is negatively cached via '"<<ne->d_auth<<"' & '"<<ne->d_name<<"' for another "<<sttl<<" seconds"<<endl);
1393 res = RCode::NXDomain;
1394 giveNegative = true;
1395 cachedState = ne->d_validationState;
1396 }
1397 else if (t_sstorage.negcache.get(qname, qtype, d_now, &ne)) {
1398 /* If we are looking for a DS, discard NXD if auth == qname
1399 and ask for a specific denial instead */
1400 if (qtype != QType::DS || ne->d_qtype.getCode() || ne->d_auth != qname ||
1401 t_sstorage.negcache.get(qname, qtype, d_now, &ne, true))
1402 {
1403 res = 0;
1404 sttl = ne->d_ttd - d_now.tv_sec;
1405 giveNegative = true;
1406 cachedState = ne->d_validationState;
1407 if(ne->d_qtype.getCode()) {
1408 LOG(prefix<<qname<<": "<<qtype.getName()<<" is negatively cached via '"<<ne->d_auth<<"' for another "<<sttl<<" seconds"<<endl);
1409 res = RCode::NoError;
1410 }
1411 else {
1412 LOG(prefix<<qname<<": Entire name '"<<qname<<"', is negatively cached via '"<<ne->d_auth<<"' for another "<<sttl<<" seconds"<<endl);
1413 res = RCode::NXDomain;
1414 }
1415 }
1416 }
1417
1418 if (giveNegative) {
1419
1420 state = cachedState;
1421
1422 if (!wasAuthZone && shouldValidate() && state == Indeterminate) {
1423 LOG(prefix<<qname<<": got Indeterminate state for records retrieved from the negative cache, validating.."<<endl);
1424 computeNegCacheValidationStatus(ne, qname, qtype, res, state, depth);
1425
1426 if (state != cachedState && state == Bogus) {
1427 sttl = std::min(sttl, s_maxbogusttl);
1428 }
1429 }
1430
1431 // Transplant SOA to the returned packet
1432 addTTLModifiedRecords(ne->authoritySOA.records, sttl, ret);
1433 if(d_doDNSSEC) {
1434 addTTLModifiedRecords(ne->authoritySOA.signatures, sttl, ret);
1435 addTTLModifiedRecords(ne->DNSSECRecords.records, sttl, ret);
1436 addTTLModifiedRecords(ne->DNSSECRecords.signatures, sttl, ret);
1437 }
1438
1439 LOG(prefix<<qname<<": updating validation state with negative cache content for "<<qname<<" to "<<vStates[state]<<endl);
1440 return true;
1441 }
1442
1443 vector<DNSRecord> cset;
1444 bool found=false, expired=false;
1445 vector<std::shared_ptr<RRSIGRecordContent>> signatures;
1446 vector<std::shared_ptr<DNSRecord>> authorityRecs;
1447 uint32_t ttl=0;
1448 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
1449 bool wasCachedAuth;
1450 if(t_RC->get(d_now.tv_sec, sqname, sqt, !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &cachedState, &wasCachedAuth) > 0) {
1451
1452 LOG(prefix<<sqname<<": Found cache hit for "<<sqt.getName()<<": ");
1453
1454 if (!wasAuthZone && shouldValidate() && (wasCachedAuth || wasForwardRecurse) && cachedState == Indeterminate && d_requireAuthData) {
1455
1456 /* This means we couldn't figure out the state when this entry was cached,
1457 most likely because we hadn't computed the zone cuts yet. */
1458 /* make sure they are computed before validating */
1459 DNSName subdomain(sqname);
1460 /* if we are retrieving a DS, we only care about the state of the parent zone */
1461 if(qtype == QType::DS)
1462 subdomain.chopOff();
1463
1464 computeZoneCuts(subdomain, g_rootdnsname, depth);
1465
1466 vState recordState = getValidationStatus(qname, false);
1467 if (recordState == Secure) {
1468 LOG(prefix<<sqname<<": got Indeterminate state from the cache, validating.."<<endl);
1469 cachedState = SyncRes::validateRecordsWithSigs(depth, sqname, sqt, sqname, cset, signatures);
1470 }
1471 else {
1472 cachedState = recordState;
1473 }
1474
1475 if (cachedState != Indeterminate) {
1476 LOG(prefix<<qname<<": got Indeterminate state from the cache, validation result is "<<vStates[cachedState]<<endl);
1477 if (cachedState == Bogus) {
1478 capTTL = s_maxbogusttl;
1479 }
1480 updateValidationStatusInCache(sqname, sqt, wasCachedAuth, cachedState);
1481 }
1482 }
1483
1484 for(auto j=cset.cbegin() ; j != cset.cend() ; ++j) {
1485
1486 LOG(j->d_content->getZoneRepresentation());
1487
1488 if (j->d_class != QClass::IN) {
1489 continue;
1490 }
1491
1492 if(j->d_ttl>(unsigned int) d_now.tv_sec) {
1493 DNSRecord dr=*j;
1494 dr.d_ttl -= d_now.tv_sec;
1495 dr.d_ttl = std::min(dr.d_ttl, capTTL);
1496 ttl = dr.d_ttl;
1497 ret.push_back(dr);
1498 LOG("[ttl="<<dr.d_ttl<<"] ");
1499 found=true;
1500 }
1501 else {
1502 LOG("[expired] ");
1503 expired=true;
1504 }
1505 }
1506
1507 ret.reserve(ret.size() + signatures.size() + authorityRecs.size());
1508
1509 for(const auto& signature : signatures) {
1510 DNSRecord dr;
1511 dr.d_type=QType::RRSIG;
1512 dr.d_name=sqname;
1513 dr.d_ttl=ttl;
1514 dr.d_content=signature;
1515 dr.d_place = DNSResourceRecord::ANSWER;
1516 dr.d_class=QClass::IN;
1517 ret.push_back(dr);
1518 }
1519
1520 for(const auto& rec : authorityRecs) {
1521 DNSRecord dr(*rec);
1522 dr.d_ttl=ttl;
1523 ret.push_back(dr);
1524 }
1525
1526 LOG(endl);
1527 if(found && !expired) {
1528 if (!giveNegative)
1529 res=0;
1530 LOG(prefix<<qname<<": updating validation state with cache content for "<<qname<<" to "<<vStates[cachedState]<<endl);
1531 state = cachedState;
1532 return true;
1533 }
1534 else
1535 LOG(prefix<<qname<<": cache had only stale entries"<<endl);
1536 }
1537
1538 return false;
1539 }
1540
1541 bool SyncRes::moreSpecificThan(const DNSName& a, const DNSName &b) const
1542 {
1543 return (a.isPartOf(b) && a.countLabels() > b.countLabels());
1544 }
1545
1546 struct speedOrder
1547 {
1548 bool operator()(const std::pair<DNSName, double> &a, const std::pair<DNSName, double> &b) const
1549 {
1550 return a.second < b.second;
1551 }
1552 };
1553
1554 inline std::vector<std::pair<DNSName, double>> SyncRes::shuffleInSpeedOrder(NsSet &tnameservers, const string &prefix)
1555 {
1556 std::vector<std::pair<DNSName, double>> rnameservers;
1557 rnameservers.reserve(tnameservers.size());
1558 for(const auto& tns: tnameservers) {
1559 double speed = t_sstorage.nsSpeeds[tns.first].get(&d_now);
1560 rnameservers.push_back({tns.first, speed});
1561 if(tns.first.empty()) // this was an authoritative OOB zone, don't pollute the nsSpeeds with that
1562 return rnameservers;
1563 }
1564
1565 random_shuffle(rnameservers.begin(),rnameservers.end());
1566 speedOrder so;
1567 stable_sort(rnameservers.begin(),rnameservers.end(), so);
1568
1569 if(doLog()) {
1570 LOG(prefix<<"Nameservers: ");
1571 for(auto i=rnameservers.begin();i!=rnameservers.end();++i) {
1572 if(i!=rnameservers.begin()) {
1573 LOG(", ");
1574 if(!((i-rnameservers.begin())%3)) {
1575 LOG(endl<<prefix<<" ");
1576 }
1577 }
1578 LOG(i->first.toLogString()<<"(" << (boost::format("%0.2f") % (i->second/1000.0)).str() <<"ms)");
1579 }
1580 LOG(endl);
1581 }
1582 return rnameservers;
1583 }
1584
1585 inline vector<ComboAddress> SyncRes::shuffleForwardSpeed(const vector<ComboAddress> &rnameservers, const string &prefix, const bool wasRd)
1586 {
1587 vector<ComboAddress> nameservers = rnameservers;
1588 map<ComboAddress, double> speeds;
1589
1590 for(const auto& val: nameservers) {
1591 double speed;
1592 DNSName nsName = DNSName(val.toStringWithPort());
1593 speed=t_sstorage.nsSpeeds[nsName].get(&d_now);
1594 speeds[val]=speed;
1595 }
1596 random_shuffle(nameservers.begin(),nameservers.end());
1597 speedOrderCA so(speeds);
1598 stable_sort(nameservers.begin(),nameservers.end(), so);
1599
1600 if(doLog()) {
1601 LOG(prefix<<"Nameservers: ");
1602 for(vector<ComboAddress>::const_iterator i=nameservers.cbegin();i!=nameservers.cend();++i) {
1603 if(i!=nameservers.cbegin()) {
1604 LOG(", ");
1605 if(!((i-nameservers.cbegin())%3)) {
1606 LOG(endl<<prefix<<" ");
1607 }
1608 }
1609 LOG((wasRd ? string("+") : string("-")) << i->toStringWithPort() <<"(" << (boost::format("%0.2f") % (speeds[*i]/1000.0)).str() <<"ms)");
1610 }
1611 LOG(endl);
1612 }
1613 return nameservers;
1614 }
1615
1616 static uint32_t getRRSIGTTL(const time_t now, const std::shared_ptr<RRSIGRecordContent>& rrsig)
1617 {
1618 uint32_t res = 0;
1619 if (now < rrsig->d_sigexpire) {
1620 res = static_cast<uint32_t>(rrsig->d_sigexpire) - now;
1621 }
1622 return res;
1623 }
1624
1625 static const set<uint16_t> nsecTypes = {QType::NSEC, QType::NSEC3};
1626
1627 /* Fills the authoritySOA and DNSSECRecords fields from ne with those found in the records
1628 *
1629 * \param records The records to parse for the authority SOA and NSEC(3) records
1630 * \param ne The NegCacheEntry to be filled out (will not be cleared, only appended to
1631 */
1632 static void harvestNXRecords(const vector<DNSRecord>& records, NegCache::NegCacheEntry& ne, const time_t now, uint32_t* lowestTTL) {
1633 for(const auto& rec : records) {
1634 if(rec.d_place != DNSResourceRecord::AUTHORITY)
1635 // RFC 4035 section 3.1.3. indicates that NSEC records MUST be placed in
1636 // the AUTHORITY section. Section 3.1.1 indicates that that RRSIGs for
1637 // records MUST be in the same section as the records they cover.
1638 // Hence, we ignore all records outside of the AUTHORITY section.
1639 continue;
1640
1641 if(rec.d_type == QType::RRSIG) {
1642 auto rrsig = getRR<RRSIGRecordContent>(rec);
1643 if(rrsig) {
1644 if(rrsig->d_type == QType::SOA) {
1645 ne.authoritySOA.signatures.push_back(rec);
1646 if (lowestTTL && isRRSIGNotExpired(now, rrsig)) {
1647 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1648 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
1649 }
1650 }
1651 if(nsecTypes.count(rrsig->d_type)) {
1652 ne.DNSSECRecords.signatures.push_back(rec);
1653 if (lowestTTL && isRRSIGNotExpired(now, rrsig)) {
1654 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1655 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
1656 }
1657 }
1658 }
1659 continue;
1660 }
1661 if(rec.d_type == QType::SOA) {
1662 ne.authoritySOA.records.push_back(rec);
1663 if (lowestTTL) {
1664 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1665 }
1666 continue;
1667 }
1668 if(nsecTypes.count(rec.d_type)) {
1669 ne.DNSSECRecords.records.push_back(rec);
1670 if (lowestTTL) {
1671 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1672 }
1673 continue;
1674 }
1675 }
1676 }
1677
1678 static cspmap_t harvestCSPFromNE(const NegCache::NegCacheEntry& ne)
1679 {
1680 cspmap_t cspmap;
1681 for(const auto& rec : ne.DNSSECRecords.signatures) {
1682 if(rec.d_type == QType::RRSIG) {
1683 auto rrc = getRR<RRSIGRecordContent>(rec);
1684 if (rrc) {
1685 cspmap[{rec.d_name,rrc->d_type}].signatures.push_back(rrc);
1686 }
1687 }
1688 }
1689 for(const auto& rec : ne.DNSSECRecords.records) {
1690 cspmap[{rec.d_name, rec.d_type}].records.push_back(rec.d_content);
1691 }
1692 return cspmap;
1693 }
1694
1695 // TODO remove after processRecords is fixed!
1696 // Adds the RRSIG for the SOA and the NSEC(3) + RRSIGs to ret
1697 static void addNXNSECS(vector<DNSRecord>&ret, const vector<DNSRecord>& records)
1698 {
1699 NegCache::NegCacheEntry ne;
1700 harvestNXRecords(records, ne, 0, nullptr);
1701 ret.insert(ret.end(), ne.authoritySOA.signatures.begin(), ne.authoritySOA.signatures.end());
1702 ret.insert(ret.end(), ne.DNSSECRecords.records.begin(), ne.DNSSECRecords.records.end());
1703 ret.insert(ret.end(), ne.DNSSECRecords.signatures.begin(), ne.DNSSECRecords.signatures.end());
1704 }
1705
1706 bool SyncRes::nameserversBlockedByRPZ(const DNSFilterEngine& dfe, const NsSet& nameservers)
1707 {
1708 if(d_wantsRPZ) {
1709 for (auto const &ns : nameservers) {
1710 d_appliedPolicy = dfe.getProcessingPolicy(ns.first, d_discardedPolicies);
1711 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
1712 LOG(", however nameserver "<<ns.first<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
1713 return true;
1714 }
1715
1716 // Traverse all IP addresses for this NS to see if they have an RPN NSIP policy
1717 for (auto const &address : ns.second.first) {
1718 d_appliedPolicy = dfe.getProcessingPolicy(address, d_discardedPolicies);
1719 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
1720 LOG(", however nameserver "<<ns.first<<" IP address "<<address.toString()<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
1721 return true;
1722 }
1723 }
1724 }
1725 }
1726 return false;
1727 }
1728
1729 bool SyncRes::nameserverIPBlockedByRPZ(const DNSFilterEngine& dfe, const ComboAddress& remoteIP)
1730 {
1731 if (d_wantsRPZ) {
1732 d_appliedPolicy = dfe.getProcessingPolicy(remoteIP, d_discardedPolicies);
1733 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
1734 LOG(" (blocked by RPZ policy '"+(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")+"')");
1735 return true;
1736 }
1737 }
1738 return false;
1739 }
1740
1741 vector<ComboAddress> SyncRes::retrieveAddressesForNS(const std::string& prefix, const DNSName& qname, std::vector<std::pair<DNSName, double>>::const_iterator& tns, const unsigned int depth, set<GetBestNSAnswer>& beenthere, const vector<std::pair<DNSName, double>>& rnameservers, NsSet& nameservers, bool& sendRDQuery, bool& pierceDontQuery, bool& flawedNSSet, bool cacheOnly)
1742 {
1743 vector<ComboAddress> result;
1744
1745 if(!tns->first.empty()) {
1746 LOG(prefix<<qname<<": Trying to resolve NS '"<<tns->first<< "' ("<<1+tns-rnameservers.begin()<<"/"<<(unsigned int)rnameservers.size()<<")"<<endl);
1747 result = getAddrs(tns->first, depth+2, beenthere, cacheOnly);
1748 pierceDontQuery=false;
1749 }
1750 else {
1751 LOG(prefix<<qname<<": Domain has hardcoded nameserver");
1752
1753 if(nameservers[tns->first].first.size() > 1) {
1754 LOG("s");
1755 }
1756 LOG(endl);
1757
1758 sendRDQuery = nameservers[tns->first].second;
1759 result = shuffleForwardSpeed(nameservers[tns->first].first, doLog() ? (prefix+qname.toString()+": ") : string(), sendRDQuery);
1760 pierceDontQuery=true;
1761 }
1762 return result;
1763 }
1764
1765 bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress& remoteIP, const DNSName& qname, const QType& qtype, bool pierceDontQuery)
1766 {
1767 if(t_sstorage.throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(remoteIP, "", 0))) {
1768 LOG(prefix<<qname<<": server throttled "<<endl);
1769 s_throttledqueries++; d_throttledqueries++;
1770 return true;
1771 }
1772 else if(t_sstorage.throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()))) {
1773 LOG(prefix<<qname<<": query throttled "<<remoteIP.toString()<<", "<<qname<<"; "<<qtype.getName()<<endl);
1774 s_throttledqueries++; d_throttledqueries++;
1775 return true;
1776 }
1777 else if(!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
1778 LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
1779 s_dontqueries++;
1780 return true;
1781 }
1782 return false;
1783 }
1784
1785 bool SyncRes::validationEnabled() const
1786 {
1787 return g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate;
1788 }
1789
1790 uint32_t SyncRes::computeLowestTTD(const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures, uint32_t signaturesTTL) const
1791 {
1792 uint32_t lowestTTD = std::numeric_limits<uint32_t>::max();
1793 for(const auto& record : records)
1794 lowestTTD = min(lowestTTD, record.d_ttl);
1795
1796 /* even if it was not requested for that request (Process, and neither AD nor DO set),
1797 it might be requested at a later time so we need to be careful with the TTL. */
1798 if (validationEnabled() && !signatures.empty()) {
1799 /* if we are validating, we don't want to cache records after their signatures expire. */
1800 /* records TTL are now TTD, let's add 'now' to the signatures lowest TTL */
1801 lowestTTD = min(lowestTTD, static_cast<uint32_t>(signaturesTTL + d_now.tv_sec));
1802
1803 for(const auto& sig : signatures) {
1804 if (isRRSIGNotExpired(d_now.tv_sec, sig)) {
1805 // we don't decerement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
1806 lowestTTD = min(lowestTTD, static_cast<uint32_t>(sig->d_sigexpire));
1807 }
1808 }
1809 }
1810
1811 return lowestTTD;
1812 }
1813
1814 void SyncRes::updateValidationState(vState& state, const vState stateUpdate)
1815 {
1816 LOG(d_prefix<<"validation state was "<<std::string(vStates[state])<<", state update is "<<std::string(vStates[stateUpdate]));
1817
1818 if (stateUpdate == TA) {
1819 state = Secure;
1820 }
1821 else if (stateUpdate == NTA) {
1822 state = Insecure;
1823 }
1824 else if (stateUpdate == Bogus) {
1825 state = Bogus;
1826 }
1827 else if (state == Indeterminate) {
1828 state = stateUpdate;
1829 }
1830 else if (stateUpdate == Insecure) {
1831 if (state != Bogus) {
1832 state = Insecure;
1833 }
1834 }
1835 LOG(", validation state is now "<<std::string(vStates[state])<<endl);
1836 }
1837
1838 vState SyncRes::getTA(const DNSName& zone, dsmap_t& ds)
1839 {
1840 auto luaLocal = g_luaconfs.getLocal();
1841
1842 if (luaLocal->dsAnchors.empty()) {
1843 LOG(d_prefix<<": No trust anchors configured, everything is Insecure"<<endl);
1844 /* We have no TA, everything is insecure */
1845 return Insecure;
1846 }
1847
1848 std::string reason;
1849 if (haveNegativeTrustAnchor(luaLocal->negAnchors, zone, reason)) {
1850 LOG(d_prefix<<": got NTA for '"<<zone<<"'"<<endl);
1851 return NTA;
1852 }
1853
1854 if (getTrustAnchor(luaLocal->dsAnchors, zone, ds)) {
1855 LOG(d_prefix<<": got TA for '"<<zone<<"'"<<endl);
1856 return TA;
1857 }
1858 else {
1859 LOG(d_prefix<<": no TA found for '"<<zone<<"' among "<< luaLocal->dsAnchors.size()<<endl);
1860 }
1861
1862 if (zone.isRoot()) {
1863 /* No TA for the root */
1864 return Insecure;
1865 }
1866
1867 return Indeterminate;
1868 }
1869
1870 static size_t countSupportedDS(const dsmap_t& dsmap)
1871 {
1872 size_t count = 0;
1873
1874 for (const auto& ds : dsmap) {
1875 if (isSupportedDS(ds)) {
1876 count++;
1877 }
1878 }
1879
1880 return count;
1881 }
1882
1883 vState SyncRes::getDSRecords(const DNSName& zone, dsmap_t& ds, bool taOnly, unsigned int depth, bool bogusOnNXD, bool* foundCut)
1884 {
1885 vState result = getTA(zone, ds);
1886
1887 if (result != Indeterminate || taOnly) {
1888 if (foundCut) {
1889 *foundCut = (result != Indeterminate);
1890 }
1891
1892 if (result == TA) {
1893 if (countSupportedDS(ds) == 0) {
1894 ds.clear();
1895 result = Insecure;
1896 }
1897 else {
1898 result = Secure;
1899 }
1900 }
1901 else if (result == NTA) {
1902 result = Insecure;
1903 }
1904
1905 return result;
1906 }
1907
1908 bool oldSkipCNAME = d_skipCNAMECheck;
1909 d_skipCNAMECheck = true;
1910
1911 std::set<GetBestNSAnswer> beenthere;
1912 std::vector<DNSRecord> dsrecords;
1913
1914 vState state = Indeterminate;
1915 int rcode = doResolve(zone, QType(QType::DS), dsrecords, depth + 1, beenthere, state);
1916 d_skipCNAMECheck = oldSkipCNAME;
1917
1918 if (rcode == RCode::NoError || (rcode == RCode::NXDomain && !bogusOnNXD)) {
1919 uint8_t bestDigestType = 0;
1920
1921 bool gotCNAME = false;
1922 for (const auto& record : dsrecords) {
1923 if (record.d_type == QType::DS) {
1924 const auto dscontent = getRR<DSRecordContent>(record);
1925 if (dscontent && isSupportedDS(*dscontent)) {
1926 // Make GOST a lower prio than SHA256
1927 if (dscontent->d_digesttype == DNSSECKeeper::GOST && bestDigestType == DNSSECKeeper::SHA256) {
1928 continue;
1929 }
1930 if (dscontent->d_digesttype > bestDigestType || (bestDigestType == DNSSECKeeper::GOST && dscontent->d_digesttype == DNSSECKeeper::SHA256)) {
1931 bestDigestType = dscontent->d_digesttype;
1932 }
1933 ds.insert(*dscontent);
1934 }
1935 }
1936 else if (record.d_type == QType::CNAME && record.d_name == zone) {
1937 gotCNAME = true;
1938 }
1939 }
1940
1941 /* RFC 4509 section 3: "Validator implementations SHOULD ignore DS RRs containing SHA-1
1942 * digests if DS RRs with SHA-256 digests are present in the DS RRset."
1943 * As SHA348 is specified as well, the spirit of the this line is "use the best algorithm".
1944 */
1945 for (auto dsrec = ds.begin(); dsrec != ds.end(); ) {
1946 if (dsrec->d_digesttype != bestDigestType) {
1947 dsrec = ds.erase(dsrec);
1948 }
1949 else {
1950 ++dsrec;
1951 }
1952 }
1953
1954 if (rcode == RCode::NoError) {
1955 if (ds.empty()) {
1956 /* we have no DS, it's either:
1957 - a delegation to a non-DNSSEC signed zone
1958 - no delegation, we stay in the same zone
1959 */
1960 if (gotCNAME || denialProvesNoDelegation(zone, dsrecords)) {
1961 /* we are still inside the same zone */
1962
1963 if (foundCut) {
1964 *foundCut = false;
1965 }
1966 return state;
1967 }
1968
1969 /* delegation with no DS, might be Secure -> Insecure */
1970 if (foundCut) {
1971 *foundCut = true;
1972 }
1973
1974 /* a delegation with no DS is either:
1975 - a signed zone (Secure) to an unsigned one (Insecure)
1976 - an unsigned zone to another unsigned one (Insecure stays Insecure, Bogus stays Bogus)
1977 */
1978 return state == Secure ? Insecure : state;
1979 } else {
1980 /* we have a DS */
1981 if (foundCut) {
1982 *foundCut = true;
1983 }
1984 }
1985 }
1986
1987 return state;
1988 }
1989
1990 LOG(d_prefix<<": returning Bogus state from "<<__func__<<"("<<zone<<")"<<endl);
1991 return Bogus;
1992 }
1993
1994 bool SyncRes::haveExactValidationStatus(const DNSName& domain)
1995 {
1996 if (!shouldValidate()) {
1997 return false;
1998 }
1999 const auto& it = d_cutStates.find(domain);
2000 if (it != d_cutStates.cend()) {
2001 return true;
2002 }
2003 return false;
2004 }
2005
2006 vState SyncRes::getValidationStatus(const DNSName& subdomain, bool allowIndeterminate)
2007 {
2008 vState result = Indeterminate;
2009
2010 if (!shouldValidate()) {
2011 return result;
2012 }
2013 DNSName name(subdomain);
2014 do {
2015 const auto& it = d_cutStates.find(name);
2016 if (it != d_cutStates.cend()) {
2017 if (allowIndeterminate || it->second != Indeterminate) {
2018 LOG(d_prefix<<": got status "<<vStates[it->second]<<" for name "<<subdomain<<" (from "<<name<<")"<<endl);
2019 return it->second;
2020 }
2021 }
2022 }
2023 while (name.chopOff());
2024
2025 return result;
2026 }
2027
2028 bool SyncRes::lookForCut(const DNSName& qname, unsigned int depth, const vState existingState, vState& newState)
2029 {
2030 bool foundCut = false;
2031 dsmap_t ds;
2032 vState dsState = getDSRecords(qname, ds, newState == Bogus || existingState == Insecure || existingState == Bogus, depth, false, &foundCut);
2033
2034 if (dsState != Indeterminate) {
2035 newState = dsState;
2036 }
2037
2038 return foundCut;
2039 }
2040
2041 void SyncRes::computeZoneCuts(const DNSName& begin, const DNSName& end, unsigned int depth)
2042 {
2043 if(!begin.isPartOf(end)) {
2044 LOG(d_prefix<<" "<<begin.toLogString()<<" is not part of "<<end.toLogString()<<endl);
2045 throw PDNSException(begin.toLogString() + " is not part of " + end.toLogString());
2046 }
2047
2048 if (d_cutStates.count(begin) != 0) {
2049 return;
2050 }
2051
2052 dsmap_t ds;
2053 vState cutState = getDSRecords(end, ds, false, depth);
2054 LOG(d_prefix<<": setting cut state for "<<end<<" to "<<vStates[cutState]<<endl);
2055 d_cutStates[end] = cutState;
2056
2057 if (!shouldValidate()) {
2058 return;
2059 }
2060
2061 DNSName qname(end);
2062 std::vector<string> labelsToAdd = begin.makeRelative(end).getRawLabels();
2063
2064 bool oldSkipCNAME = d_skipCNAMECheck;
2065 d_skipCNAMECheck = true;
2066
2067 while(qname != begin) {
2068 if (labelsToAdd.empty())
2069 break;
2070
2071 qname.prependRawLabel(labelsToAdd.back());
2072 labelsToAdd.pop_back();
2073 LOG(d_prefix<<": - Looking for a cut at "<<qname<<endl);
2074
2075 const auto cutIt = d_cutStates.find(qname);
2076 if (cutIt != d_cutStates.cend()) {
2077 if (cutIt->second != Indeterminate) {
2078 LOG(d_prefix<<": - Cut already known at "<<qname<<endl);
2079 cutState = cutIt->second;
2080 continue;
2081 }
2082 }
2083
2084 /* no need to look for NS and DS if we are already insecure or bogus,
2085 just look for (N)TA
2086 */
2087 if (cutState == Insecure || cutState == Bogus) {
2088 dsmap_t cutDS;
2089 vState newState = getDSRecords(qname, cutDS, true, depth);
2090 if (newState == Indeterminate) {
2091 continue;
2092 }
2093
2094 LOG(d_prefix<<": New state for "<<qname<<" is "<<vStates[newState]<<endl);
2095 cutState = newState;
2096
2097 d_cutStates[qname] = cutState;
2098
2099 continue;
2100 }
2101
2102 vState newState = Indeterminate;
2103 /* temporarily mark as Indeterminate, so that we won't enter an endless loop
2104 trying to determine that zone cut again. */
2105 d_cutStates[qname] = newState;
2106 bool foundCut = lookForCut(qname, depth + 1, cutState, newState);
2107 if (foundCut) {
2108 LOG(d_prefix<<": - Found cut at "<<qname<<endl);
2109 if (newState != Indeterminate) {
2110 cutState = newState;
2111 }
2112 LOG(d_prefix<<": New state for "<<qname<<" is "<<vStates[cutState]<<endl);
2113 d_cutStates[qname] = cutState;
2114 }
2115 else {
2116 /* remove the temporary cut */
2117 LOG(d_prefix<<qname<<": removing cut state for "<<qname<<endl);
2118 d_cutStates.erase(qname);
2119 }
2120 }
2121
2122 d_skipCNAMECheck = oldSkipCNAME;
2123
2124 LOG(d_prefix<<": list of cuts from "<<begin<<" to "<<end<<endl);
2125 for (const auto& cut : d_cutStates) {
2126 if (cut.first.isRoot() || (begin.isPartOf(cut.first) && cut.first.isPartOf(end))) {
2127 LOG(" - "<<cut.first<<": "<<vStates[cut.second]<<endl);
2128 }
2129 }
2130 }
2131
2132 vState SyncRes::validateDNSKeys(const DNSName& zone, const std::vector<DNSRecord>& dnskeys, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures, unsigned int depth)
2133 {
2134 dsmap_t ds;
2135 if (!signatures.empty()) {
2136 DNSName signer = getSigner(signatures);
2137
2138 if (!signer.empty() && zone.isPartOf(signer)) {
2139 vState state = getDSRecords(signer, ds, false, depth);
2140
2141 if (state != Secure) {
2142 return state;
2143 }
2144 }
2145 }
2146
2147 skeyset_t tentativeKeys;
2148 std::vector<shared_ptr<DNSRecordContent> > toSign;
2149
2150 for (const auto& dnskey : dnskeys) {
2151 if (dnskey.d_type == QType::DNSKEY) {
2152 auto content = getRR<DNSKEYRecordContent>(dnskey);
2153 if (content) {
2154 tentativeKeys.insert(content);
2155 toSign.push_back(content);
2156 }
2157 }
2158 }
2159
2160 LOG(d_prefix<<": trying to validate "<<std::to_string(tentativeKeys.size())<<" DNSKEYs with "<<std::to_string(ds.size())<<" DS"<<endl);
2161 skeyset_t validatedKeys;
2162 validateDNSKeysAgainstDS(d_now.tv_sec, zone, ds, tentativeKeys, toSign, signatures, validatedKeys);
2163
2164 LOG(d_prefix<<": we now have "<<std::to_string(validatedKeys.size())<<" DNSKEYs"<<endl);
2165
2166 /* if we found at least one valid RRSIG covering the set,
2167 all tentative keys are validated keys. Otherwise it means
2168 we haven't found at least one DNSKEY and a matching RRSIG
2169 covering this set, this looks Bogus. */
2170 if (validatedKeys.size() != tentativeKeys.size()) {
2171 LOG(d_prefix<<": returning Bogus state from "<<__func__<<"("<<zone<<")"<<endl);
2172 return Bogus;
2173 }
2174
2175 return Secure;
2176 }
2177
2178 vState SyncRes::getDNSKeys(const DNSName& signer, skeyset_t& keys, unsigned int depth)
2179 {
2180 std::vector<DNSRecord> records;
2181 std::set<GetBestNSAnswer> beenthere;
2182 LOG(d_prefix<<"Retrieving DNSKeys for "<<signer<<endl);
2183
2184 vState state = Indeterminate;
2185 /* following CNAME might lead to us to the wrong DNSKEY */
2186 bool oldSkipCNAME = d_skipCNAMECheck;
2187 d_skipCNAMECheck = true;
2188 int rcode = doResolve(signer, QType(QType::DNSKEY), records, depth + 1, beenthere, state);
2189 d_skipCNAMECheck = oldSkipCNAME;
2190
2191 if (rcode == RCode::NoError) {
2192 if (state == Secure) {
2193 for (const auto& key : records) {
2194 if (key.d_type == QType::DNSKEY) {
2195 auto content = getRR<DNSKEYRecordContent>(key);
2196 if (content) {
2197 keys.insert(content);
2198 }
2199 }
2200 }
2201 }
2202 LOG(d_prefix<<"Retrieved "<<keys.size()<<" DNSKeys for "<<signer<<", state is "<<vStates[state]<<endl);
2203 return state;
2204 }
2205
2206 LOG(d_prefix<<"Returning Bogus state from "<<__func__<<"("<<signer<<")"<<endl);
2207 return Bogus;
2208 }
2209
2210 vState SyncRes::validateRecordsWithSigs(unsigned int depth, const DNSName& qname, const QType& qtype, const DNSName& name, const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures)
2211 {
2212 skeyset_t keys;
2213 if (!signatures.empty()) {
2214 const DNSName signer = getSigner(signatures);
2215 if (!signer.empty() && name.isPartOf(signer)) {
2216 if ((qtype == QType::DNSKEY || qtype == QType::DS) && signer == qname) {
2217 /* we are already retrieving those keys, sorry */
2218 if (qtype == QType::DS) {
2219 /* something is very wrong */
2220 LOG(d_prefix<<"The DS for "<<qname<<" is signed by itself, going Bogus"<<endl);
2221 return Bogus;
2222 }
2223 return Indeterminate;
2224 }
2225 vState state = getDNSKeys(signer, keys, depth);
2226 if (state != Secure) {
2227 return state;
2228 }
2229 }
2230 } else {
2231 LOG(d_prefix<<"Bogus!"<<endl);
2232 return Bogus;
2233 }
2234
2235 std::vector<std::shared_ptr<DNSRecordContent> > recordcontents;
2236 for (const auto& record : records) {
2237 recordcontents.push_back(record.d_content);
2238 }
2239
2240 LOG(d_prefix<<"Going to validate "<<recordcontents.size()<< " record contents with "<<signatures.size()<<" sigs and "<<keys.size()<<" keys for "<<name<<endl);
2241 if (validateWithKeySet(d_now.tv_sec, name, recordcontents, signatures, keys, false)) {
2242 LOG(d_prefix<<"Secure!"<<endl);
2243 return Secure;
2244 }
2245
2246 LOG(d_prefix<<"Bogus!"<<endl);
2247 return Bogus;
2248 }
2249
2250 static bool allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec)
2251 {
2252 switch(rec.d_type) {
2253 case QType::MX:
2254 {
2255 if (auto mxContent = getRR<MXRecordContent>(rec)) {
2256 allowedAdditionals.insert(mxContent->d_mxname);
2257 }
2258 return true;
2259 }
2260 case QType::NS:
2261 {
2262 if (auto nsContent = getRR<NSRecordContent>(rec)) {
2263 allowedAdditionals.insert(nsContent->getNS());
2264 }
2265 return true;
2266 }
2267 case QType::SRV:
2268 {
2269 if (auto srvContent = getRR<SRVRecordContent>(rec)) {
2270 allowedAdditionals.insert(srvContent->d_target);
2271 }
2272 return true;
2273 }
2274 default:
2275 return false;
2276 }
2277 }
2278
2279 void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType& qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
2280 {
2281 const bool wasForwardRecurse = wasForwarded && rdQuery;
2282 /* list of names for which we will allow A and AAAA records in the additional section
2283 to remain */
2284 std::unordered_set<DNSName> allowedAdditionals = { qname };
2285 bool haveAnswers = false;
2286 bool isNXDomain = false;
2287 bool isNXQType = false;
2288
2289 for(auto rec = lwr.d_records.begin(); rec != lwr.d_records.end(); ) {
2290
2291 if (rec->d_type == QType::OPT) {
2292 ++rec;
2293 continue;
2294 }
2295
2296 if (rec->d_class != QClass::IN) {
2297 LOG(prefix<<"Removing non internet-classed data received from "<<auth<<endl);
2298 rec = lwr.d_records.erase(rec);
2299 continue;
2300 }
2301
2302 if (rec->d_type == QType::ANY) {
2303 LOG(prefix<<"Removing 'ANY'-typed data received from "<<auth<<endl);
2304 rec = lwr.d_records.erase(rec);
2305 continue;
2306 }
2307
2308 if (!rec->d_name.isPartOf(auth)) {
2309 LOG(prefix<<"Removing record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2310 rec = lwr.d_records.erase(rec);
2311 continue;
2312 }
2313
2314 /* dealing with the records in answer */
2315 if (!(lwr.d_aabit || wasForwardRecurse) && rec->d_place == DNSResourceRecord::ANSWER) {
2316 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2317 are sending such responses */
2318 if (!(rec->d_type == QType::CNAME && qname == rec->d_name)) {
2319 LOG(prefix<<"Removing record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the answer section without the AA bit set received from "<<auth<<endl);
2320 rec = lwr.d_records.erase(rec);
2321 continue;
2322 }
2323 }
2324
2325 if (rec->d_type == QType::DNAME && (rec->d_place != DNSResourceRecord::ANSWER || !qname.isPartOf(rec->d_name))) {
2326 LOG(prefix<<"Removing invalid DNAME record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2327 rec = lwr.d_records.erase(rec);
2328 continue;
2329 }
2330
2331 if (rec->d_place == DNSResourceRecord::ANSWER && (qtype != QType::ANY && rec->d_type != qtype.getCode() && s_redirectionQTypes.count(rec->d_type) == 0 && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG)) {
2332 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2333 rec = lwr.d_records.erase(rec);
2334 continue;
2335 }
2336
2337 if (rec->d_place == DNSResourceRecord::ANSWER && !haveAnswers) {
2338 haveAnswers = true;
2339 }
2340
2341 if (rec->d_place == DNSResourceRecord::ANSWER) {
2342 allowAdditionalEntry(allowedAdditionals, *rec);
2343 }
2344
2345 /* dealing with the records in authority */
2346 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type != QType::NS && rec->d_type != QType::DS && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG && rec->d_type != QType::NSEC && rec->d_type != QType::NSEC3) {
2347 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2348 rec = lwr.d_records.erase(rec);
2349 continue;
2350 }
2351
2352 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::SOA) {
2353 if (!qname.isPartOf(rec->d_name)) {
2354 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2355 rec = lwr.d_records.erase(rec);
2356 continue;
2357 }
2358
2359 if (!(lwr.d_aabit || wasForwardRecurse)) {
2360 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2361 rec = lwr.d_records.erase(rec);
2362 continue;
2363 }
2364
2365 if (!haveAnswers) {
2366 if (lwr.d_rcode == RCode::NXDomain) {
2367 isNXDomain = true;
2368 }
2369 else if (lwr.d_rcode == RCode::NoError) {
2370 isNXQType = true;
2371 }
2372 }
2373 }
2374
2375 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && (isNXDomain || isNXQType)) {
2376 /* we don't want to pick up NS records in AUTHORITY or ADDITIONAL sections of NXDomain answers
2377 because they are somewhat easy to insert into a large, fragmented UDP response
2378 for an off-path attacker by injecting spoofed UDP fragments.
2379 */
2380 LOG(prefix<<"Removing NS record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section of a "<<(isNXDomain ? "NXD" : "NXQTYPE")<<" response received from "<<auth<<endl);
2381 rec = lwr.d_records.erase(rec);
2382 continue;
2383 }
2384
2385 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS) {
2386 allowAdditionalEntry(allowedAdditionals, *rec);
2387 }
2388
2389 /* dealing with the records in additional */
2390 if (rec->d_place == DNSResourceRecord::ADDITIONAL && rec->d_type != QType::A && rec->d_type != QType::AAAA && rec->d_type != QType::RRSIG) {
2391 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2392 rec = lwr.d_records.erase(rec);
2393 continue;
2394 }
2395
2396 if (rec->d_place == DNSResourceRecord::ADDITIONAL && allowedAdditionals.count(rec->d_name) == 0) {
2397 LOG(prefix<<"Removing irrelevant additional record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2398 rec = lwr.d_records.erase(rec);
2399 continue;
2400 }
2401
2402 ++rec;
2403 }
2404 }
2405
2406 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType& qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery)
2407 {
2408 bool wasForwardRecurse = wasForwarded && rdQuery;
2409 tcache_t tcache;
2410
2411 string prefix;
2412 if(doLog()) {
2413 prefix=d_prefix;
2414 prefix.append(depth, ' ');
2415 }
2416
2417 sanitizeRecords(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
2418
2419 std::vector<std::shared_ptr<DNSRecord>> authorityRecs;
2420 const unsigned int labelCount = qname.countLabels();
2421 bool isCNAMEAnswer = false;
2422 bool isDNAMEAnswer = false;
2423 for(const auto& rec : lwr.d_records) {
2424 if (rec.d_class != QClass::IN) {
2425 continue;
2426 }
2427
2428 if(!isCNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::CNAME && (!(qtype==QType(QType::CNAME))) && rec.d_name == qname && !isDNAMEAnswer) {
2429 isCNAMEAnswer = true;
2430 }
2431 if(!isDNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::DNAME && qtype != QType(QType::DNAME) && qname.isPartOf(rec.d_name)) {
2432 isDNAMEAnswer = true;
2433 isCNAMEAnswer = false;
2434 }
2435
2436 /* if we have a positive answer synthetized from a wildcard,
2437 we need to store the corresponding NSEC/NSEC3 records proving
2438 that the exact name did not exist in the negative cache */
2439 if(gatherWildcardProof) {
2440 if (nsecTypes.count(rec.d_type)) {
2441 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
2442 }
2443 else if (rec.d_type == QType::RRSIG) {
2444 auto rrsig = getRR<RRSIGRecordContent>(rec);
2445 if (rrsig && nsecTypes.count(rrsig->d_type)) {
2446 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
2447 }
2448 }
2449 }
2450 if(rec.d_type == QType::RRSIG) {
2451 auto rrsig = getRR<RRSIGRecordContent>(rec);
2452 if (rrsig) {
2453 /* As illustrated in rfc4035's Appendix B.6, the RRSIG label
2454 count can be lower than the name's label count if it was
2455 synthetized from the wildcard. Note that the difference might
2456 be > 1. */
2457 if (rec.d_name == qname && isWildcardExpanded(labelCount, rrsig)) {
2458 gatherWildcardProof = true;
2459 if (!isWildcardExpandedOntoItself(rec.d_name, labelCount, rrsig)) {
2460 /* if we have a wildcard expanded onto itself, we don't need to prove
2461 that the exact name doesn't exist because it actually does.
2462 We still want to gather the corresponding NSEC/NSEC3 records
2463 to pass them to our client in case it wants to validate by itself.
2464 */
2465 LOG(prefix<<qname<<": RRSIG indicates the name was synthetized from a wildcard, we need a wildcard proof"<<endl);
2466 needWildcardProof = true;
2467 }
2468 else {
2469 LOG(prefix<<qname<<": RRSIG indicates the name was synthetized from a wildcard expanded onto itself, we need to gather wildcard proof"<<endl);
2470 }
2471 wildcardLabelsCount = rrsig->d_labels;
2472 }
2473
2474 // cerr<<"Got an RRSIG for "<<DNSRecordContent::NumberToType(rrsig->d_type)<<" with name '"<<rec.d_name<<"'"<<endl;
2475 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
2476 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL = std::min(tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL, rec.d_ttl);
2477 }
2478 }
2479 }
2480
2481 // reap all answers from this packet that are acceptable
2482 for(auto& rec : lwr.d_records) {
2483 if(rec.d_type == QType::OPT) {
2484 LOG(prefix<<qname<<": OPT answer '"<<rec.d_name<<"' from '"<<auth<<"' nameservers" <<endl);
2485 continue;
2486 }
2487 LOG(prefix<<qname<<": accept answer '"<<rec.d_name<<"|"<<DNSRecordContent::NumberToType(rec.d_type)<<"|"<<rec.d_content->getZoneRepresentation()<<"' from '"<<auth<<"' nameservers? ttl="<<rec.d_ttl<<", place="<<(int)rec.d_place<<" ");
2488 if(rec.d_type == QType::ANY) {
2489 LOG("NO! - we don't accept 'ANY'-typed data"<<endl);
2490 continue;
2491 }
2492
2493 if(rec.d_class != QClass::IN) {
2494 LOG("NO! - we don't accept records for any other class than 'IN'"<<endl);
2495 continue;
2496 }
2497
2498 if (!(lwr.d_aabit || wasForwardRecurse) && rec.d_place == DNSResourceRecord::ANSWER) {
2499 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2500 are sending such responses */
2501 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
2502 LOG("NO! - we don't accept records in the answers section without the AA bit set"<<endl);
2503 continue;
2504 }
2505 }
2506
2507 if(rec.d_name.isPartOf(auth)) {
2508 if(rec.d_type == QType::RRSIG) {
2509 LOG("RRSIG - separate"<<endl);
2510 }
2511 else if(lwr.d_aabit && lwr.d_rcode==RCode::NoError && rec.d_place==DNSResourceRecord::ANSWER && ((rec.d_type != QType::DNSKEY && rec.d_type != QType::DS) || rec.d_name != auth) && s_delegationOnly.count(auth)) {
2512 LOG("NO! Is from delegation-only zone"<<endl);
2513 s_nodelegated++;
2514 return RCode::NXDomain;
2515 }
2516 else {
2517 bool haveLogged = false;
2518 if (isDNAMEAnswer && rec.d_type == QType::CNAME) {
2519 LOG("NO - we already have a DNAME answer for this domain");
2520 continue;
2521 }
2522 if (!t_sstorage.domainmap->empty()) {
2523 // Check if we are authoritative for a zone in this answer
2524 DNSName tmp_qname(rec.d_name);
2525 auto auth_domain_iter=getBestAuthZone(&tmp_qname);
2526 if(auth_domain_iter!=t_sstorage.domainmap->end() &&
2527 auth.countLabels() <= auth_domain_iter->first.countLabels()) {
2528 if (auth_domain_iter->first != auth) {
2529 LOG("NO! - we are authoritative for the zone "<<auth_domain_iter->first<<endl);
2530 continue;
2531 } else {
2532 LOG("YES! - This answer was ");
2533 if (!wasForwarded) {
2534 LOG("retrieved from the local auth store.");
2535 } else {
2536 LOG("received from a server we forward to.");
2537 }
2538 haveLogged = true;
2539 LOG(endl);
2540 }
2541 }
2542 }
2543 if (!haveLogged) {
2544 LOG("YES!"<<endl);
2545 }
2546
2547 rec.d_ttl=min(s_maxcachettl, rec.d_ttl);
2548
2549 DNSRecord dr(rec);
2550 dr.d_ttl += d_now.tv_sec;
2551 dr.d_place=DNSResourceRecord::ANSWER;
2552 tcache[{rec.d_name,rec.d_type,rec.d_place}].records.push_back(dr);
2553 }
2554 }
2555 else
2556 LOG("NO!"<<endl);
2557 }
2558
2559 // supplant
2560 for(tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
2561 if((i->second.records.size() + i->second.signatures.size()) > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
2562 uint32_t lowestTTD=computeLowestTTD(i->second.records, i->second.signatures, i->second.signaturesTTL);
2563
2564 for(auto& record : i->second.records)
2565 record.d_ttl = lowestTTD; // boom
2566 }
2567
2568 // cout<<"Have "<<i->second.records.size()<<" records and "<<i->second.signatures.size()<<" signatures for "<<i->first.name;
2569 // cout<<'|'<<DNSRecordContent::NumberToType(i->first.type)<<endl;
2570 }
2571
2572 for(tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
2573
2574 if(i->second.records.empty()) // this happens when we did store signatures, but passed on the records themselves
2575 continue;
2576
2577 /* Even if the AA bit is set, additional data cannot be considered
2578 as authoritative. This is especially important during validation
2579 because keeping records in the additional section is allowed even
2580 if the corresponding RRSIGs are not included, without setting the TC
2581 bit, as stated in rfc4035's section 3.1.1. Including RRSIG RRs in a Response:
2582 "When placing a signed RRset in the Additional section, the name
2583 server MUST also place its RRSIG RRs in the Additional section.
2584 If space does not permit inclusion of both the RRset and its
2585 associated RRSIG RRs, the name server MAY retain the RRset while
2586 dropping the RRSIG RRs. If this happens, the name server MUST NOT
2587 set the TC bit solely because these RRSIG RRs didn't fit."
2588 */
2589 bool isAA = lwr.d_aabit && i->first.place != DNSResourceRecord::ADDITIONAL;
2590 /* if we forwarded the query to a recursor, we can expect the answer to be signed,
2591 even if the answer is not AA. Of course that's not only true inside a Secure
2592 zone, but we check that below. */
2593 bool expectSignature = i->first.place == DNSResourceRecord::ANSWER || ((lwr.d_aabit || wasForwardRecurse) && i->first.place != DNSResourceRecord::ADDITIONAL);
2594 if (isCNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::CNAME || i->first.name != qname)) {
2595 /*
2596 rfc2181 states:
2597 Note that the answer section of an authoritative answer normally
2598 contains only authoritative data. However when the name sought is an
2599 alias (see section 10.1.1) only the record describing that alias is
2600 necessarily authoritative. Clients should assume that other records
2601 may have come from the server's cache. Where authoritative answers
2602 are required, the client should query again, using the canonical name
2603 associated with the alias.
2604 */
2605 isAA = false;
2606 expectSignature = false;
2607 }
2608
2609 if (isCNAMEAnswer && i->first.place == DNSResourceRecord::AUTHORITY && i->first.type == QType::NS && auth == i->first.name) {
2610 /* These NS can't be authoritative since we have a CNAME answer for which (see above) only the
2611 record describing that alias is necessarily authoritative.
2612 But if we allow the current auth, which might be serving the child zone, to raise the TTL
2613 of non-authoritative NS in the cache, they might be able to keep a "ghost" zone alive forever,
2614 even after the delegation is gone from the parent.
2615 So let's just do nothing with them, we can fetch them directly if we need them.
2616 */
2617 LOG(d_prefix<<": skipping authority NS from '"<<auth<<"' nameservers in CNAME answer "<<i->first.name<<"|"<<DNSRecordContent::NumberToType(i->first.type)<<endl);
2618 continue;
2619 }
2620
2621 vState recordState = getValidationStatus(i->first.name, false);
2622 LOG(d_prefix<<": got initial zone status "<<vStates[recordState]<<" for record "<<i->first.name<<"|"<<DNSRecordContent::NumberToType(i->first.type)<<endl);
2623
2624 if (shouldValidate() && recordState == Secure) {
2625 vState initialState = recordState;
2626
2627 if (expectSignature) {
2628 if (i->first.place != DNSResourceRecord::ADDITIONAL) {
2629 /* the additional entries can be insecure,
2630 like glue:
2631 "Glue address RRsets associated with delegations MUST NOT be signed"
2632 */
2633 if (i->first.type == QType::DNSKEY && i->first.place == DNSResourceRecord::ANSWER) {
2634 LOG(d_prefix<<"Validating DNSKEY for "<<i->first.name<<endl);
2635 recordState = validateDNSKeys(i->first.name, i->second.records, i->second.signatures, depth);
2636 }
2637 else {
2638 /*
2639 * RFC 6672 section 5.3.1
2640 * In any response, a signed DNAME RR indicates a non-terminal
2641 * redirection of the query. There might or might not be a server-
2642 * synthesized CNAME in the answer section; if there is, the CNAME will
2643 * never be signed. For a DNSSEC validator, verification of the DNAME
2644 * RR and then that the CNAME was properly synthesized is sufficient
2645 * proof.
2646 *
2647 * We do the synthesis check in processRecords, here we make sure we
2648 * don't validate the CNAME.
2649 */
2650 if (!(isDNAMEAnswer && i->first.type == QType::CNAME)) {
2651 LOG(d_prefix<<"Validating non-additional record for "<<i->first.name<<endl);
2652 recordState = validateRecordsWithSigs(depth, qname, qtype, i->first.name, i->second.records, i->second.signatures);
2653 /* we might have missed a cut (zone cut within the same auth servers), causing the NS query for an Insecure zone to seem Bogus during zone cut determination */
2654 if (qtype == QType::NS && i->second.signatures.empty() && recordState == Bogus && haveExactValidationStatus(i->first.name) && getValidationStatus(i->first.name) == Indeterminate) {
2655 recordState = Indeterminate;
2656 }
2657 }
2658 }
2659 }
2660 }
2661 else {
2662 recordState = Indeterminate;
2663
2664 /* in a non authoritative answer, we only care about the DS record (or lack of) */
2665 if ((i->first.type == QType::DS || i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && i->first.place == DNSResourceRecord::AUTHORITY) {
2666 LOG(d_prefix<<"Validating DS record for "<<i->first.name<<endl);
2667 recordState = validateRecordsWithSigs(depth, qname, qtype, i->first.name, i->second.records, i->second.signatures);
2668 }
2669 }
2670
2671 if (initialState == Secure && state != recordState && expectSignature) {
2672 updateValidationState(state, recordState);
2673 }
2674 }
2675 else {
2676 if (shouldValidate()) {
2677 LOG(d_prefix<<"Skipping validation because the current state is "<<vStates[recordState]<<endl);
2678 }
2679 }
2680
2681 if (recordState == Bogus) {
2682 /* this is a TTD by now, be careful */
2683 for(auto& record : i->second.records) {
2684 record.d_ttl = std::min(record.d_ttl, static_cast<uint32_t>(s_maxbogusttl + d_now.tv_sec));
2685 }
2686 }
2687
2688 /* We don't need to store NSEC3 records in the positive cache because:
2689 - we don't allow direct NSEC3 queries
2690 - denial of existence proofs in wildcard expanded positive responses are stored in authorityRecs
2691 - denial of existence proofs for negative responses are stored in the negative cache
2692 We also don't want to cache non-authoritative data except for:
2693 - records coming from non forward-recurse servers (those will never be AA)
2694 - DS (special case)
2695 - NS, A and AAAA (used for infra queries)
2696 */
2697 if (i->first.type != QType::NSEC3 && (i->first.type == QType::DS || i->first.type == QType::NS || i->first.type == QType::A || i->first.type == QType::AAAA || isAA || wasForwardRecurse)) {
2698
2699 bool doCache = true;
2700 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
2701 // If ednsmask is relevant, we do not want to cache if the scope prefix length is large and TTL is small
2702 if (SyncRes::s_ecscachelimitttl > 0) {
2703 bool manyMaskBits = (ednsmask->isIpv4() && ednsmask->getBits() > SyncRes::s_ecsipv4cachelimit) ||
2704 (ednsmask->isIpv6() && ednsmask->getBits() > SyncRes::s_ecsipv6cachelimit);
2705
2706 if (manyMaskBits) {
2707 uint32_t minttl = UINT32_MAX;
2708 for (const auto &it : i->second.records) {
2709 if (it.d_ttl < minttl)
2710 minttl = it.d_ttl;
2711 }
2712 bool ttlIsSmall = minttl < SyncRes::s_ecscachelimitttl + d_now.tv_sec;
2713 if (ttlIsSmall) {
2714 // Case: many bits and ttlIsSmall
2715 doCache = false;
2716 }
2717 }
2718 }
2719 }
2720 if (doCache) {
2721 t_RC->replace(d_now.tv_sec, i->first.name, QType(i->first.type), i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, recordState);
2722 }
2723 }
2724
2725 if(i->first.place == DNSResourceRecord::ANSWER && ednsmask)
2726 d_wasVariable=true;
2727 }
2728
2729 return RCode::NoError;
2730 }
2731
2732 void SyncRes::updateDenialValidationState(vState& neValidationState, const DNSName& neName, vState& state, const dState denialState, const dState expectedState, bool allowOptOut)
2733 {
2734 if (denialState == expectedState) {
2735 neValidationState = Secure;
2736 }
2737 else {
2738 if (denialState == OPTOUT && allowOptOut) {
2739 LOG(d_prefix<<"OPT-out denial found for "<<neName<<endl);
2740 neValidationState = Secure;
2741 return;
2742 }
2743 else if (denialState == INSECURE) {
2744 LOG(d_prefix<<"Insecure denial found for "<<neName<<", returning Insecure"<<endl);
2745 neValidationState = Insecure;
2746 }
2747 else {
2748 LOG(d_prefix<<"Invalid denial found for "<<neName<<", returning Bogus, res="<<denialState<<", expectedState="<<expectedState<<endl);
2749 neValidationState = Bogus;
2750 }
2751 updateValidationState(state, neValidationState);
2752 }
2753 }
2754
2755 dState SyncRes::getDenialValidationState(const NegCache::NegCacheEntry& ne, const vState state, const dState expectedState, bool referralToUnsigned)
2756 {
2757 cspmap_t csp = harvestCSPFromNE(ne);
2758 return getDenial(csp, ne.d_name, ne.d_qtype.getCode(), referralToUnsigned, expectedState == NXQTYPE);
2759 }
2760
2761 bool SyncRes::processRecords(const std::string& prefix, const DNSName& qname, const QType& qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherWildcardProof, const unsigned int wildcardLabelsCount)
2762 {
2763 bool done = false;
2764 DNSName dnameTarget, dnameOwner;
2765 uint32_t dnameTTL = 0;
2766
2767 for(auto& rec : lwr.d_records) {
2768 if (rec.d_type!=QType::OPT && rec.d_class!=QClass::IN)
2769 continue;
2770
2771 if (rec.d_place==DNSResourceRecord::ANSWER && !(lwr.d_aabit || sendRDQuery)) {
2772 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2773 are sending such responses */
2774 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
2775 continue;
2776 }
2777 }
2778
2779 if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::SOA &&
2780 lwr.d_rcode==RCode::NXDomain && qname.isPartOf(rec.d_name) && rec.d_name.isPartOf(auth)) {
2781 LOG(prefix<<qname<<": got negative caching indication for name '"<<qname<<"' (accept="<<rec.d_name.isPartOf(auth)<<"), newtarget='"<<newtarget<<"'"<<endl);
2782
2783 rec.d_ttl = min(rec.d_ttl, s_maxnegttl);
2784 if(newtarget.empty()) // only add a SOA if we're not going anywhere after this
2785 ret.push_back(rec);
2786
2787 NegCache::NegCacheEntry ne;
2788
2789 uint32_t lowestTTL = rec.d_ttl;
2790 /* if we get an NXDomain answer with a CNAME, the name
2791 does exist but the target does not */
2792 ne.d_name = newtarget.empty() ? qname : newtarget;
2793 ne.d_qtype = QType(0); // this encodes 'whole record'
2794 ne.d_auth = rec.d_name;
2795 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2796
2797 if (state == Secure) {
2798 dState denialState = getDenialValidationState(ne, state, NXDOMAIN, false);
2799 updateDenialValidationState(ne.d_validationState, ne.d_name, state, denialState, NXDOMAIN, false);
2800 }
2801 else {
2802 ne.d_validationState = state;
2803 }
2804
2805 if (ne.d_validationState == Bogus) {
2806 lowestTTL = min(lowestTTL, s_maxbogusttl);
2807 }
2808
2809 ne.d_ttd = d_now.tv_sec + lowestTTL;
2810 /* if we get an NXDomain answer with a CNAME, let's not cache the
2811 target, even the server was authoritative for it,
2812 and do an additional query for the CNAME target.
2813 We have a regression test making sure we do exactly that.
2814 */
2815 if(!wasVariable() && newtarget.empty()) {
2816 t_sstorage.negcache.add(ne);
2817 if(s_rootNXTrust && ne.d_auth.isRoot() && auth.isRoot() && lwr.d_aabit) {
2818 ne.d_name = ne.d_name.getLastLabel();
2819 t_sstorage.negcache.add(ne);
2820 }
2821 }
2822
2823 negindic=true;
2824 }
2825 else if(rec.d_place==DNSResourceRecord::ANSWER && s_redirectionQTypes.count(rec.d_type) > 0 && // CNAME or DNAME answer
2826 s_redirectionQTypes.count(qtype.getCode()) == 0) { // But not in response to a CNAME or DNAME query
2827 if (rec.d_type == QType::CNAME && rec.d_name == qname) {
2828 if (!dnameOwner.empty()) { // We synthesize ourselves
2829 continue;
2830 }
2831 ret.push_back(rec);
2832 if (auto content = getRR<CNAMERecordContent>(rec)) {
2833 newtarget=content->getTarget();
2834 }
2835 } else if (rec.d_type == QType::DNAME && qname.isPartOf(rec.d_name)) { // DNAME
2836 ret.push_back(rec);
2837 if (auto content = getRR<DNAMERecordContent>(rec)) {
2838 dnameOwner = rec.d_name;
2839 dnameTarget = content->getTarget();
2840 dnameTTL = rec.d_ttl;
2841 if (!newtarget.empty()) { // We had a CNAME before, remove it from ret so we don't cache it
2842 ret.erase(std::remove_if(
2843 ret.begin(),
2844 ret.end(),
2845 [&qname](DNSRecord& rr) {
2846 return (rr.d_place == DNSResourceRecord::ANSWER && rr.d_type == QType::CNAME && rr.d_name == qname);
2847 }),
2848 ret.end());
2849 }
2850 try {
2851 newtarget = qname.makeRelative(dnameOwner) + dnameTarget;
2852 } catch (const std::exception &e) {
2853 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
2854 // But there is no way to set the RCODE from this function
2855 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + dnameOwner.toLogString() +
2856 "', DNAME target: '" + dnameTarget.toLogString() + "', substituted name: '" +
2857 qname.makeRelative(dnameOwner).toLogString() + "." + dnameTarget.toLogString() +
2858 "' : " + e.what());
2859 }
2860 }
2861 }
2862 }
2863 /* if we have a positive answer synthetized from a wildcard, we need to
2864 return the corresponding NSEC/NSEC3 records from the AUTHORITY section
2865 proving that the exact name did not exist */
2866 else if(gatherWildcardProof && (rec.d_type==QType::RRSIG || rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && rec.d_place==DNSResourceRecord::AUTHORITY) {
2867 ret.push_back(rec); // enjoy your DNSSEC
2868 }
2869 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
2870 else if(rec.d_place==DNSResourceRecord::ANSWER && rec.d_name == qname &&
2871 (
2872 rec.d_type==qtype.getCode() || ((lwr.d_aabit || sendRDQuery) && qtype == QType(QType::ANY))
2873 )
2874 )
2875 {
2876 LOG(prefix<<qname<<": answer is in: resolved to '"<< rec.d_content->getZoneRepresentation()<<"|"<<DNSRecordContent::NumberToType(rec.d_type)<<"'"<<endl);
2877
2878 done=true;
2879
2880 if (state == Secure && needWildcardProof) {
2881 /* We have a positive answer synthetized from a wildcard, we need to check that we have
2882 proof that the exact name doesn't exist so the wildcard can be used,
2883 as described in section 5.3.4 of RFC 4035 and 5.3 of FRC 7129.
2884 */
2885 NegCache::NegCacheEntry ne;
2886
2887 uint32_t lowestTTL = rec.d_ttl;
2888 ne.d_name = qname;
2889 ne.d_qtype = QType(0); // this encodes 'whole record'
2890 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2891
2892 cspmap_t csp = harvestCSPFromNE(ne);
2893 dState res = getDenial(csp, qname, ne.d_qtype.getCode(), false, false, false, wildcardLabelsCount);
2894 if (res != NXDOMAIN) {
2895 vState st = Bogus;
2896 if (res == INSECURE) {
2897 /* Some part could not be validated, for example a NSEC3 record with a too large number of iterations,
2898 this is not enough to warrant a Bogus, but go Insecure. */
2899 st = Insecure;
2900 LOG(d_prefix<<"Unable to validate denial in wildcard expanded positive response found for "<<qname<<", returning Insecure, res="<<res<<endl);
2901 }
2902 else {
2903 LOG(d_prefix<<"Invalid denial in wildcard expanded positive response found for "<<qname<<", returning Bogus, res="<<res<<endl);
2904 rec.d_ttl = std::min(rec.d_ttl, s_maxbogusttl);
2905 }
2906
2907 updateValidationState(state, st);
2908 /* we already stored the record with a different validation status, let's fix it */
2909 updateValidationStatusInCache(qname, qtype, lwr.d_aabit, st);
2910 }
2911 }
2912 ret.push_back(rec);
2913 }
2914 else if((rec.d_type==QType::RRSIG || rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && rec.d_place==DNSResourceRecord::ANSWER) {
2915 if(rec.d_type != QType::RRSIG || rec.d_name == qname) {
2916 ret.push_back(rec); // enjoy your DNSSEC
2917 } else if(rec.d_type == QType::RRSIG && qname.isPartOf(rec.d_name)) {
2918 auto rrsig = getRR<RRSIGRecordContent>(rec);
2919 if (rrsig != nullptr && rrsig->d_type == QType::DNAME) {
2920 ret.push_back(rec);
2921 }
2922 }
2923 }
2924 else if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::NS && qname.isPartOf(rec.d_name)) {
2925 if(moreSpecificThan(rec.d_name,auth)) {
2926 newauth=rec.d_name;
2927 LOG(prefix<<qname<<": got NS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"'"<<endl);
2928 realreferral=true;
2929 }
2930 else {
2931 LOG(prefix<<qname<<": got upwards/level NS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"', had '"<<auth<<"'"<<endl);
2932 }
2933 if (auto content = getRR<NSRecordContent>(rec)) {
2934 nsset.insert(content->getNS());
2935 }
2936 }
2937 else if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::DS && qname.isPartOf(rec.d_name)) {
2938 LOG(prefix<<qname<<": got DS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"'"<<endl);
2939 }
2940 else if(realreferral && rec.d_place==DNSResourceRecord::AUTHORITY && (rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && newauth.isPartOf(auth)) {
2941 /* we might have received a denial of the DS, let's check */
2942 if (state == Secure) {
2943 NegCache::NegCacheEntry ne;
2944 ne.d_auth = auth;
2945 ne.d_name = newauth;
2946 ne.d_qtype = QType::DS;
2947 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
2948 uint32_t lowestTTL = rec.d_ttl;
2949 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2950
2951 dState denialState = getDenialValidationState(ne, state, NXQTYPE, true);
2952
2953 if (denialState == NXQTYPE || denialState == OPTOUT || denialState == INSECURE) {
2954 ne.d_ttd = lowestTTL + d_now.tv_sec;
2955 ne.d_validationState = Secure;
2956 LOG(prefix<<qname<<": got negative indication of DS record for '"<<newauth<<"'"<<endl);
2957
2958 if(!wasVariable()) {
2959 t_sstorage.negcache.add(ne);
2960 }
2961
2962 if (qname == newauth && qtype == QType::DS) {
2963 /* we are actually done! */
2964 negindic=true;
2965 nsset.clear();
2966 }
2967 }
2968 }
2969 }
2970 else if(!done && rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::SOA &&
2971 lwr.d_rcode==RCode::NoError && qname.isPartOf(rec.d_name)) {
2972 LOG(prefix<<qname<<": got negative caching indication for '"<< qname<<"|"<<qtype.getName()<<"'"<<endl);
2973
2974 if(!newtarget.empty()) {
2975 LOG(prefix<<qname<<": Hang on! Got a redirect to '"<<newtarget<<"' already"<<endl);
2976 }
2977 else {
2978 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
2979
2980 NegCache::NegCacheEntry ne;
2981 ne.d_auth = rec.d_name;
2982 uint32_t lowestTTL = rec.d_ttl;
2983 ne.d_name = qname;
2984 ne.d_qtype = qtype;
2985 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2986
2987 if (state == Secure) {
2988 dState denialState = getDenialValidationState(ne, state, NXQTYPE, false);
2989 updateDenialValidationState(ne.d_validationState, ne.d_name, state, denialState, NXQTYPE, qtype == QType::DS);
2990 } else {
2991 ne.d_validationState = state;
2992 }
2993
2994 if (ne.d_validationState == Bogus) {
2995 lowestTTL = min(lowestTTL, s_maxbogusttl);
2996 rec.d_ttl = min(rec.d_ttl, s_maxbogusttl);
2997 }
2998 ne.d_ttd = d_now.tv_sec + lowestTTL;
2999
3000 if(!wasVariable()) {
3001 if(qtype.getCode()) { // prevents us from blacking out a whole domain
3002 t_sstorage.negcache.add(ne);
3003 }
3004 }
3005
3006 ret.push_back(rec);
3007 negindic=true;
3008 }
3009 }
3010 }
3011
3012 if (!dnameTarget.empty()) {
3013 // Synthesize a CNAME
3014 auto cnamerec = DNSRecord();
3015 cnamerec.d_name = qname;
3016 cnamerec.d_type = QType::CNAME;
3017 cnamerec.d_ttl = dnameTTL;
3018 cnamerec.d_content = std::make_shared<CNAMERecordContent>(CNAMERecordContent(newtarget));
3019 ret.push_back(cnamerec);
3020 }
3021 return done;
3022 }
3023
3024 bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType& qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool* truncated)
3025 {
3026 bool chained = false;
3027 int resolveret = RCode::NoError;
3028 s_outqueries++;
3029 d_outqueries++;
3030
3031 if(d_outqueries + d_throttledqueries > s_maxqperq) {
3032 throw ImmediateServFailException("more than "+std::to_string(s_maxqperq)+" (max-qperq) queries sent while resolving "+qname.toLogString());
3033 }
3034
3035 if(s_maxtotusec && d_totUsec > s_maxtotusec) {
3036 throw ImmediateServFailException("Too much time waiting for "+qname.toLogString()+"|"+qtype.getName()+", timeouts: "+std::to_string(d_timeouts) +", throttles: "+std::to_string(d_throttledqueries) + ", queries: "+std::to_string(d_outqueries)+", "+std::to_string(d_totUsec/1000)+"msec");
3037 }
3038
3039 if(doTCP) {
3040 LOG(prefix<<qname<<": using TCP with "<< remoteIP.toStringWithPort() <<endl);
3041 s_tcpoutqueries++;
3042 d_tcpoutqueries++;
3043 }
3044
3045 if(d_pdl && d_pdl->preoutquery(remoteIP, d_requestor, qname, qtype, doTCP, lwr.d_records, resolveret)) {
3046 LOG(prefix<<qname<<": query handled by Lua"<<endl);
3047 }
3048 else {
3049 ednsmask=getEDNSSubnetMask(qname, remoteIP);
3050 if(ednsmask) {
3051 LOG(prefix<<qname<<": Adding EDNS Client Subnet Mask "<<ednsmask->toString()<<" to query"<<endl);
3052 s_ecsqueries++;
3053 }
3054 resolveret = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, auth, qtype.getCode(),
3055 doTCP, sendRDQuery, &d_now, ednsmask, &lwr, &chained); // <- we go out on the wire!
3056 if(ednsmask) {
3057 s_ecsresponses++;
3058 LOG(prefix<<qname<<": Received EDNS Client Subnet Mask "<<ednsmask->toString()<<" on response"<<endl);
3059 if (ednsmask->getBits() > 0) {
3060 if (ednsmask->isIpv4()) {
3061 ++SyncRes::s_ecsResponsesBySubnetSize4.at(ednsmask->getBits()-1);
3062 }
3063 else {
3064 ++SyncRes::s_ecsResponsesBySubnetSize6.at(ednsmask->getBits()-1);
3065 }
3066 }
3067 }
3068 }
3069
3070 /* preoutquery killed the query by setting dq.rcode to -3 */
3071 if(resolveret==-3) {
3072 throw ImmediateServFailException("Query killed by policy");
3073 }
3074
3075 d_totUsec += lwr.d_usec;
3076 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
3077
3078 bool dontThrottle = false;
3079 {
3080 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3081 auto dontThrottleNetmasks = g_dontThrottleNetmasks.getLocal();
3082 dontThrottle = dontThrottleNames->check(nsName) || dontThrottleNetmasks->match(remoteIP);
3083 }
3084
3085 if(resolveret != 1) {
3086 /* Error while resolving */
3087 if(resolveret == 0) {
3088 /* Time out */
3089
3090 LOG(prefix<<qname<<": timeout resolving after "<<lwr.d_usec/1000.0<<"msec "<< (doTCP ? "over TCP" : "")<<endl);
3091 d_timeouts++;
3092 s_outgoingtimeouts++;
3093
3094 if(remoteIP.sin4.sin_family == AF_INET)
3095 s_outgoing4timeouts++;
3096 else
3097 s_outgoing6timeouts++;
3098
3099 if(t_timeouts)
3100 t_timeouts->push_back(remoteIP);
3101 }
3102 else if(resolveret == -2) {
3103 /* OS resource limit reached */
3104 LOG(prefix<<qname<<": hit a local resource limit resolving"<< (doTCP ? " over TCP" : "")<<", probable error: "<<stringerror()<<endl);
3105 g_stats.resourceLimits++;
3106 }
3107 else {
3108 /* -1 means server unreachable */
3109 s_unreachables++;
3110 d_unreachables++;
3111 LOG(prefix<<qname<<": error resolving from "<<remoteIP.toString()<< (doTCP ? " over TCP" : "") <<", possible error: "<<strerror(errno)<< endl);
3112 }
3113
3114 if(resolveret != -2 && !chained && !dontThrottle) {
3115 // don't account for resource limits, they are our own fault
3116 // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
3117 t_sstorage.nsSpeeds[nsName.empty()? DNSName(remoteIP.toStringWithPort()) : nsName].submit(remoteIP, 1000000, &d_now); // 1 sec
3118
3119 // code below makes sure we don't filter COM or the root
3120 if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && t_sstorage.fails.incr(remoteIP) >= s_serverdownmaxfails) {
3121 LOG(prefix<<qname<<": Max fails reached resolving on "<< remoteIP.toString() <<". Going full throttle for "<< s_serverdownthrottletime <<" seconds" <<endl);
3122 // mark server as down
3123 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, "", 0), s_serverdownthrottletime, 10000);
3124 }
3125 else if (resolveret == -1) {
3126 // unreachable, 1 minute or 100 queries
3127 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 100);
3128 }
3129 else {
3130 // timeout, 10 seconds or 5 queries
3131 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 10, 5);
3132 }
3133 }
3134
3135 return false;
3136 }
3137
3138 /* we got an answer */
3139 if(lwr.d_rcode==RCode::ServFail || lwr.d_rcode==RCode::Refused) {
3140 LOG(prefix<<qname<<": "<<nsName<<" ("<<remoteIP.toString()<<") returned a "<< (lwr.d_rcode==RCode::ServFail ? "ServFail" : "Refused") << ", trying sibling IP or NS"<<endl);
3141 if (!chained && !dontThrottle) {
3142 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 3);
3143 }
3144 return false;
3145 }
3146
3147 /* this server sent a valid answer, mark it backup up if it was down */
3148 if(s_serverdownmaxfails > 0) {
3149 t_sstorage.fails.clear(remoteIP);
3150 }
3151
3152 if(lwr.d_tcbit) {
3153 *truncated = true;
3154
3155 if (doTCP) {
3156 LOG(prefix<<qname<<": truncated bit set, over TCP?"<<endl);
3157 if (!dontThrottle) {
3158 /* let's treat that as a ServFail answer from this server */
3159 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 3);
3160 }
3161 return false;
3162 }
3163 LOG(prefix<<qname<<": truncated bit set, over UDP"<<endl);
3164
3165 return true;
3166 }
3167
3168 return true;
3169 }
3170
3171 bool SyncRes::processAnswer(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType& qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, bool sendRDQuery, NsSet &nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state)
3172 {
3173 string prefix;
3174 if(doLog()) {
3175 prefix=d_prefix;
3176 prefix.append(depth, ' ');
3177 }
3178
3179 if(s_minimumTTL) {
3180 for(auto& rec : lwr.d_records) {
3181 rec.d_ttl = max(rec.d_ttl, s_minimumTTL);
3182 }
3183 }
3184
3185 /* if the answer is ECS-specific, a minimum TTL is set for this kind of answers
3186 and it's higher than the global minimum TTL */
3187 if (ednsmask && s_minimumECSTTL > 0 && (s_minimumTTL == 0 || s_minimumECSTTL > s_minimumTTL)) {
3188 for(auto& rec : lwr.d_records) {
3189 if (rec.d_place == DNSResourceRecord::ANSWER) {
3190 rec.d_ttl = max(rec.d_ttl, s_minimumECSTTL);
3191 }
3192 }
3193 }
3194
3195 bool needWildcardProof = false;
3196 bool gatherWildcardProof = false;
3197 unsigned int wildcardLabelsCount;
3198 *rcode = updateCacheFromRecords(depth, lwr, qname, qtype, auth, wasForwarded, ednsmask, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, sendRDQuery);
3199 if (*rcode != RCode::NoError) {
3200 return true;
3201 }
3202
3203 LOG(prefix<<qname<<": determining status after receiving this packet"<<endl);
3204
3205 set<DNSName> nsset;
3206 bool realreferral=false, negindic=false;
3207 DNSName newauth;
3208 DNSName newtarget;
3209
3210 bool done = processRecords(prefix, qname, qtype, auth, lwr, sendRDQuery, ret, nsset, newtarget, newauth, realreferral, negindic, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount);
3211
3212 if(done){
3213 LOG(prefix<<qname<<": status=got results, this level of recursion done"<<endl);
3214 LOG(prefix<<qname<<": validation status is "<<vStates[state]<<endl);
3215 *rcode = RCode::NoError;
3216 return true;
3217 }
3218
3219 if(!newtarget.empty()) {
3220 if(newtarget == qname) {
3221 LOG(prefix<<qname<<": status=got a CNAME referral to self, returning SERVFAIL"<<endl);
3222 *rcode = RCode::ServFail;
3223 return true;
3224 }
3225
3226 if(depth > 10) {
3227 LOG(prefix<<qname<<": status=got a CNAME referral, but recursing too deep, returning SERVFAIL"<<endl);
3228 *rcode = RCode::ServFail;
3229 return true;
3230 }
3231
3232 if (qtype == QType::DS) {
3233 LOG(prefix<<qname<<": status=got a CNAME referral, but we are looking for a DS"<<endl);
3234
3235 if(d_doDNSSEC)
3236 addNXNSECS(ret, lwr.d_records);
3237
3238 *rcode = RCode::NoError;
3239 return true;
3240 }
3241 else {
3242 LOG(prefix<<qname<<": status=got a CNAME referral, starting over with "<<newtarget<<endl);
3243
3244 set<GetBestNSAnswer> beenthere2;
3245 vState cnameState = Indeterminate;
3246 *rcode = doResolve(newtarget, qtype, ret, depth + 1, beenthere2, cnameState);
3247 LOG(prefix<<qname<<": updating validation state for response to "<<qname<<" from "<<vStates[state]<<" with the state from the CNAME quest: "<<vStates[cnameState]<<endl);
3248 updateValidationState(state, cnameState);
3249 return true;
3250 }
3251 }
3252
3253 if(lwr.d_rcode == RCode::NXDomain) {
3254 LOG(prefix<<qname<<": status=NXDOMAIN, we are done "<<(negindic ? "(have negative SOA)" : "")<<endl);
3255
3256 if(d_doDNSSEC)
3257 addNXNSECS(ret, lwr.d_records);
3258
3259 *rcode = RCode::NXDomain;
3260 return true;
3261 }
3262
3263 if(nsset.empty() && !lwr.d_rcode && (negindic || lwr.d_aabit || sendRDQuery)) {
3264 LOG(prefix<<qname<<": status=noerror, other types may exist, but we are done "<<(negindic ? "(have negative SOA) " : "")<<(lwr.d_aabit ? "(have aa bit) " : "")<<endl);
3265
3266 if(state == Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
3267 updateValidationState(state, Bogus);
3268 }
3269
3270 if(d_doDNSSEC)
3271 addNXNSECS(ret, lwr.d_records);
3272
3273 *rcode = RCode::NoError;
3274 return true;
3275 }
3276
3277 if(realreferral) {
3278 LOG(prefix<<qname<<": status=did not resolve, got "<<(unsigned int)nsset.size()<<" NS, ");
3279
3280 nameservers.clear();
3281 for (auto const &nameserver : nsset) {
3282 if (d_wantsRPZ) {
3283 d_appliedPolicy = dfe.getProcessingPolicy(nameserver, d_discardedPolicies);
3284 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3285 LOG("however "<<nameserver<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
3286 *rcode = -2;
3287 return true;
3288 }
3289 }
3290 nameservers.insert({nameserver, {{}, false}});
3291 }
3292 LOG("looping to them"<<endl);
3293 *gotNewServers = true;
3294 auth=newauth;
3295
3296 return false;
3297 }
3298
3299 return false;
3300 }
3301
3302 /** returns:
3303 * -1 in case of no results
3304 * -2 when a FilterEngine Policy was hit
3305 * rcode otherwise
3306 */
3307 int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, const QType &qtype,
3308 vector<DNSRecord>&ret,
3309 unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation)
3310 {
3311 auto luaconfsLocal = g_luaconfs.getLocal();
3312 string prefix;
3313 if(doLog()) {
3314 prefix=d_prefix;
3315 prefix.append(depth, ' ');
3316 }
3317
3318 LOG(prefix<<qname<<": Cache consultations done, have "<<(unsigned int)nameservers.size()<<" NS to contact");
3319
3320 if (nameserversBlockedByRPZ(luaconfsLocal->dfe, nameservers)) {
3321 return -2;
3322 }
3323
3324 LOG(endl);
3325
3326 for(;;) { // we may get more specific nameservers
3327 auto rnameservers = shuffleInSpeedOrder(nameservers, doLog() ? (prefix+qname.toString()+": ") : string() );
3328
3329 for(auto tns=rnameservers.cbegin();;++tns) {
3330 if(tns==rnameservers.cend()) {
3331 LOG(prefix<<qname<<": Failed to resolve via any of the "<<(unsigned int)rnameservers.size()<<" offered NS at level '"<<auth<<"'"<<endl);
3332 if(!auth.isRoot() && flawedNSSet) {
3333 LOG(prefix<<qname<<": Ageing nameservers for level '"<<auth<<"', next query might succeed"<<endl);
3334
3335 if(t_RC->doAgeCache(d_now.tv_sec, auth, QType::NS, 10))
3336 g_stats.nsSetInvalidations++;
3337 }
3338 return -1;
3339 }
3340
3341 bool cacheOnly = false;
3342 // this line needs to identify the 'self-resolving' behaviour
3343 if(qname == tns->first && (qtype.getCode() == QType::A || qtype.getCode() == QType::AAAA)) {
3344 /* we might have a glue entry in cache so let's try this NS
3345 but only if we have enough in the cache to know how to reach it */
3346 LOG(prefix<<qname<<": Using NS to resolve itself, but only using what we have in cache ("<<(1+tns-rnameservers.cbegin())<<"/"<<rnameservers.size()<<")"<<endl);
3347 cacheOnly = true;
3348 }
3349
3350 typedef vector<ComboAddress> remoteIPs_t;
3351 remoteIPs_t remoteIPs;
3352 remoteIPs_t::const_iterator remoteIP;
3353 bool pierceDontQuery=false;
3354 bool sendRDQuery=false;
3355 boost::optional<Netmask> ednsmask;
3356 LWResult lwr;
3357 const bool wasForwarded = tns->first.empty() && (!nameservers[tns->first].first.empty());
3358 int rcode = RCode::NoError;
3359 bool gotNewServers = false;
3360
3361 if(tns->first.empty() && !wasForwarded) {
3362 LOG(prefix<<qname<<": Domain is out-of-band"<<endl);
3363 /* setting state to indeterminate since validation is disabled for local auth zone,
3364 and Insecure would be misleading. */
3365 state = Indeterminate;
3366 d_wasOutOfBand = doOOBResolve(qname, qtype, lwr.d_records, depth, lwr.d_rcode);
3367 lwr.d_tcbit=false;
3368 lwr.d_aabit=true;
3369
3370 /* we have received an answer, are we done ? */
3371 bool done = processAnswer(depth, lwr, qname, qtype, auth, false, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, state);
3372 if (done) {
3373 return rcode;
3374 }
3375 if (gotNewServers) {
3376 if (stopAtDelegation && *stopAtDelegation == Stop) {
3377 *stopAtDelegation = Stopped;
3378 return rcode;
3379 }
3380 break;
3381 }
3382 }
3383 else {
3384 /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */
3385 remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly);
3386
3387 if(remoteIPs.empty()) {
3388 LOG(prefix<<qname<<": Failed to get IP for NS "<<tns->first<<", trying next if available"<<endl);
3389 flawedNSSet=true;
3390 continue;
3391 }
3392 else {
3393 bool hitPolicy{false};
3394 LOG(prefix<<qname<<": Resolved '"<<auth<<"' NS "<<tns->first<<" to: ");
3395 for(remoteIP = remoteIPs.cbegin(); remoteIP != remoteIPs.cend(); ++remoteIP) {
3396 if(remoteIP != remoteIPs.cbegin()) {
3397 LOG(", ");
3398 }
3399 LOG(remoteIP->toString());
3400 if(nameserverIPBlockedByRPZ(luaconfsLocal->dfe, *remoteIP)) {
3401 hitPolicy = true;
3402 }
3403 }
3404 LOG(endl);
3405 if (hitPolicy) //implies d_wantsRPZ
3406 return -2;
3407 }
3408
3409 for(remoteIP = remoteIPs.cbegin(); remoteIP != remoteIPs.cend(); ++remoteIP) {
3410 LOG(prefix<<qname<<": Trying IP "<< remoteIP->toStringWithPort() <<", asking '"<<qname<<"|"<<qtype.getName()<<"'"<<endl);
3411
3412 if (throttledOrBlocked(prefix, *remoteIP, qname, qtype, pierceDontQuery)) {
3413 continue;
3414 }
3415
3416 bool truncated = false;
3417 bool gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery,
3418 tns->first, *remoteIP, false, &truncated);
3419 if (gotAnswer && truncated ) {
3420 /* retry, over TCP this time */
3421 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery,
3422 tns->first, *remoteIP, true, &truncated);
3423 }
3424
3425 if (!gotAnswer) {
3426 continue;
3427 }
3428
3429 LOG(prefix<<qname<<": Got "<<(unsigned int)lwr.d_records.size()<<" answers from "<<tns->first<<" ("<< remoteIP->toString() <<"), rcode="<<lwr.d_rcode<<" ("<<RCode::to_s(lwr.d_rcode)<<"), aa="<<lwr.d_aabit<<", in "<<lwr.d_usec/1000<<"ms"<<endl);
3430
3431 /* // for you IPv6 fanatics :-)
3432 if(remoteIP->sin4.sin_family==AF_INET6)
3433 lwr.d_usec/=3;
3434 */
3435 // cout<<"msec: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
3436
3437 t_sstorage.nsSpeeds[tns->first.empty()? DNSName(remoteIP->toStringWithPort()) : tns->first].submit(*remoteIP, lwr.d_usec, &d_now);
3438
3439 /* we have received an answer, are we done ? */
3440 bool done = processAnswer(depth, lwr, qname, qtype, auth, wasForwarded, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, state);
3441 if (done) {
3442 return rcode;
3443 }
3444 if (gotNewServers) {
3445 if (stopAtDelegation && *stopAtDelegation == Stop) {
3446 *stopAtDelegation = Stopped;
3447 return rcode;
3448 }
3449 break;
3450 }
3451 /* was lame */
3452 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()), 60, 100);
3453 }
3454
3455 if (gotNewServers) {
3456 break;
3457 }
3458
3459 if(remoteIP == remoteIPs.cend()) // we tried all IP addresses, none worked
3460 continue;
3461
3462 }
3463 }
3464 }
3465 return -1;
3466 }
3467
3468 void SyncRes::setQuerySource(const ComboAddress& requestor, boost::optional<const EDNSSubnetOpts&> incomingECS)
3469 {
3470 d_requestor = requestor;
3471
3472 if (incomingECS && incomingECS->source.getBits() > 0) {
3473 d_cacheRemote = incomingECS->source.getMaskedNetwork();
3474 uint8_t bits = std::min(incomingECS->source.getBits(), (incomingECS->source.isIpv4() ? s_ecsipv4limit : s_ecsipv6limit));
3475 ComboAddress trunc = incomingECS->source.getNetwork();
3476 trunc.truncate(bits);
3477 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
3478 } else {
3479 d_cacheRemote = d_requestor;
3480 if(!incomingECS && s_ednslocalsubnets.match(d_requestor)) {
3481 ComboAddress trunc = d_requestor;
3482 uint8_t bits = d_requestor.isIPv4() ? 32 : 128;
3483 bits = std::min(bits, (trunc.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
3484 trunc.truncate(bits);
3485 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
3486 } else if (s_ecsScopeZero.source.getBits() > 0) {
3487 /* RFC7871 says we MUST NOT send any ECS if the source scope is 0.
3488 But using an empty ECS in that case would mean inserting
3489 a non ECS-specific entry into the cache, preventing any further
3490 ECS-specific query to be sent.
3491 So instead we use the trick described in section 7.1.2:
3492 "The subsequent Recursive Resolver query to the Authoritative Nameserver
3493 will then either not include an ECS option or MAY optionally include
3494 its own address information, which is what the Authoritative
3495 Nameserver will almost certainly use to generate any Tailored
3496 Response in lieu of an option. This allows the answer to be handled
3497 by the same caching mechanism as other queries, with an explicit
3498 indicator of the applicable scope. Subsequent Stub Resolver queries
3499 for /0 can then be answered from this cached response.
3500 */
3501 d_outgoingECSNetwork = boost::optional<Netmask>(s_ecsScopeZero.source.getMaskedNetwork());
3502 d_cacheRemote = s_ecsScopeZero.source.getNetwork();
3503 } else {
3504 // ECS disabled because no scope-zero address could be derived.
3505 d_outgoingECSNetwork = boost::none;
3506 }
3507 }
3508 }
3509
3510 boost::optional<Netmask> SyncRes::getEDNSSubnetMask(const DNSName& dn, const ComboAddress& rem)
3511 {
3512 if(d_outgoingECSNetwork && (s_ednsdomains.check(dn) || s_ednsremotesubnets.match(rem))) {
3513 return d_outgoingECSNetwork;
3514 }
3515 return boost::none;
3516 }
3517
3518 void SyncRes::parseEDNSSubnetWhitelist(const std::string& wlist)
3519 {
3520 vector<string> parts;
3521 stringtok(parts, wlist, ",; ");
3522 for(const auto& a : parts) {
3523 try {
3524 s_ednsremotesubnets.addMask(Netmask(a));
3525 }
3526 catch(...) {
3527 s_ednsdomains.add(DNSName(a));
3528 }
3529 }
3530 }
3531
3532 void SyncRes::parseEDNSSubnetAddFor(const std::string& subnetlist)
3533 {
3534 vector<string> parts;
3535 stringtok(parts, subnetlist, ",; ");
3536 for(const auto& a : parts) {
3537 s_ednslocalsubnets.addMask(a);
3538 }
3539 }
3540
3541 // used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_ercursor.cc
3542 int directResolve(const DNSName& qname, const QType& qtype, int qclass, vector<DNSRecord>& ret)
3543 {
3544 struct timeval now;
3545 gettimeofday(&now, 0);
3546
3547 SyncRes sr(now);
3548 int res = -1;
3549 try {
3550 res = sr.beginResolve(qname, QType(qtype), qclass, ret);
3551 }
3552 catch(const PDNSException& e) {
3553 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got pdns exception: "<<e.reason<<endl;
3554 ret.clear();
3555 }
3556 catch(const ImmediateServFailException& e) {
3557 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got ImmediateServFailException: "<<e.reason<<endl;
3558 ret.clear();
3559 }
3560 catch(const std::exception& e) {
3561 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got STL error: "<<e.what()<<endl;
3562 ret.clear();
3563 }
3564 catch(...) {
3565 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got an exception"<<endl;
3566 ret.clear();
3567 }
3568
3569 return res;
3570 }
3571
3572 int SyncRes::getRootNS(struct timeval now, asyncresolve_t asyncCallback) {
3573 SyncRes sr(now);
3574 sr.setDoEDNS0(true);
3575 sr.setUpdatingRootNS();
3576 sr.setDoDNSSEC(g_dnssecmode != DNSSECMode::Off);
3577 sr.setDNSSECValidationRequested(g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate);
3578 sr.setAsyncCallback(asyncCallback);
3579
3580 vector<DNSRecord> ret;
3581 int res=-1;
3582 try {
3583 res=sr.beginResolve(g_rootdnsname, QType(QType::NS), 1, ret);
3584 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
3585 auto state = sr.getValidationState();
3586 if (state == Bogus)
3587 throw PDNSException("Got Bogus validation result for .|NS");
3588 }
3589 return res;
3590 }
3591 catch(const PDNSException& e) {
3592 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3593 }
3594 catch(const ImmediateServFailException& e) {
3595 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3596 }
3597 catch(const std::exception& e) {
3598 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
3599 }
3600 catch(...) {
3601 g_log<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
3602 }
3603
3604 if(!res) {
3605 g_log<<Logger::Notice<<"Refreshed . records"<<endl;
3606 }
3607 else
3608 g_log<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
3609
3610 return res;
3611 }