]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/syncres.cc
Merge pull request #7962 from pieterlexis/auth-add-edited-serial-to-Zone
[thirdparty/pdns.git] / pdns / syncres.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "arguments.hh"
27 #include "cachecleaner.hh"
28 #include "dns_random.hh"
29 #include "dnsparser.hh"
30 #include "dnsrecords.hh"
31 #include "ednssubnet.hh"
32 #include "logger.hh"
33 #include "lua-recursor4.hh"
34 #include "rec-lua-conf.hh"
35 #include "syncres.hh"
36 #include "dnsseckeeper.hh"
37 #include "validate-recursor.hh"
38
// Thread-local state: each thread gets its own instance of these two objects.
thread_local SyncRes::ThreadLocalStorage SyncRes::t_sstorage;
thread_local std::unique_ptr<addrringbuf_t> t_timeouts;

// Out-of-line definitions of SyncRes' static data members (shared by all instances).
// Name/netmask based settings, plus the server id returned for id.server. CH TXT queries.
std::unordered_set<DNSName> SyncRes::s_delegationOnly;
std::unique_ptr<NetmaskGroup> SyncRes::s_dontQuery{nullptr};
NetmaskGroup SyncRes::s_ednslocalsubnets;
NetmaskGroup SyncRes::s_ednsremotesubnets;
SuffixMatchNode SyncRes::s_ednsdomains;
EDNSSubnetOpts SyncRes::s_ecsScopeZero;
string SyncRes::s_serverID;
SyncRes::LogMode SyncRes::s_lm;
// The two query types listed here are CNAME and DNAME.
const std::unordered_set<uint16_t> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};

// Numeric limits and TTL settings; only s_maxdepth is read in this part of the file
// (as the recursion depth limit) -- presumably all are filled in from the configuration, confirm at the call sites.
unsigned int SyncRes::s_maxnegttl;
unsigned int SyncRes::s_maxbogusttl;
unsigned int SyncRes::s_maxcachettl;
unsigned int SyncRes::s_maxqperq;
unsigned int SyncRes::s_maxtotusec;
unsigned int SyncRes::s_maxdepth;
unsigned int SyncRes::s_minimumTTL;
unsigned int SyncRes::s_minimumECSTTL;
unsigned int SyncRes::s_packetcachettl;
unsigned int SyncRes::s_packetcacheservfailttl;
unsigned int SyncRes::s_serverdownmaxfails;
unsigned int SyncRes::s_serverdownthrottletime;
unsigned int SyncRes::s_ecscachelimitttl;
// Class-wide atomic counters; s_queries and s_authzonequeries are incremented further down in this file.
std::atomic<uint64_t> SyncRes::s_authzonequeries;
std::atomic<uint64_t> SyncRes::s_queries;
std::atomic<uint64_t> SyncRes::s_outgoingtimeouts;
std::atomic<uint64_t> SyncRes::s_outgoing4timeouts;
std::atomic<uint64_t> SyncRes::s_outgoing6timeouts;
std::atomic<uint64_t> SyncRes::s_outqueries;
std::atomic<uint64_t> SyncRes::s_tcpoutqueries;
std::atomic<uint64_t> SyncRes::s_throttledqueries;
std::atomic<uint64_t> SyncRes::s_dontqueries;
std::atomic<uint64_t> SyncRes::s_nodelegated;
std::atomic<uint64_t> SyncRes::s_unreachables;
std::atomic<uint64_t> SyncRes::s_ecsqueries;
std::atomic<uint64_t> SyncRes::s_ecsresponses;
// Atomic counters keyed by a uint8_t (presumably the subnet prefix length -- confirm where they are updated).
std::map<uint8_t, std::atomic<uint64_t>> SyncRes::s_ecsResponsesBySubnetSize4;
std::map<uint8_t, std::atomic<uint64_t>> SyncRes::s_ecsResponsesBySubnetSize6;

// EDNS Client Subnet limits, one pair for IPv4 and one for IPv6.
uint8_t SyncRes::s_ecsipv4limit;
uint8_t SyncRes::s_ecsipv6limit;
uint8_t SyncRes::s_ecsipv4cachelimit;
uint8_t SyncRes::s_ecsipv6cachelimit;

// Feature switches. s_doIPv6 gates the AAAA handling in getAddrs(); s_qnameminimization seeds
// d_qNameMinimization in the constructor.
bool SyncRes::s_doIPv6;
bool SyncRes::s_nopacketcache;
bool SyncRes::s_rootNXTrust;
bool SyncRes::s_noEDNS;
bool SyncRes::s_qnameminimization;
91
// LOG(x): streams x to g_log at Warning level when d_lm == Log, or appends it to d_trace when
// d_lm == Store. For any other value of d_lm the expression x is not evaluated at all.
// The macro expands to a bare if / else-if chain, so d_lm and d_trace must be visible at the call site.
#define LOG(x) if(d_lm == Log) { g_log <<Logger::Warning << x; } else if(d_lm == Store) { d_trace << x; }
93
94 static void accountAuthLatency(int usec, int family)
95 {
96 if(family == AF_INET) {
97 if(usec < 1000)
98 g_stats.auth4Answers0_1++;
99 else if(usec < 10000)
100 g_stats.auth4Answers1_10++;
101 else if(usec < 100000)
102 g_stats.auth4Answers10_100++;
103 else if(usec < 1000000)
104 g_stats.auth4Answers100_1000++;
105 else
106 g_stats.auth4AnswersSlow++;
107 } else {
108 if(usec < 1000)
109 g_stats.auth6Answers0_1++;
110 else if(usec < 10000)
111 g_stats.auth6Answers1_10++;
112 else if(usec < 100000)
113 g_stats.auth6Answers10_100++;
114 else if(usec < 1000000)
115 g_stats.auth6Answers100_1000++;
116 else
117 g_stats.auth6AnswersSlow++;
118 }
119
120 }
121
122
123 SyncRes::SyncRes(const struct timeval& now) : d_authzonequeries(0), d_outqueries(0), d_tcpoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0),
124 d_totUsec(0), d_now(now),
125 d_cacheonly(false), d_doDNSSEC(false), d_doEDNS0(false), d_qNameMinimization(s_qnameminimization), d_lm(s_lm)
126
127 {
128 }
129
130 /** everything begins here - this is the entry point just after receiving a packet */
131 int SyncRes::beginResolve(const DNSName &qname, const QType &qtype, uint16_t qclass, vector<DNSRecord>&ret)
132 {
133 vState state = Indeterminate;
134 s_queries++;
135 d_wasVariable=false;
136 d_wasOutOfBand=false;
137
138 if (doSpecialNamesResolve(qname, qtype, qclass, ret)) {
139 d_queryValidationState = Insecure; // this could fool our stats into thinking a validation took place
140 return 0; // so do check before updating counters (we do now)
141 }
142
143 auto qtypeCode = qtype.getCode();
144 /* rfc6895 section 3.1 */
145 if ((qtypeCode >= 128 && qtypeCode <= 254) || qtypeCode == QType::RRSIG || qtypeCode == QType::NSEC3 || qtypeCode == QType::OPT || qtypeCode == 65535) {
146 return -1;
147 }
148
149 if(qclass==QClass::ANY)
150 qclass=QClass::IN;
151 else if(qclass!=QClass::IN)
152 return -1;
153
154 set<GetBestNSAnswer> beenthere;
155 int res=doResolve(qname, qtype, ret, 0, beenthere, state);
156 d_queryValidationState = state;
157
158 if (shouldValidate()) {
159 if (d_queryValidationState != Indeterminate) {
160 g_stats.dnssecValidations++;
161 }
162 increaseDNSSECStateCounter(d_queryValidationState);
163 }
164
165 return res;
166 }
167
168 /*! Handles all special, built-in names
169 * Fills ret with an answer and returns true if it handled the query.
170 *
171 * Handles the following queries (and their ANY variants):
172 *
173 * - localhost. IN A
174 * - localhost. IN AAAA
175 * - 1.0.0.127.in-addr.arpa. IN PTR
176 * - 1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. IN PTR
177 * - version.bind. CH TXT
178 * - version.pdns. CH TXT
179 * - id.server. CH TXT
180 * - trustanchor.server CH TXT
181 * - negativetrustanchor.server CH TXT
182 */
183 bool SyncRes::doSpecialNamesResolve(const DNSName &qname, const QType &qtype, const uint16_t qclass, vector<DNSRecord> &ret)
184 {
185 static const DNSName arpa("1.0.0.127.in-addr.arpa."), ip6_arpa("1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa."),
186 localhost("localhost."), versionbind("version.bind."), idserver("id.server."), versionpdns("version.pdns."), trustanchorserver("trustanchor.server."),
187 negativetrustanchorserver("negativetrustanchor.server.");
188
189 bool handled = false;
190 vector<pair<QType::typeenum, string> > answers;
191
192 if ((qname == arpa || qname == ip6_arpa) &&
193 qclass == QClass::IN) {
194 handled = true;
195 if (qtype == QType::PTR || qtype == QType::ANY)
196 answers.push_back({QType::PTR, "localhost."});
197 }
198
199 if (qname == localhost &&
200 qclass == QClass::IN) {
201 handled = true;
202 if (qtype == QType::A || qtype == QType::ANY)
203 answers.push_back({QType::A, "127.0.0.1"});
204 if (qtype == QType::AAAA || qtype == QType::ANY)
205 answers.push_back({QType::AAAA, "::1"});
206 }
207
208 if ((qname == versionbind || qname == idserver || qname == versionpdns) &&
209 qclass == QClass::CHAOS) {
210 handled = true;
211 if (qtype == QType::TXT || qtype == QType::ANY) {
212 if(qname == versionbind || qname == versionpdns)
213 answers.push_back({QType::TXT, "\""+::arg()["version-string"]+"\""});
214 else if (s_serverID != "disabled")
215 answers.push_back({QType::TXT, "\""+s_serverID+"\""});
216 }
217 }
218
219 if (qname == trustanchorserver && qclass == QClass::CHAOS &&
220 ::arg().mustDo("allow-trust-anchor-query")) {
221 handled = true;
222 if (qtype == QType::TXT || qtype == QType::ANY) {
223 auto luaLocal = g_luaconfs.getLocal();
224 for (auto const &dsAnchor : luaLocal->dsAnchors) {
225 ostringstream ans;
226 ans<<"\"";
227 ans<<dsAnchor.first.toString(); // Explicit toString to have a trailing dot
228 for (auto const &dsRecord : dsAnchor.second) {
229 ans<<" ";
230 ans<<dsRecord.d_tag;
231 }
232 ans << "\"";
233 answers.push_back({QType::TXT, ans.str()});
234 }
235 }
236 }
237
238 if (qname == negativetrustanchorserver && qclass == QClass::CHAOS &&
239 ::arg().mustDo("allow-trust-anchor-query")) {
240 handled = true;
241 if (qtype == QType::TXT || qtype == QType::ANY) {
242 auto luaLocal = g_luaconfs.getLocal();
243 for (auto const &negAnchor : luaLocal->negAnchors) {
244 ostringstream ans;
245 ans<<"\"";
246 ans<<negAnchor.first.toString(); // Explicit toString to have a trailing dot
247 if (negAnchor.second.length())
248 ans<<" "<<negAnchor.second;
249 ans << "\"";
250 answers.push_back({QType::TXT, ans.str()});
251 }
252 }
253 }
254
255 if (handled && !answers.empty()) {
256 ret.clear();
257 d_wasOutOfBand=true;
258
259 DNSRecord dr;
260 dr.d_name = qname;
261 dr.d_place = DNSResourceRecord::ANSWER;
262 dr.d_class = qclass;
263 dr.d_ttl = 86400;
264 for (const auto& ans : answers) {
265 dr.d_type = ans.first;
266 dr.d_content = DNSRecordContent::mastermake(ans.first, qclass, ans.second);
267 ret.push_back(dr);
268 }
269 }
270
271 return handled;
272 }
273
274
275 //! This is the 'out of band resolver', in other words, the authoritative server
276 void SyncRes::AuthDomain::addSOA(std::vector<DNSRecord>& records) const
277 {
278 SyncRes::AuthDomain::records_t::const_iterator ziter = d_records.find(boost::make_tuple(getName(), QType::SOA));
279 if (ziter != d_records.end()) {
280 DNSRecord dr = *ziter;
281 dr.d_place = DNSResourceRecord::AUTHORITY;
282 records.push_back(dr);
283 }
284 else {
285 // cerr<<qname<<": can't find SOA record '"<<getName()<<"' in our zone!"<<endl;
286 }
287 }
288
289 int SyncRes::AuthDomain::getRecords(const DNSName& qname, uint16_t qtype, std::vector<DNSRecord>& records) const
290 {
291 int result = RCode::NoError;
292 records.clear();
293
294 // partial lookup
295 std::pair<records_t::const_iterator,records_t::const_iterator> range = d_records.equal_range(tie(qname));
296
297 SyncRes::AuthDomain::records_t::const_iterator ziter;
298 bool somedata = false;
299
300 for(ziter = range.first; ziter != range.second; ++ziter) {
301 somedata = true;
302
303 if(qtype == QType::ANY || ziter->d_type == qtype || ziter->d_type == QType::CNAME) {
304 // let rest of nameserver do the legwork on this one
305 records.push_back(*ziter);
306 }
307 else if (ziter->d_type == QType::NS && ziter->d_name.countLabels() > getName().countLabels()) {
308 // we hit a delegation point!
309 DNSRecord dr = *ziter;
310 dr.d_place=DNSResourceRecord::AUTHORITY;
311 records.push_back(dr);
312 }
313 }
314
315 if (!records.empty()) {
316 /* We have found an exact match, we're done */
317 // cerr<<qname<<": exact match in zone '"<<getName()<<"'"<<endl;
318 return result;
319 }
320
321 if (somedata) {
322 /* We have records for that name, but not of the wanted qtype */
323 // cerr<<qname<<": found record in '"<<getName()<<"', but nothing of the right type, sending SOA"<<endl;
324 addSOA(records);
325
326 return result;
327 }
328
329 // cerr<<qname<<": nothing found so far in '"<<getName()<<"', trying wildcards"<<endl;
330 DNSName wcarddomain(qname);
331 while(wcarddomain != getName() && wcarddomain.chopOff()) {
332 // cerr<<qname<<": trying '*."<<wcarddomain<<"' in "<<getName()<<endl;
333 range = d_records.equal_range(boost::make_tuple(g_wildcarddnsname + wcarddomain));
334 if (range.first==range.second)
335 continue;
336
337 for(ziter = range.first; ziter != range.second; ++ziter) {
338 DNSRecord dr = *ziter;
339 // if we hit a CNAME, just answer that - rest of recursor will do the needful & follow
340 if(dr.d_type == qtype || qtype == QType::ANY || dr.d_type == QType::CNAME) {
341 dr.d_name = qname;
342 dr.d_place = DNSResourceRecord::ANSWER;
343 records.push_back(dr);
344 }
345 }
346
347 if (records.empty()) {
348 addSOA(records);
349 }
350
351 // cerr<<qname<<": in '"<<getName()<<"', had wildcard match on '*."<<wcarddomain<<"'"<<endl;
352 return result;
353 }
354
355 /* Nothing for this name, no wildcard, let's see if there is some NS */
356 DNSName nsdomain(qname);
357 while (nsdomain.chopOff() && nsdomain != getName()) {
358 range = d_records.equal_range(boost::make_tuple(nsdomain,QType::NS));
359 if(range.first == range.second)
360 continue;
361
362 for(ziter = range.first; ziter != range.second; ++ziter) {
363 DNSRecord dr = *ziter;
364 dr.d_place = DNSResourceRecord::AUTHORITY;
365 records.push_back(dr);
366 }
367 }
368
369 if(records.empty()) {
370 // cerr<<qname<<": no NS match in zone '"<<getName()<<"' either, handing out SOA"<<endl;
371 addSOA(records);
372 result = RCode::NXDomain;
373 }
374
375 return result;
376 }
377
378 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, int& res)
379 {
380 d_authzonequeries++;
381 s_authzonequeries++;
382
383 res = domain.getRecords(qname, qtype.getCode(), ret);
384 return true;
385 }
386
387 bool SyncRes::doOOBResolve(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, int& res)
388 {
389 string prefix;
390 if(doLog()) {
391 prefix=d_prefix;
392 prefix.append(depth, ' ');
393 }
394
395 DNSName authdomain(qname);
396 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
397 if(iter==t_sstorage.domainmap->end() || !iter->second.isAuth()) {
398 LOG(prefix<<qname<<": auth storage has no zone for this query!"<<endl);
399 return false;
400 }
401
402 LOG(prefix<<qname<<": auth storage has data, zone='"<<authdomain<<"'"<<endl);
403 return doOOBResolve(iter->second, qname, qtype, ret, res);
404 }
405
406 uint64_t SyncRes::doEDNSDump(int fd)
407 {
408 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
409 if (!fp) {
410 return 0;
411 }
412 uint64_t count = 0;
413
414 fprintf(fp.get(),"; edns from thread follows\n;\n");
415 for(const auto& eds : t_sstorage.ednsstatus) {
416 count++;
417 fprintf(fp.get(), "%s\t%d\t%s", eds.first.toString().c_str(), (int)eds.second.mode, ctime(&eds.second.modeSetAt));
418 }
419 return count;
420 }
421
422 uint64_t SyncRes::doDumpNSSpeeds(int fd)
423 {
424 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
425 if(!fp)
426 return 0;
427 fprintf(fp.get(), "; nsspeed dump from thread follows\n;\n");
428 uint64_t count=0;
429
430 for(const auto& i : t_sstorage.nsSpeeds)
431 {
432 count++;
433
434 // an <empty> can appear hear in case of authoritative (hosted) zones
435 fprintf(fp.get(), "%s -> ", i.first.toLogString().c_str());
436 for(const auto& j : i.second.d_collection)
437 {
438 // typedef vector<pair<ComboAddress, DecayingEwma> > collection_t;
439 fprintf(fp.get(), "%s/%f ", j.first.toString().c_str(), j.second.peek());
440 }
441 fprintf(fp.get(), "\n");
442 }
443 return count;
444 }
445
446 uint64_t SyncRes::doDumpThrottleMap(int fd)
447 {
448 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(dup(fd), "w"), fclose);
449 if(!fp)
450 return 0;
451 fprintf(fp.get(), "; throttle map dump follows\n");
452 fprintf(fp.get(), "; remote IP\tqname\tqtype\tcount\tttd\n");
453 uint64_t count=0;
454
455 const auto& throttleMap = t_sstorage.throttle.getThrottleMap();
456 for(const auto& i : throttleMap)
457 {
458 count++;
459 // remote IP, dns name, qtype, count, ttd
460 fprintf(fp.get(), "%s\t%s\t%d\t%u\t%s", i.first.get<0>().toString().c_str(), i.first.get<1>().toLogString().c_str(), i.first.get<2>(), i.second.count, ctime(&i.second.ttd));
461 }
462
463 return count;
464 }
465
466 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
467 to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
468 so that if there are RRSIGs for a name, we'll have them.
469
470 However, some hosts simply can't answer questions which ask for DNSSEC. This can manifest itself as:
471 * No answer
472 * FormErr
473 * Nonsense answer
474
475 The cause of "No answer" may be fragmentation, and it is tempting to probe if smaller answers would get through.
476 Another cause of "No answer" may simply be a network condition.
477 Nonsense answers are a clearer indication this host won't be able to do DNSSEC evah.
478
479 Previous implementations have suffered from turning off DNSSEC questions for an authoritative server based on timeouts.
480 A clever idea is to only turn off DNSSEC if we know a domain isn't signed anyhow. The problem with that really
481 clever idea however is that at this point in PowerDNS, we may simply not know that yet. All the DNSSEC thinking happens
482 elsewhere. It may not have happened yet.
483
484 For now this means we can't be clever, but will turn off DNSSEC if you reply with FormError or gibberish.
485 */
486
487 int SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsMANDATORY, const DNSName& domain, const DNSName& auth, int type, bool doTCP, bool sendRDQuery, struct timeval* now, boost::optional<Netmask>& srcmask, LWResult* res, bool* chained) const
488 {
489 /* what is your QUEST?
490 the goal is to get as many remotes as possible on the highest level of EDNS support
491 The levels are:
492
493 0) UNKNOWN Unknown state
494 1) EDNS: Honors EDNS0
495 2) EDNSIGNORANT: Ignores EDNS0, gives replies without EDNS0
496 3) NOEDNS: Generates FORMERR on EDNS queries
497
498 Everybody starts out assumed to be '0'.
499 If '0', send out EDNS0
500 If you FORMERR us, go to '3',
501 If no EDNS in response, go to '2'
502 If '1', send out EDNS0
503 If FORMERR, downgrade to 3
504 If '2', keep on including EDNS0, see what happens
505 Same behaviour as 0
506 If '3', send bare queries
507 */
508
509 SyncRes::EDNSStatus* ednsstatus;
510 ednsstatus = &t_sstorage.ednsstatus[ip]; // does this include port? YES
511
512 if(ednsstatus->modeSetAt && ednsstatus->modeSetAt + 3600 < d_now.tv_sec) {
513 *ednsstatus=SyncRes::EDNSStatus();
514 // cerr<<"Resetting EDNS Status for "<<ip.toString()<<endl);
515 }
516
517 SyncRes::EDNSStatus::EDNSMode& mode=ednsstatus->mode;
518 SyncRes::EDNSStatus::EDNSMode oldmode = mode;
519 int EDNSLevel = 0;
520 auto luaconfsLocal = g_luaconfs.getLocal();
521 ResolveContext ctx;
522 #ifdef HAVE_PROTOBUF
523 ctx.d_initialRequestId = d_initialRequestId;
524 #endif
525 #ifdef HAVE_FSTRM
526 ctx.d_auth = auth;
527 #endif
528
529 int ret;
530 for(int tries = 0; tries < 3; ++tries) {
531 // cerr<<"Remote '"<<ip.toString()<<"' currently in mode "<<mode<<endl;
532
533 if(mode==EDNSStatus::NOEDNS) {
534 g_stats.noEdnsOutQueries++;
535 EDNSLevel = 0; // level != mode
536 }
537 else if(ednsMANDATORY || mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSOK || mode==EDNSStatus::EDNSIGNORANT)
538 EDNSLevel = 1;
539
540 DNSName sendQname(domain);
541 if (g_lowercaseOutgoing)
542 sendQname.makeUsLowerCase();
543
544 if (d_asyncResolve) {
545 ret = d_asyncResolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, res, chained);
546 }
547 else {
548 ret=asyncresolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
549 }
550 if(ret < 0) {
551 return ret; // transport error, nothing to learn here
552 }
553
554 if(ret == 0) { // timeout, not doing anything with it now
555 return ret;
556 }
557 else if(mode==EDNSStatus::UNKNOWN || mode==EDNSStatus::EDNSOK || mode == EDNSStatus::EDNSIGNORANT ) {
558 if(res->d_validpacket && !res->d_haveEDNS && res->d_rcode == RCode::FormErr) {
559 // cerr<<"Downgrading to NOEDNS because of "<<RCode::to_s(res->d_rcode)<<" for query to "<<ip.toString()<<" for '"<<domain<<"'"<<endl;
560 mode = EDNSStatus::NOEDNS;
561 continue;
562 }
563 else if(!res->d_haveEDNS) {
564 if(mode != EDNSStatus::EDNSIGNORANT) {
565 mode = EDNSStatus::EDNSIGNORANT;
566 // cerr<<"We find that "<<ip.toString()<<" is an EDNS-ignorer for '"<<domain<<"', moving to mode 2"<<endl;
567 }
568 }
569 else {
570 mode = EDNSStatus::EDNSOK;
571 // cerr<<"We find that "<<ip.toString()<<" is EDNS OK!"<<endl;
572 }
573
574 }
575 if(oldmode != mode || !ednsstatus->modeSetAt)
576 ednsstatus->modeSetAt=d_now.tv_sec;
577 // cerr<<"Result: ret="<<ret<<", EDNS-level: "<<EDNSLevel<<", haveEDNS: "<<res->d_haveEDNS<<", new mode: "<<mode<<endl;
578 return ret;
579 }
580 return ret;
581 }
582
// QLOG(x): LOG() wrapper used by the QName minimization code. It prepends the local variables
// 'prefix' and 'child' (both must be in scope at the call site) and appends endl.
#define QLOG(x) LOG(prefix << " child=" << child << ": " << x << endl)
584
/** Resolve a name, using QName minimization when it is enabled for this instance.
 *
 * When getQNameMinimization() is false this is a plain call to doResolveNoQNameMinimization().
 *
 * \param qname     the name we need an answer for
 * \param qtype     the type we need an answer for
 * \param ret       receives the answer records
 * \param depth     the recursion depth we are in
 * \param beenthere passed on to doResolveNoQNameMinimization()
 * \param state     passed on to doResolveNoQNameMinimization(), which sets it
 * \return the result of the doResolveNoQNameMinimization() call that produced the answer, or
 *         RCode::ServFail when no ancestor could be found or the loop ran out of iterations
 */
int SyncRes::doResolve(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, vState& state) {

  if (!getQNameMinimization()) {
    return doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, state);
  }

  // The qname minimization algorithm is a simplified version of the one in RFC 7816 (bis).
  // It could be simplified because the cache maintenance (both positive and negative)
  // is already done by doResolveNoQNameMinimization().
  //
  // Sketch of algorithm:
  // Check cache
  //  If result found: done
  //  Otherwise determine closest ancestor from cache data
  //    Repeat querying A, adding more labels of the original qname
  //    If we get a delegation continue at ancestor determination
  //    Until we have the full name.
  //
  // The algorithm starts with adding a single label per iteration, and
  // moves to three labels per iteration after three iterations.

  DNSName child;
  string prefix = d_prefix;
  prefix.append(depth, ' ');
  prefix.append(string("QM ") + qname.toString() + "|" + qtype.getName());

  QLOG("doResolve");

  // Look in cache only
  vector<DNSRecord> retq;
  // setCacheOnly() hands back the previous setting, which is put back right after the lookup
  bool old = setCacheOnly(true);
  bool fromCache = false;
  int res = doResolveNoQNameMinimization(qname, qtype, retq, depth + 1, beenthere, state, &fromCache);
  setCacheOnly(old);
  if (fromCache) {
    QLOG("Step0 Found in cache");
    ret.insert(ret.end(), retq.begin(), retq.end());
    return res;
  }
  QLOG("Step0 Not cached");

  const unsigned int qnamelen = qname.countLabels();

  // i is only advanced by the inner loop; leaving the inner loop through 'break' (delegation seen)
  // skips its increment, so i keeps its value for the next ancestor determination
  for (unsigned int i = 0; i <= qnamelen; ) {

    // Step 1
    vector<DNSRecord> bestns;
    // the two retries allow getBestNSFromCache&co to reprime the root
    // hints, in case they ever go missing
    for (int tries = 0; tries < 2 && bestns.empty(); ++tries) {
      bool flawedNSSet = false;
      set<GetBestNSAnswer> beenthereIgnored;
      getBestNSFromCache(qname, qtype, bestns, &flawedNSSet, depth + 1, beenthereIgnored);
    }

    if (bestns.size() == 0) {
      // Something terrible is wrong
      QLOG("Step1 No ancestor found return ServFail");
      return RCode::ServFail;
    }

    const DNSName& ancestor(bestns[0].d_name);
    QLOG("Step1 Ancestor from cache is " << ancestor.toString());
    child = ancestor;

    // one extra label per step at first, three once i is above 3, never beyond the full name
    unsigned int targetlen = std::min(child.countLabels() + (i > 3 ? 3 : 1), qnamelen);

    for (; i <= qnamelen; i++) {
      // Step 2
      // grow child towards qname by prepending qname's labels until it has targetlen labels
      while (child.countLabels() < targetlen) {
        child.prependRawLabel(qname.getRawLabel(qnamelen - child.countLabels() - 1));
      }
      targetlen += i > 3 ? 3 : 1;
      targetlen = std::min(targetlen, qnamelen);

      QLOG("Step2 New child");

      // Step 3 resolve
      if (child == qname) {
        QLOG("Step3 Going to do final resolve");
        res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, state);
        QLOG("Step3 Final resolve: " << RCode::to_s(res) << "/" << ret.size());
        return res;
      }

      // Step 4: resolve A for the intermediate child, asking the callee to stop at a delegation
      QLOG("Step4 Resolve A for child");
      retq.resize(0);
      StopAtDelegation stopAtDelegation = Stop;
      res = doResolveNoQNameMinimization(child, QType::A, retq, depth + 1, beenthere, state, NULL, &stopAtDelegation);
      QLOG("Step4 Resolve A result is " << RCode::to_s(res) << "/" << retq.size() << "/" << stopAtDelegation);
      if (stopAtDelegation == Stopped) {
        QLOG("Delegation seen, continue at step 1");
        break;
      }
      if (res != RCode::NoError) {
        // Case 5: unexpected answer
        // QName minimization is switched off for this instance and the full name is resolved directly
        QLOG("Step5: other rcode, last effort final resolve");
        setQNameMinimization(false);
        res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, state);
        QLOG("Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size());
        return res;
      }
    }
  }

  // Should not be reached
  QLOG("Max iterations reached, return ServFail");
  return RCode::ServFail;
}
695
/*! This function will check the cache and go out to the internet if the answer is not in cache
 *
 * In cache-only mode (d_cacheonly) nothing is sent out, with one exception: a query covered by a
 * forward zone is sent to the first server configured for that zone.
 *
 * \param qname The name we need an answer for
 * \param qtype The type we need an answer for
 * \param ret The vector of DNSRecords we need to fill with the answers
 * \param depth The recursion depth we are in
 * \param beenthere Passed on to getBestNSNamesFromCache() and doResolveAt()
 * \param state Reset to Indeterminate on entry, then set by the cache checks, getValidationStatus() and doResolveAt()
 * \param fromCache tells the caller the result came from the cache, may be nullptr
 * \param stopAtDelegation if non-nullptr and pointed-to value is Stop requests the callee to stop at a delegation, if so pointed-to value is set to Stopped
 * \return DNS RCODE or -1 (Error) or -2 (RPZ hit)
 * \throws ImmediateServFailException when s_maxdepth is set and depth exceeds it
 */
int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, vState& state, bool *fromCache, StopAtDelegation *stopAtDelegation)
{
  string prefix;
  if(doLog()) {
    prefix=d_prefix;
    prefix.append(depth, ' ');
  }

  LOG(prefix<<qname<<": Wants "<< (d_doDNSSEC ? "" : "NO ") << "DNSSEC processing, "<<(d_requireAuthData ? "" : "NO ")<<"auth data in query for "<<qtype.getName()<<endl);

  state = Indeterminate;

  // a s_maxdepth of 0 disables the recursion depth limit
  if(s_maxdepth && depth > s_maxdepth)
    throw ImmediateServFailException("More than "+std::to_string(s_maxdepth)+" (max-recursion-depth) levels of recursion needed while resolving "+qname.toLogString());

  int res=0;

  // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS.
  if(!(d_updatingRootNS && qtype.getCode()==QType::NS && qname.isRoot())) {
    if(d_cacheonly) { // very limited OOB support
      LWResult lwr;
      LOG(prefix<<qname<<": Recursion not requested for '"<<qname<<"|"<<qtype.getName()<<"', peeking at auth/forward zones"<<endl);
      DNSName authname(qname);
      domainmap_t::const_iterator iter=getBestAuthZone(&authname);
      if(iter != t_sstorage.domainmap->end()) {
        if(iter->second.isAuth()) {
          // authoritative zone: answer from our own data
          ret.clear();
          d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, res);
          if (fromCache)
            *fromCache = d_wasOutOfBand;
          return res;
        }
        else {
          // forward zone: only the first configured server is asked
          const vector<ComboAddress>& servers = iter->second.d_servers;
          const ComboAddress remoteIP = servers.front();
          LOG(prefix<<qname<<": forwarding query to hardcoded nameserver '"<< remoteIP.toStringWithPort()<<"' for zone '"<<authname<<"'"<<endl);

          boost::optional<Netmask> nm;
          bool chained = false;
          // the two 'false' arguments are doTCP and sendRDQuery
          res=asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, authname, qtype.getCode(), false, false, &d_now, nm, &lwr, &chained);

          d_totUsec += lwr.d_usec;
          accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
          // reported as 'from cache' whatever the outcome of the query was
          if (fromCache)
            *fromCache = true;

          // filter out the good stuff from lwr.result()
          if (res == 1) {
            for(const auto& rec : lwr.d_records) {
              if(rec.d_place == DNSResourceRecord::ANSWER)
                ret.push_back(rec);
            }
            return 0;
          }
          else {
            return RCode::ServFail;
          }
        }
      }
    }

    // Work out what kind of configured zone, if any, covers qname; the cache checks below need to know
    DNSName authname(qname);
    bool wasForwardedOrAuthZone = false;
    bool wasAuthZone = false;
    bool wasForwardRecurse = false;
    domainmap_t::const_iterator iter = getBestAuthZone(&authname);
    if(iter != t_sstorage.domainmap->end()) {
      const auto& domain = iter->second;
      wasForwardedOrAuthZone = true;

      if (domain.isAuth()) {
        wasAuthZone = true;
      } else if (domain.shouldRecurse()) {
        wasForwardRecurse = true;
      }
    }

    // note: fromCache is not set on this path, only on the doCacheCheck() path below
    if(!d_skipCNAMECheck && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
      d_wasOutOfBand = wasAuthZone;
      return res;
    }

    if(doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, res, state)) {
      // we done
      d_wasOutOfBand = wasAuthZone;
      if (fromCache)
        *fromCache = true;
      return res;
    }
  }

  // cache-only mode and nothing found: return 0 without filling ret
  if(d_cacheonly)
    return 0;

  LOG(prefix<<qname<<": No cache hit for '"<<qname<<"|"<<qtype.getName()<<"', trying to find an appropriate NS record"<<endl);

  DNSName subdomain(qname);
  if(qtype == QType::DS) subdomain.chopOff();

  NsSet nsset;
  bool flawedNSSet=false;

  /* we use subdomain here instead of qname because for DS queries we only care about the state of the parent zone */
  computeZoneCuts(subdomain, g_rootdnsname, depth);

  // the two retries allow getBestNSNamesFromCache&co to reprime the root
  // hints, in case they ever go missing
  for(int tries=0;tries<2 && nsset.empty();++tries) {
    subdomain=getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, beenthere); // pass beenthere to both occasions
  }

  state = getValidationStatus(qname, false);

  LOG(prefix<<qname<<": initial validation status for "<<qname<<" is "<<vStates[state]<<endl);

  if(!(res=doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation)))
    return 0;

  LOG(prefix<<qname<<": failed (res="<<res<<")"<<endl);

  // -2 is handed to the caller unchanged, every other negative result becomes ServFail
  if (res == -2)
    return res;

  return res<0 ? RCode::ServFail : res;
}
832
#if 0
// for testing purposes
/** Comparator that orders IPv6 addresses before all others.
 *
 * The previous body compared a with itself, so it returned true for every pair, which is not
 * a valid ordering for the standard sort algorithms. This version returns true only when a
 * is IPv6 and b is not, and so never claims that an element sorts before an equivalent one.
 */
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return a.sin4.sin_family == AF_INET6 && b.sin4.sin_family != AF_INET6;
}
#endif
840
841 struct speedOrderCA
842 {
843 speedOrderCA(std::map<ComboAddress,double>& speeds): d_speeds(speeds) {}
844 bool operator()(const ComboAddress& a, const ComboAddress& b) const
845 {
846 return d_speeds[a] < d_speeds[b];
847 }
848 std::map<ComboAddress, double>& d_speeds;
849 };
850
851 /** This function explicitly goes out for A or AAAA addresses
852 */
853 vector<ComboAddress> SyncRes::getAddrs(const DNSName &qname, unsigned int depth, set<GetBestNSAnswer>& beenthere, bool cacheOnly)
854 {
855 typedef vector<DNSRecord> res_t;
856 typedef vector<ComboAddress> ret_t;
857 ret_t ret;
858
859 bool oldCacheOnly = setCacheOnly(cacheOnly);
860 bool oldRequireAuthData = d_requireAuthData;
861 bool oldValidationRequested = d_DNSSECValidationRequested;
862 d_requireAuthData = false;
863 d_DNSSECValidationRequested = false;
864
865 vState newState = Indeterminate;
866 res_t resv4;
867 // If IPv4 ever becomes second class, we should revisit this
868 if (doResolve(qname, QType::A, resv4, depth+1, beenthere, newState) == 0) { // this consults cache, OR goes out
869 for (auto const &i : resv4) {
870 if (i.d_type == QType::A) {
871 if (auto rec = getRR<ARecordContent>(i)) {
872 ret.push_back(rec->getCA(53));
873 }
874 }
875 }
876 }
877 if (s_doIPv6) {
878 if (ret.empty()) {
879 // We did not find IPv4 addresses, try to get IPv6 ones
880 newState = Indeterminate;
881 res_t resv6;
882 if (doResolve(qname, QType::AAAA, resv6, depth+1, beenthere, newState) == 0) { // this consults cache, OR goes out
883 for (const auto &i : resv6) {
884 if (i.d_type == QType::AAAA) {
885 if (auto rec = getRR<AAAARecordContent>(i))
886 ret.push_back(rec->getCA(53));
887 }
888 }
889 }
890 } else {
891 // We have some IPv4 records, don't bother with going out to get IPv6, but do consult the cache
892 // Once IPv6 adoption matters, this needs to be revisited
893 res_t cset;
894 if (t_RC->get(d_now.tv_sec, qname, QType(QType::AAAA), false, &cset, d_cacheRemote) > 0) {
895 for (const auto &i : cset) {
896 if (i.d_ttl > (unsigned int)d_now.tv_sec ) {
897 if (auto rec = getRR<AAAARecordContent>(i)) {
898 ret.push_back(rec->getCA(53));
899 }
900 }
901 }
902 }
903 }
904 }
905
906 d_requireAuthData = oldRequireAuthData;
907 d_DNSSECValidationRequested = oldValidationRequested;
908 setCacheOnly(oldCacheOnly);
909
910 /* we need to remove from the nsSpeeds collection the existing IPs
911 for this nameserver that are no longer in the set, even if there
912 is only one or none at all in the current set.
913 */
914 map<ComboAddress, double> speeds;
915 auto& collection = t_sstorage.nsSpeeds[qname].d_collection;
916 for(const auto& val: ret) {
917 speeds[val] = collection[val].get(&d_now);
918 }
919
920 t_sstorage.nsSpeeds[qname].purge(speeds);
921
922 if(ret.size() > 1) {
923 random_shuffle(ret.begin(), ret.end());
924 speedOrderCA so(speeds);
925 stable_sort(ret.begin(), ret.end(), so);
926
927 if(doLog()) {
928 string prefix=d_prefix;
929 prefix.append(depth, ' ');
930 LOG(prefix<<"Nameserver "<<qname<<" IPs: ");
931 bool first = true;
932 for(const auto& addr : ret) {
933 if (first) {
934 first = false;
935 }
936 else {
937 LOG(", ");
938 }
939 LOG((addr.toString())<<"(" << (boost::format("%0.2f") % (speeds[addr]/1000.0)).str() <<"ms)");
940 }
941 LOG(endl);
942 }
943 }
944
945 return ret;
946 }
947
948 void SyncRes::getBestNSFromCache(const DNSName &qname, const QType& qtype, vector<DNSRecord>& bestns, bool* flawedNSSet, unsigned int depth, set<GetBestNSAnswer>& beenthere)
949 {
950 string prefix;
951 DNSName subdomain(qname);
952 if(doLog()) {
953 prefix=d_prefix;
954 prefix.append(depth, ' ');
955 }
956 bestns.clear();
957 bool brokeloop;
958 do {
959 brokeloop=false;
960 LOG(prefix<<qname<<": Checking if we have NS in cache for '"<<subdomain<<"'"<<endl);
961 vector<DNSRecord> ns;
962 *flawedNSSet = false;
963
964 if(t_RC->get(d_now.tv_sec, subdomain, QType(QType::NS), false, &ns, d_cacheRemote) > 0) {
965 bestns.reserve(ns.size());
966
967 for(auto k=ns.cbegin();k!=ns.cend(); ++k) {
968 if(k->d_ttl > (unsigned int)d_now.tv_sec ) {
969 vector<DNSRecord> aset;
970
971 const DNSRecord& dr=*k;
972 auto nrr = getRR<NSRecordContent>(dr);
973 if(nrr && (!nrr->getNS().isPartOf(subdomain) || t_RC->get(d_now.tv_sec, nrr->getNS(), s_doIPv6 ? QType(QType::ADDR) : QType(QType::A),
974 false, doLog() ? &aset : 0, d_cacheRemote) > 5)) {
975 bestns.push_back(dr);
976 LOG(prefix<<qname<<": NS (with ip, or non-glue) in cache for '"<<subdomain<<"' -> '"<<nrr->getNS()<<"'"<<endl);
977 LOG(prefix<<qname<<": within bailiwick: "<< nrr->getNS().isPartOf(subdomain));
978 if(!aset.empty()) {
979 LOG(", in cache, ttl="<<(unsigned int)(((time_t)aset.begin()->d_ttl- d_now.tv_sec ))<<endl);
980 }
981 else {
982 LOG(", not in cache / did not look at cache"<<endl);
983 }
984 }
985 else {
986 *flawedNSSet=true;
987 LOG(prefix<<qname<<": NS in cache for '"<<subdomain<<"', but needs glue ("<<nrr->getNS()<<") which we miss or is expired"<<endl);
988 }
989 }
990 }
991
992 if(!bestns.empty()) {
993 GetBestNSAnswer answer;
994 answer.qname=qname;
995 answer.qtype=qtype.getCode();
996 for(const auto& dr : bestns) {
997 if (auto nsContent = getRR<NSRecordContent>(dr)) {
998 answer.bestns.insert(make_pair(dr.d_name, nsContent->getNS()));
999 }
1000 }
1001
1002 if(beenthere.count(answer)) {
1003 brokeloop=true;
1004 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' but part of LOOP (already seen "<<answer.qname<<")! Trying less specific NS"<<endl);
1005 ;
1006 if(doLog())
1007 for( set<GetBestNSAnswer>::const_iterator j=beenthere.begin();j!=beenthere.end();++j) {
1008 bool neo = !(*j< answer || answer<*j);
1009 LOG(prefix<<qname<<": beenthere"<<(neo?"*":"")<<": "<<j->qname<<"|"<<DNSRecordContent::NumberToType(j->qtype)<<" ("<<(unsigned int)j->bestns.size()<<")"<<endl);
1010 }
1011 bestns.clear();
1012 }
1013 else {
1014 beenthere.insert(answer);
1015 LOG(prefix<<qname<<": We have NS in cache for '"<<subdomain<<"' (flawedNSSet="<<*flawedNSSet<<")"<<endl);
1016 return;
1017 }
1018 }
1019 }
1020 LOG(prefix<<qname<<": no valid/useful NS in cache for '"<<subdomain<<"'"<<endl);
1021
1022 if(subdomain.isRoot() && !brokeloop) {
1023 // We lost the root NS records
1024 primeHints();
1025 LOG(prefix<<qname<<": reprimed the root"<<endl);
1026 /* let's prevent an infinite loop */
1027 if (!d_updatingRootNS) {
1028 getRootNS(d_now, d_asyncResolve);
1029 }
1030 }
1031 } while(subdomain.chopOff());
1032 }
1033
1034 SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(DNSName* qname) const
1035 {
1036 SyncRes::domainmap_t::const_iterator ret;
1037 do {
1038 ret=t_sstorage.domainmap->find(*qname);
1039 if(ret!=t_sstorage.domainmap->end())
1040 break;
1041 }while(qname->chopOff());
1042 return ret;
1043 }
1044
1045 /** doesn't actually do the work, leaves that to getBestNSFromCache */
1046 DNSName SyncRes::getBestNSNamesFromCache(const DNSName &qname, const QType& qtype, NsSet& nsset, bool* flawedNSSet, unsigned int depth, set<GetBestNSAnswer>&beenthere)
1047 {
1048 DNSName subdomain(qname);
1049 DNSName authdomain(qname);
1050
1051 domainmap_t::const_iterator iter=getBestAuthZone(&authdomain);
1052 if(iter!=t_sstorage.domainmap->end()) {
1053 if( iter->second.isAuth() )
1054 // this gets picked up in doResolveAt, the empty DNSName, combined with the
1055 // empty vector means 'we are auth for this zone'
1056 nsset.insert({DNSName(), {{}, false}});
1057 else {
1058 // Again, picked up in doResolveAt. An empty DNSName, combined with a
1059 // non-empty vector of ComboAddresses means 'this is a forwarded domain'
1060 // This is actually picked up in retrieveAddressesForNS called from doResolveAt.
1061 nsset.insert({DNSName(), {iter->second.d_servers, iter->second.shouldRecurse() }});
1062 }
1063 return authdomain;
1064 }
1065
1066 vector<DNSRecord> bestns;
1067 getBestNSFromCache(subdomain, qtype, bestns, flawedNSSet, depth, beenthere);
1068
1069 for(auto k=bestns.cbegin() ; k != bestns.cend(); ++k) {
1070 // The actual resolver code will not even look at the ComboAddress or bool
1071 const auto nsContent = getRR<NSRecordContent>(*k);
1072 if (nsContent) {
1073 nsset.insert({nsContent->getNS(), {{}, false}});
1074 if(k==bestns.cbegin())
1075 subdomain=k->d_name;
1076 }
1077 }
1078 return subdomain;
1079 }
1080
1081 void SyncRes::updateValidationStatusInCache(const DNSName &qname, const QType& qt, bool aa, vState newState) const
1082 {
1083 if (newState == Bogus) {
1084 t_RC->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, aa, newState, s_maxbogusttl + d_now.tv_sec);
1085 }
1086 else {
1087 t_RC->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, aa, newState, boost::none);
1088 }
1089 }
1090
/** Tries to answer from a cached CNAME for qname, or from a cached DNAME for
 *  qname or one of its ancestors.
 *
 *  On a hit the cached record (with its TTL made relative and, for Bogus
 *  data, capped to s_maxbogusttl), its signatures and its authority records
 *  are appended to ret. For a DNAME hit a CNAME is synthesized and appended as
 *  well. Unless the client asked for the CNAME (or the DNAME at its owner
 *  name) itself, the alias target is then resolved through doResolve() and
 *  res is set to that result.
 *
 *  \param state in/out validation state; updated with the state of the cached
 *               record and of the target resolution
 *  \return true when the query was dealt with here (res has been set),
 *          false when neither a CNAME nor a DNAME was found in the cache
 *  \throws ImmediateServFailException when the cached content cannot be read,
 *          when the DNAME substitution fails, or when the cache hit contained
 *          no unexpired IN-class record
 */
1091 bool SyncRes::doCNAMECacheCheck(const DNSName &qname, const QType &qtype, vector<DNSRecord>& ret, unsigned int depth, int &res, vState& state, bool wasAuthZone, bool wasForwardRecurse)
1092 {
1093 string prefix;
1094 if(doLog()) {
1095 prefix=d_prefix;
1096 prefix.append(depth, ' ');
1097 }
1098
// Give up with ServFail once the chain of indirections gets too deep
1099 if((depth>9 && d_outqueries>10 && d_throttledqueries>5) || depth > 15) {
1100 LOG(prefix<<qname<<": recursing (CNAME or other indirection) too deep, depth="<<depth<<endl);
1101 res=RCode::ServFail;
1102 return true;
1103 }
1104
1105 vector<DNSRecord> cset;
1106 vector<std::shared_ptr<RRSIGRecordContent>> signatures;
1107 vector<std::shared_ptr<DNSRecord>> authorityRecs;
1108 bool wasAuth;
1109 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
// foundName stays empty until a CNAME or DNAME cache hit has been recorded
1110 DNSName foundName;
1111 QType foundQT = QType(0); // 0 == QTYPE::ENT
1112
1113 LOG(prefix<<qname<<": Looking for CNAME cache hit of '"<<qname<<"|CNAME"<<"'"<<endl);
1114 /* we don't require auth data for forward-recurse lookups */
1115 if (t_RC->get(d_now.tv_sec, qname, QType(QType::CNAME), !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &state, &wasAuth) > 0) {
1116 foundName = qname;
1117 foundQT = QType(QType::CNAME);
1118 }
1119
1120 if (foundName.empty() && qname != g_rootdnsname) {
1121 // look for a DNAME cache hit
1122 auto labels = qname.getRawLabels();
1123 DNSName dnameName(g_rootdnsname);
1124
1125 LOG(prefix<<qname<<": Looking for DNAME cache hit of '"<<qname<<"|DNAME' or its ancestors"<<endl);
// Rebuild qname starting from the root, one label per round, and probe the
// cache for a DNAME at every name built that way
1126 do {
1127 dnameName.prependRawLabel(labels.back());
1128 labels.pop_back();
1129 if (dnameName == qname && qtype != QType::DNAME) { // The client does not want a DNAME, but we've reached the QNAME already. So there is no match
1130 break;
1131 }
1132 if (t_RC->get(d_now.tv_sec, dnameName, QType(QType::DNAME), !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &state, &wasAuth) > 0) {
1133 foundName = dnameName;
1134 foundQT = QType(QType::DNAME);
1135 break;
1136 }
1137 } while(!labels.empty());
1138 }
1139
1140 if (foundName.empty()) {
1141 LOG(prefix<<qname<<": No CNAME or DNAME cache hit of '"<< qname <<"' found"<<endl);
1142 return false;
1143 }
1144
// Every path through the body below returns or throws, so only the first
// unexpired IN-class record of the hit is ever used
1145 for(auto const &record : cset) {
1146 if (record.d_class != QClass::IN) {
1147 continue;
1148 }
1149
1150 if(record.d_ttl > (unsigned int) d_now.tv_sec) {
1151
1152 if (!wasAuthZone && shouldValidate() && (wasAuth || wasForwardRecurse) && state == Indeterminate && d_requireAuthData) {
1153 /* This means we couldn't figure out the state when this entry was cached,
1154 most likely because we hadn't computed the zone cuts yet. */
1155 /* make sure they are computed before validating */
1156 DNSName subdomain(foundName);
1157 /* if we are retrieving a DS, we only care about the state of the parent zone */
1158 if(qtype == QType::DS)
1159 subdomain.chopOff();
1160
1161 computeZoneCuts(subdomain, g_rootdnsname, depth);
1162
1163 vState recordState = getValidationStatus(foundName, false);
1164 if (recordState == Secure) {
1165 LOG(prefix<<qname<<": got Indeterminate state from the "<<foundQT.getName()<<" cache, validating.."<<endl);
1166 state = SyncRes::validateRecordsWithSigs(depth, foundName, foundQT, foundName, cset, signatures);
1167 if (state != Indeterminate) {
// NOTE(review): this message says "CNAME cache" even when the hit was a DNAME,
// unlike the message a few lines up which uses foundQT.getName()
1168 LOG(prefix<<qname<<": got Indeterminate state from the CNAME cache, new validation result is "<<vStates[state]<<endl);
1169 if (state == Bogus) {
1170 capTTL = s_maxbogusttl;
1171 }
1172 updateValidationStatusInCache(foundName, foundQT, wasAuth, state);
1173 }
1174 }
1175 }
1176
1177 LOG(prefix<<qname<<": Found cache "<<foundQT.getName()<<" hit for '"<< foundName << "|"<<foundQT.getName()<<"' to '"<<record.d_content->getZoneRepresentation()<<"', validation state is "<<vStates[state]<<endl);
1178
// The cached d_ttl was compared against d_now.tv_sec above; here it is made
// relative to now and capped before being handed out
1179 DNSRecord dr = record;
1180 dr.d_ttl -= d_now.tv_sec;
1181 dr.d_ttl = std::min(dr.d_ttl, capTTL);
1182 const uint32_t ttl = dr.d_ttl;
1183 ret.reserve(ret.size() + 2 + signatures.size() + authorityRecs.size());
1184 ret.push_back(dr);
1185
// Signatures and authority records are returned with the same TTL as the record
1186 for(const auto& signature : signatures) {
1187 DNSRecord sigdr;
1188 sigdr.d_type=QType::RRSIG;
1189 sigdr.d_name=foundName;
1190 sigdr.d_ttl=ttl;
1191 sigdr.d_content=signature;
1192 sigdr.d_place=DNSResourceRecord::ANSWER;
1193 sigdr.d_class=QClass::IN;
1194 ret.push_back(sigdr);
1195 }
1196
1197 for(const auto& rec : authorityRecs) {
1198 DNSRecord authDR(*rec);
1199 authDR.d_ttl=ttl;
1200 ret.push_back(authDR);
1201 }
1202
1203 DNSName newTarget;
1204 if (foundQT == QType::DNAME) {
1205 if (qtype == QType::DNAME && qname == foundName) { // client wanted the DNAME, no need to synthesize a CNAME
1206 res = 0;
1207 return true;
1208 }
1209 // Synthesize a CNAME
1210 auto dnameRR = getRR<DNAMERecordContent>(record);
1211 if (dnameRR == nullptr) {
1212 throw ImmediateServFailException("Unable to get record content for "+foundName.toLogString()+"|DNAME cache entry");
1213 }
1214 const auto& dnameSuffix = dnameRR->getTarget();
1215 DNSName targetPrefix = qname.makeRelative(foundName);
1216 try {
// dr is reused: it keeps the TTL computed above and becomes the synthesized CNAME
1217 dr.d_type = QType::CNAME;
1218 dr.d_name = targetPrefix + foundName;
1219 newTarget = targetPrefix + dnameSuffix;
1220 dr.d_content = std::make_shared<CNAMERecordContent>(CNAMERecordContent(newTarget));
1221 ret.push_back(dr);
1222 } catch (const std::exception &e) {
1223 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
1224 // But this is consistent with processRecords
1225 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + foundName.toLogString() +
1226 "', DNAME target: '" + dnameSuffix.toLogString() + "', substituted name: '" +
1227 targetPrefix.toLogString() + "." + dnameSuffix.toLogString() +
1228 "' : " + e.what());
1229 }
1230
1231 LOG(prefix<<qname<<": Synthesized "<<dr.d_name<<"|CNAME "<<newTarget<<endl);
1232 }
1233
1234 if(qtype == QType::CNAME) { // perhaps they really wanted a CNAME!
1235 res = 0;
1236 return true;
1237 }
1238
1239 // We have a DNAME _or_ CNAME cache hit and the client wants something else than those two.
1240 // Let's find the answer!
1241 if (foundQT == QType::CNAME) {
1242 const auto cnameContent = getRR<CNAMERecordContent>(record);
1243 if (cnameContent == nullptr) {
1244 throw ImmediateServFailException("Unable to get record content for "+foundName.toLogString()+"|CNAME cache entry");
1245 }
1246 newTarget = cnameContent->getTarget();
1247 }
1248
// The target is resolved with a fresh, empty beenthere set and its own
// validation state, which is then merged into state
1249 set<GetBestNSAnswer>beenthere;
1250 vState cnameState = Indeterminate;
1251 res = doResolve(newTarget, qtype, ret, depth+1, beenthere, cnameState);
1252 LOG(prefix<<qname<<": updating validation state for response to "<<qname<<" from "<<vStates[state]<<" with the state from the DNAME/CNAME quest: "<<vStates[cnameState]<<endl);
1253 updateValidationState(state, cnameState);
1254
1255 return true;
1256 }
1257 }
// Only reached when the cache hit held no unexpired IN-class record
1258 throw ImmediateServFailException("Could not determine whether or not there was a CNAME or DNAME in cache for '" + qname.toLogString() + "'");
1259 }
1260
1261 namespace {
1262 struct CacheEntry
1263 {
1264 vector<DNSRecord> records;
1265 vector<shared_ptr<RRSIGRecordContent>> signatures;
1266 uint32_t signaturesTTL{std::numeric_limits<uint32_t>::max()};
1267 };
1268 struct CacheKey
1269 {
1270 DNSName name;
1271 uint16_t type;
1272 DNSResourceRecord::Place place;
1273 bool operator<(const CacheKey& rhs) const {
1274 return tie(type, place, name) < tie(rhs.type, rhs.place, rhs.name);
1275 }
1276 };
1277 typedef map<CacheKey, CacheEntry> tcache_t;
1278 }
1279
1280 static void reapRecordsFromNegCacheEntryForValidation(tcache_t& tcache, const vector<DNSRecord>& records)
1281 {
1282 for (const auto& rec : records) {
1283 if (rec.d_type == QType::RRSIG) {
1284 auto rrsig = getRR<RRSIGRecordContent>(rec);
1285 if (rrsig) {
1286 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
1287 }
1288 } else {
1289 tcache[{rec.d_name,rec.d_type,rec.d_place}].records.push_back(rec);
1290 }
1291 }
1292 }
1293
1294 /*!
1295 * Convience function to push the records from records into ret with a new TTL
1296 *
1297 * \param records DNSRecords that need to go into ret
1298 * \param ttl The new TTL for these records
1299 * \param ret The vector of DNSRecords that should contian the records with the modified TTL
1300 */
1301 static void addTTLModifiedRecords(const vector<DNSRecord>& records, const uint32_t ttl, vector<DNSRecord>& ret) {
1302 for (const auto& rec : records) {
1303 DNSRecord r(rec);
1304 r.d_ttl = ttl;
1305 ret.push_back(r);
1306 }
1307 }
1308
/** Validates the records held by a negative cache entry and updates `state`.
 *
 *  The zone cuts are computed for qname (for its parent when a DS is being
 *  retrieved). The SOA and DNSSEC records of the entry are grouped with their
 *  signatures, each group is validated and the outcome is folded into `state`.
 *  When the result is Secure the denial of existence itself is checked too.
 *  Any state other than Indeterminate is written back to the negative cache,
 *  with a capped lifetime for Bogus entries.
 *
 *  \param ne    the negative cache entry; dereferenced without a null check
 *  \param res   rcode being returned: RCode::NXDomain selects the NXDOMAIN
 *               denial check, any other value the NXQTYPE one
 *  \param state in/out validation state
 */
1309 void SyncRes::computeNegCacheValidationStatus(const NegCache::NegCacheEntry* ne, const DNSName& qname, const QType& qtype, const int res, vState& state, unsigned int depth)
1310 {
1311 DNSName subdomain(qname);
1312 /* if we are retrieving a DS, we only care about the state of the parent zone */
1313 if(qtype == QType::DS)
1314 subdomain.chopOff();
1315
1316 computeZoneCuts(subdomain, g_rootdnsname, depth);
1317
// Group the entry's records and signatures by owner name, type and place
1318 tcache_t tcache;
1319 reapRecordsFromNegCacheEntryForValidation(tcache, ne->authoritySOA.records);
1320 reapRecordsFromNegCacheEntryForValidation(tcache, ne->authoritySOA.signatures);
1321 reapRecordsFromNegCacheEntryForValidation(tcache, ne->DNSSECRecords.records);
1322 reapRecordsFromNegCacheEntryForValidation(tcache, ne->DNSSECRecords.signatures);
1323
1324 for (const auto& entry : tcache) {
1325 // this happens when we did store signatures, but passed on the records themselves
1326 if (entry.second.records.empty()) {
1327 continue;
1328 }
1329
1330 const DNSName& owner = entry.first.name;
1331
1332 vState recordState = getValidationStatus(owner, false);
// While nothing is known yet, the state of this owner becomes the overall state
1333 if (state == Indeterminate) {
1334 state = recordState;
1335 }
1336
// Signatures are only checked for owners whose status is Secure
1337 if (recordState == Secure) {
1338 recordState = SyncRes::validateRecordsWithSigs(depth, qname, qtype, owner, entry.second.records, entry.second.signatures);
1339 }
1340
// Merge a differing result into the overall state and stop as soon as that
// state is no longer Secure
1341 if (recordState != Indeterminate && recordState != state) {
1342 updateValidationState(state, recordState);
1343 if (state != Secure) {
1344 break;
1345 }
1346 }
1347 }
1348
// The records validated: now check that they actually deny what was asked for
1349 if (state == Secure) {
1350 vState neValidationState = ne->d_validationState;
1351 dState expectedState = res == RCode::NXDomain ? NXDOMAIN : NXQTYPE;
1352 dState denialState = getDenialValidationState(*ne, state, expectedState, false);
1353 updateDenialValidationState(neValidationState, ne->d_name, state, denialState, expectedState, qtype == QType::DS);
1354 }
1355 if (state != Indeterminate) {
1356 /* validation succeeded, let's update the cache entry so we don't have to validate again */
1357 boost::optional<uint32_t> capTTD = boost::none;
1358 if (state == Bogus) {
1359 capTTD = d_now.tv_sec + s_maxbogusttl;
1360 }
1361 t_sstorage.negcache.updateValidationStatus(ne->d_name, ne->d_qtype, state, capTTD);
1362 }
1363 }
1364
/** Tries to answer qname|qtype from the negative cache and, failing that,
 *  from the record cache.
 *
 *  Negative cache: a root-trusted NXDOMAIN (when s_rootNXTrust is set) or a
 *  regular negative entry yields res = NXDomain or NoError, with the SOA (and,
 *  when d_doDNSSEC is set, the signatures and DNSSEC records) of the entry
 *  appended to ret. An entry whose state is Indeterminate is validated first
 *  when validation is wanted.
 *
 *  Record cache: the cached records, signatures and authority records are
 *  appended to ret with relative TTLs. Entries whose state is Indeterminate
 *  are validated first under the conditions checked below.
 *
 *  \param state set to the validation state of the cached data when true is returned
 *  \return true when an answer was taken from either cache, false otherwise
 */
1365 bool SyncRes::doCacheCheck(const DNSName &qname, const DNSName& authname, bool wasForwardedOrAuthZone, bool wasAuthZone, bool wasForwardRecurse, const QType &qtype, vector<DNSRecord>&ret, unsigned int depth, int &res, vState& state)
1366 {
1367 bool giveNegative=false;
1368
1369 string prefix;
1370 if(doLog()) {
1371 prefix=d_prefix;
1372 prefix.append(depth, ' ');
1373 }
1374
1375 // sqname and sqtype are used contain 'higher' names if we have them (e.g. powerdns.com|SOA when we find a negative entry for doesnotexists.powerdns.com|A)
1376 DNSName sqname(qname);
1377 QType sqt(qtype);
1378 uint32_t sttl=0;
1379 // cout<<"Lookup for '"<<qname<<"|"<<qtype.getName()<<"' -> "<<getLastLabel(qname)<<endl;
// cachedState is assigned on every path that reads it: by the negative cache
// branches below, or as an output argument of the record cache lookup
1380 vState cachedState;
1381 const NegCache::NegCacheEntry* ne = nullptr;
1382
1383 if(s_rootNXTrust &&
1384 t_sstorage.negcache.getRootNXTrust(qname, d_now, &ne) &&
1385 ne->d_auth.isRoot() &&
1386 !(wasForwardedOrAuthZone && !authname.isRoot())) { // when forwarding, the root may only neg-cache if it was forwarded to.
1387 sttl = ne->d_ttd - d_now.tv_sec;
1388 LOG(prefix<<qname<<": Entire name '"<<qname<<"', is negatively cached via '"<<ne->d_auth<<"' & '"<<ne->d_name<<"' for another "<<sttl<<" seconds"<<endl);
1389 res = RCode::NXDomain;
1390 giveNegative = true;
1391 cachedState = ne->d_validationState;
1392 }
1393 else if (t_sstorage.negcache.get(qname, qtype, d_now, &ne)) {
1394 /* If we are looking for a DS, discard NXD if auth == qname
1395 and ask for a specific denial instead */
1396 if (qtype != QType::DS || ne->d_qtype.getCode() || ne->d_auth != qname ||
1397 t_sstorage.negcache.get(qname, qtype, d_now, &ne, true))
1398 {
1399 res = 0;
1400 sttl = ne->d_ttd - d_now.tv_sec;
1401 giveNegative = true;
1402 cachedState = ne->d_validationState;
// A non-zero d_qtype is reported as a denial of just this type (NoError),
// a zero one as a denial of the entire name (NXDomain)
1403 if(ne->d_qtype.getCode()) {
1404 LOG(prefix<<qname<<": "<<qtype.getName()<<" is negatively cached via '"<<ne->d_auth<<"' for another "<<sttl<<" seconds"<<endl);
1405 res = RCode::NoError;
1406 }
1407 else {
1408 LOG(prefix<<qname<<": Entire name '"<<qname<<"', is negatively cached via '"<<ne->d_auth<<"' for another "<<sttl<<" seconds"<<endl);
1409 res = RCode::NXDomain;
1410 }
1411 }
1412 }
1413
1414 if (giveNegative) {
1415
1416 state = cachedState;
1417
1418 if (!wasAuthZone && shouldValidate() && state == Indeterminate) {
1419 LOG(prefix<<qname<<": got Indeterminate state for records retrieved from the negative cache, validating.."<<endl);
1420 computeNegCacheValidationStatus(ne, qname, qtype, res, state, depth);
1421
// Data that just turned out to be Bogus is handed out with a capped TTL
1422 if (state != cachedState && state == Bogus) {
1423 sttl = std::min(sttl, s_maxbogusttl);
1424 }
1425 }
1426
1427 // Transplant SOA to the returned packet
1428 addTTLModifiedRecords(ne->authoritySOA.records, sttl, ret);
1429 if(d_doDNSSEC) {
1430 addTTLModifiedRecords(ne->authoritySOA.signatures, sttl, ret);
1431 addTTLModifiedRecords(ne->DNSSECRecords.records, sttl, ret);
1432 addTTLModifiedRecords(ne->DNSSECRecords.signatures, sttl, ret);
1433 }
1434
1435 LOG(prefix<<qname<<": updating validation state with negative cache content for "<<qname<<" to "<<vStates[state]<<endl);
1436 return true;
1437 }
1438
// No negative answer: try the record cache
1439 vector<DNSRecord> cset;
1440 bool found=false, expired=false;
1441 vector<std::shared_ptr<RRSIGRecordContent>> signatures;
1442 vector<std::shared_ptr<DNSRecord>> authorityRecs;
1443 uint32_t ttl=0;
1444 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
1445 bool wasCachedAuth;
1446 if(t_RC->get(d_now.tv_sec, sqname, sqt, !wasForwardRecurse && d_requireAuthData, &cset, d_cacheRemote, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &cachedState, &wasCachedAuth) > 0) {
1447
1448 LOG(prefix<<sqname<<": Found cache hit for "<<sqt.getName()<<": ");
1449
1450 if (!wasAuthZone && shouldValidate() && (wasCachedAuth || wasForwardRecurse) && cachedState == Indeterminate && d_requireAuthData) {
1451
1452 /* This means we couldn't figure out the state when this entry was cached,
1453 most likely because we hadn't computed the zone cuts yet. */
1454 /* make sure they are computed before validating */
1455 DNSName subdomain(sqname);
1456 /* if we are retrieving a DS, we only care about the state of the parent zone */
1457 if(qtype == QType::DS)
1458 subdomain.chopOff();
1459
1460 computeZoneCuts(subdomain, g_rootdnsname, depth);
1461
1462 vState recordState = getValidationStatus(qname, false);
1463 if (recordState == Secure) {
1464 LOG(prefix<<sqname<<": got Indeterminate state from the cache, validating.."<<endl);
1465 cachedState = SyncRes::validateRecordsWithSigs(depth, sqname, sqt, sqname, cset, signatures);
1466 }
1467 else {
1468 cachedState = recordState;
1469 }
1470
1471 if (cachedState != Indeterminate) {
1472 LOG(prefix<<qname<<": got Indeterminate state from the cache, validation result is "<<vStates[cachedState]<<endl);
1473 if (cachedState == Bogus) {
1474 capTTL = s_maxbogusttl;
1475 }
1476 updateValidationStatusInCache(sqname, sqt, wasCachedAuth, cachedState);
1477 }
1478 }
1479
1480 for(auto j=cset.cbegin() ; j != cset.cend() ; ++j) {
1481
1482 LOG(j->d_content->getZoneRepresentation());
1483
1484 if (j->d_class != QClass::IN) {
1485 continue;
1486 }
1487
// The cached d_ttl is compared against the current time; it is made relative
// to now and capped before the record is handed out
1488 if(j->d_ttl>(unsigned int) d_now.tv_sec) {
1489 DNSRecord dr=*j;
1490 dr.d_ttl -= d_now.tv_sec;
1491 dr.d_ttl = std::min(dr.d_ttl, capTTL);
1492 ttl = dr.d_ttl;
1493 ret.push_back(dr);
1494 LOG("[ttl="<<dr.d_ttl<<"] ");
1495 found=true;
1496 }
1497 else {
1498 LOG("[expired] ");
1499 expired=true;
1500 }
1501 }
1502
1503 ret.reserve(ret.size() + signatures.size() + authorityRecs.size());
1504
// Signatures and authority records get the TTL of the last unexpired record
// (0 when there was none)
1505 for(const auto& signature : signatures) {
1506 DNSRecord dr;
1507 dr.d_type=QType::RRSIG;
1508 dr.d_name=sqname;
1509 dr.d_ttl=ttl;
1510 dr.d_content=signature;
1511 dr.d_place = DNSResourceRecord::ANSWER;
1512 dr.d_class=QClass::IN;
1513 ret.push_back(dr);
1514 }
1515
1516 for(const auto& rec : authorityRecs) {
1517 DNSRecord dr(*rec);
1518 dr.d_ttl=ttl;
1519 ret.push_back(dr);
1520 }
1521
1522 LOG(endl);
// Only a set without any expired record counts as a hit.
// NOTE(review): when false is returned below, the records, signatures and
// authority records have already been appended to ret - confirm that callers
// cope with that.
1523 if(found && !expired) {
1524 if (!giveNegative)
1525 res=0;
1526 LOG(prefix<<qname<<": updating validation state with cache content for "<<qname<<" to "<<vStates[cachedState]<<endl);
1527 state = cachedState;
1528 return true;
1529 }
1530 else
1531 LOG(prefix<<qname<<": cache had only stale entries"<<endl);
1532 }
1533
1534 return false;
1535 }
1536
1537 bool SyncRes::moreSpecificThan(const DNSName& a, const DNSName &b) const
1538 {
1539 return (a.isPartOf(b) && a.countLabels() > b.countLabels());
1540 }
1541
1542 struct speedOrder
1543 {
1544 bool operator()(const std::pair<DNSName, double> &a, const std::pair<DNSName, double> &b) const
1545 {
1546 return a.second < b.second;
1547 }
1548 };
1549
1550 inline std::vector<std::pair<DNSName, double>> SyncRes::shuffleInSpeedOrder(NsSet &tnameservers, const string &prefix)
1551 {
1552 std::vector<std::pair<DNSName, double>> rnameservers;
1553 rnameservers.reserve(tnameservers.size());
1554 for(const auto& tns: tnameservers) {
1555 double speed = t_sstorage.nsSpeeds[tns.first].get(&d_now);
1556 rnameservers.push_back({tns.first, speed});
1557 if(tns.first.empty()) // this was an authoritative OOB zone, don't pollute the nsSpeeds with that
1558 return rnameservers;
1559 }
1560
1561 random_shuffle(rnameservers.begin(),rnameservers.end());
1562 speedOrder so;
1563 stable_sort(rnameservers.begin(),rnameservers.end(), so);
1564
1565 if(doLog()) {
1566 LOG(prefix<<"Nameservers: ");
1567 for(auto i=rnameservers.begin();i!=rnameservers.end();++i) {
1568 if(i!=rnameservers.begin()) {
1569 LOG(", ");
1570 if(!((i-rnameservers.begin())%3)) {
1571 LOG(endl<<prefix<<" ");
1572 }
1573 }
1574 LOG(i->first.toLogString()<<"(" << (boost::format("%0.2f") % (i->second/1000.0)).str() <<"ms)");
1575 }
1576 LOG(endl);
1577 }
1578 return rnameservers;
1579 }
1580
1581 inline vector<ComboAddress> SyncRes::shuffleForwardSpeed(const vector<ComboAddress> &rnameservers, const string &prefix, const bool wasRd)
1582 {
1583 vector<ComboAddress> nameservers = rnameservers;
1584 map<ComboAddress, double> speeds;
1585
1586 for(const auto& val: nameservers) {
1587 double speed;
1588 DNSName nsName = DNSName(val.toStringWithPort());
1589 speed=t_sstorage.nsSpeeds[nsName].get(&d_now);
1590 speeds[val]=speed;
1591 }
1592 random_shuffle(nameservers.begin(),nameservers.end());
1593 speedOrderCA so(speeds);
1594 stable_sort(nameservers.begin(),nameservers.end(), so);
1595
1596 if(doLog()) {
1597 LOG(prefix<<"Nameservers: ");
1598 for(vector<ComboAddress>::const_iterator i=nameservers.cbegin();i!=nameservers.cend();++i) {
1599 if(i!=nameservers.cbegin()) {
1600 LOG(", ");
1601 if(!((i-nameservers.cbegin())%3)) {
1602 LOG(endl<<prefix<<" ");
1603 }
1604 }
1605 LOG((wasRd ? string("+") : string("-")) << i->toStringWithPort() <<"(" << (boost::format("%0.2f") % (speeds[*i]/1000.0)).str() <<"ms)");
1606 }
1607 LOG(endl);
1608 }
1609 return nameservers;
1610 }
1611
1612 static uint32_t getRRSIGTTL(const time_t now, const std::shared_ptr<RRSIGRecordContent>& rrsig)
1613 {
1614 uint32_t res = 0;
1615 if (now < rrsig->d_sigexpire) {
1616 res = static_cast<uint32_t>(rrsig->d_sigexpire) - now;
1617 }
1618 return res;
1619 }
1620
// QTypes of the NSEC-style records (NSEC and NSEC3); harvestNXRecords() files
// records of these types, and the RRSIGs covering them, under DNSSECRecords.
1621 static const set<uint16_t> nsecTypes = {QType::NSEC, QType::NSEC3};
1622
1623 /* Fills the authoritySOA and DNSSECRecords fields from ne with those found in the records
1624 *
1625 * \param records The records to parse for the authority SOA and NSEC(3) records
1626 * \param ne The NegCacheEntry to be filled out (will not be cleared, only appended to
1627 */
1628 static void harvestNXRecords(const vector<DNSRecord>& records, NegCache::NegCacheEntry& ne, const time_t now, uint32_t* lowestTTL) {
1629 for(const auto& rec : records) {
1630 if(rec.d_place != DNSResourceRecord::AUTHORITY)
1631 // RFC 4035 section 3.1.3. indicates that NSEC records MUST be placed in
1632 // the AUTHORITY section. Section 3.1.1 indicates that that RRSIGs for
1633 // records MUST be in the same section as the records they cover.
1634 // Hence, we ignore all records outside of the AUTHORITY section.
1635 continue;
1636
1637 if(rec.d_type == QType::RRSIG) {
1638 auto rrsig = getRR<RRSIGRecordContent>(rec);
1639 if(rrsig) {
1640 if(rrsig->d_type == QType::SOA) {
1641 ne.authoritySOA.signatures.push_back(rec);
1642 if (lowestTTL && isRRSIGNotExpired(now, rrsig)) {
1643 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1644 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
1645 }
1646 }
1647 if(nsecTypes.count(rrsig->d_type)) {
1648 ne.DNSSECRecords.signatures.push_back(rec);
1649 if (lowestTTL && isRRSIGNotExpired(now, rrsig)) {
1650 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1651 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
1652 }
1653 }
1654 }
1655 continue;
1656 }
1657 if(rec.d_type == QType::SOA) {
1658 ne.authoritySOA.records.push_back(rec);
1659 if (lowestTTL) {
1660 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1661 }
1662 continue;
1663 }
1664 if(nsecTypes.count(rec.d_type)) {
1665 ne.DNSSECRecords.records.push_back(rec);
1666 if (lowestTTL) {
1667 *lowestTTL = min(*lowestTTL, rec.d_ttl);
1668 }
1669 continue;
1670 }
1671 }
1672 }
1673
1674 static cspmap_t harvestCSPFromNE(const NegCache::NegCacheEntry& ne)
1675 {
1676 cspmap_t cspmap;
1677 for(const auto& rec : ne.DNSSECRecords.signatures) {
1678 if(rec.d_type == QType::RRSIG) {
1679 auto rrc = getRR<RRSIGRecordContent>(rec);
1680 if (rrc) {
1681 cspmap[{rec.d_name,rrc->d_type}].signatures.push_back(rrc);
1682 }
1683 }
1684 }
1685 for(const auto& rec : ne.DNSSECRecords.records) {
1686 cspmap[{rec.d_name, rec.d_type}].records.push_back(rec.d_content);
1687 }
1688 return cspmap;
1689 }
1690
1691 // TODO remove after processRecords is fixed!
1692 // Adds the RRSIG for the SOA and the NSEC(3) + RRSIGs to ret
1693 static void addNXNSECS(vector<DNSRecord>&ret, const vector<DNSRecord>& records)
1694 {
1695 NegCache::NegCacheEntry ne;
1696 harvestNXRecords(records, ne, 0, nullptr);
1697 ret.insert(ret.end(), ne.authoritySOA.signatures.begin(), ne.authoritySOA.signatures.end());
1698 ret.insert(ret.end(), ne.DNSSECRecords.records.begin(), ne.DNSSECRecords.records.end());
1699 ret.insert(ret.end(), ne.DNSSECRecords.signatures.begin(), ne.DNSSECRecords.signatures.end());
1700 }
1701
1702 bool SyncRes::nameserversBlockedByRPZ(const DNSFilterEngine& dfe, const NsSet& nameservers)
1703 {
1704 if(d_wantsRPZ) {
1705 for (auto const &ns : nameservers) {
1706 d_appliedPolicy = dfe.getProcessingPolicy(ns.first, d_discardedPolicies);
1707 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
1708 LOG(", however nameserver "<<ns.first<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
1709 return true;
1710 }
1711
1712 // Traverse all IP addresses for this NS to see if they have an RPN NSIP policy
1713 for (auto const &address : ns.second.first) {
1714 d_appliedPolicy = dfe.getProcessingPolicy(address, d_discardedPolicies);
1715 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
1716 LOG(", however nameserver "<<ns.first<<" IP address "<<address.toString()<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
1717 return true;
1718 }
1719 }
1720 }
1721 }
1722 return false;
1723 }
1724
1725 bool SyncRes::nameserverIPBlockedByRPZ(const DNSFilterEngine& dfe, const ComboAddress& remoteIP)
1726 {
1727 if (d_wantsRPZ) {
1728 d_appliedPolicy = dfe.getProcessingPolicy(remoteIP, d_discardedPolicies);
1729 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
1730 LOG(" (blocked by RPZ policy '"+(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")+"')");
1731 return true;
1732 }
1733 }
1734 return false;
1735 }
1736
1737 vector<ComboAddress> SyncRes::retrieveAddressesForNS(const std::string& prefix, const DNSName& qname, std::vector<std::pair<DNSName, double>>::const_iterator& tns, const unsigned int depth, set<GetBestNSAnswer>& beenthere, const vector<std::pair<DNSName, double>>& rnameservers, NsSet& nameservers, bool& sendRDQuery, bool& pierceDontQuery, bool& flawedNSSet, bool cacheOnly)
1738 {
1739 vector<ComboAddress> result;
1740
1741 if(!tns->first.empty()) {
1742 LOG(prefix<<qname<<": Trying to resolve NS '"<<tns->first<< "' ("<<1+tns-rnameservers.begin()<<"/"<<(unsigned int)rnameservers.size()<<")"<<endl);
1743 result = getAddrs(tns->first, depth+2, beenthere, cacheOnly);
1744 pierceDontQuery=false;
1745 }
1746 else {
1747 LOG(prefix<<qname<<": Domain has hardcoded nameserver");
1748
1749 if(nameservers[tns->first].first.size() > 1) {
1750 LOG("s");
1751 }
1752 LOG(endl);
1753
1754 sendRDQuery = nameservers[tns->first].second;
1755 result = shuffleForwardSpeed(nameservers[tns->first].first, doLog() ? (prefix+qname.toString()+": ") : string(), sendRDQuery);
1756 pierceDontQuery=true;
1757 }
1758 return result;
1759 }
1760
1761 bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress& remoteIP, const DNSName& qname, const QType& qtype, bool pierceDontQuery)
1762 {
1763 if(t_sstorage.throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(remoteIP, "", 0))) {
1764 LOG(prefix<<qname<<": server throttled "<<endl);
1765 s_throttledqueries++; d_throttledqueries++;
1766 return true;
1767 }
1768 else if(t_sstorage.throttle.shouldThrottle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()))) {
1769 LOG(prefix<<qname<<": query throttled "<<remoteIP.toString()<<", "<<qname<<"; "<<qtype.getName()<<endl);
1770 s_throttledqueries++; d_throttledqueries++;
1771 return true;
1772 }
1773 else if(!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
1774 LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
1775 s_dontqueries++;
1776 return true;
1777 }
1778 return false;
1779 }
1780
1781 bool SyncRes::validationEnabled() const
1782 {
1783 return g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate;
1784 }
1785
1786 uint32_t SyncRes::computeLowestTTD(const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures, uint32_t signaturesTTL) const
1787 {
1788 uint32_t lowestTTD = std::numeric_limits<uint32_t>::max();
1789 for(const auto& record : records)
1790 lowestTTD = min(lowestTTD, record.d_ttl);
1791
1792 /* even if it was not requested for that request (Process, and neither AD nor DO set),
1793 it might be requested at a later time so we need to be careful with the TTL. */
1794 if (validationEnabled() && !signatures.empty()) {
1795 /* if we are validating, we don't want to cache records after their signatures expire. */
1796 /* records TTL are now TTD, let's add 'now' to the signatures lowest TTL */
1797 lowestTTD = min(lowestTTD, static_cast<uint32_t>(signaturesTTL + d_now.tv_sec));
1798
1799 for(const auto& sig : signatures) {
1800 if (isRRSIGNotExpired(d_now.tv_sec, sig)) {
1801 // we don't decerement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
1802 lowestTTD = min(lowestTTD, static_cast<uint32_t>(sig->d_sigexpire));
1803 }
1804 }
1805 }
1806
1807 return lowestTTD;
1808 }
1809
1810 void SyncRes::updateValidationState(vState& state, const vState stateUpdate)
1811 {
1812 LOG(d_prefix<<"validation state was "<<std::string(vStates[state])<<", state update is "<<std::string(vStates[stateUpdate]));
1813
1814 if (stateUpdate == TA) {
1815 state = Secure;
1816 }
1817 else if (stateUpdate == NTA) {
1818 state = Insecure;
1819 }
1820 else if (stateUpdate == Bogus) {
1821 state = Bogus;
1822 }
1823 else if (state == Indeterminate) {
1824 state = stateUpdate;
1825 }
1826 else if (stateUpdate == Insecure) {
1827 if (state != Bogus) {
1828 state = Insecure;
1829 }
1830 }
1831 LOG(", validation state is now "<<std::string(vStates[state])<<endl);
1832 }
1833
1834 vState SyncRes::getTA(const DNSName& zone, dsmap_t& ds)
1835 {
1836 auto luaLocal = g_luaconfs.getLocal();
1837
1838 if (luaLocal->dsAnchors.empty()) {
1839 LOG(d_prefix<<": No trust anchors configured, everything is Insecure"<<endl);
1840 /* We have no TA, everything is insecure */
1841 return Insecure;
1842 }
1843
1844 std::string reason;
1845 if (haveNegativeTrustAnchor(luaLocal->negAnchors, zone, reason)) {
1846 LOG(d_prefix<<": got NTA for '"<<zone<<"'"<<endl);
1847 return NTA;
1848 }
1849
1850 if (getTrustAnchor(luaLocal->dsAnchors, zone, ds)) {
1851 LOG(d_prefix<<": got TA for '"<<zone<<"'"<<endl);
1852 return TA;
1853 }
1854 else {
1855 LOG(d_prefix<<": no TA found for '"<<zone<<"' among "<< luaLocal->dsAnchors.size()<<endl);
1856 }
1857
1858 if (zone.isRoot()) {
1859 /* No TA for the root */
1860 return Insecure;
1861 }
1862
1863 return Indeterminate;
1864 }
1865
1866 static size_t countSupportedDS(const dsmap_t& dsmap)
1867 {
1868 size_t count = 0;
1869
1870 for (const auto& ds : dsmap) {
1871 if (isSupportedDS(ds)) {
1872 count++;
1873 }
1874 }
1875
1876 return count;
1877 }
1878
1879 vState SyncRes::getDSRecords(const DNSName& zone, dsmap_t& ds, bool taOnly, unsigned int depth, bool bogusOnNXD, bool* foundCut)
1880 {
1881 vState result = getTA(zone, ds);
1882
1883 if (result != Indeterminate || taOnly) {
1884 if (foundCut) {
1885 *foundCut = (result != Indeterminate);
1886 }
1887
1888 if (result == TA) {
1889 if (countSupportedDS(ds) == 0) {
1890 ds.clear();
1891 result = Insecure;
1892 }
1893 else {
1894 result = Secure;
1895 }
1896 }
1897 else if (result == NTA) {
1898 result = Insecure;
1899 }
1900
1901 return result;
1902 }
1903
1904 bool oldSkipCNAME = d_skipCNAMECheck;
1905 d_skipCNAMECheck = true;
1906
1907 std::set<GetBestNSAnswer> beenthere;
1908 std::vector<DNSRecord> dsrecords;
1909
1910 vState state = Indeterminate;
1911 int rcode = doResolve(zone, QType(QType::DS), dsrecords, depth + 1, beenthere, state);
1912 d_skipCNAMECheck = oldSkipCNAME;
1913
1914 if (rcode == RCode::NoError || (rcode == RCode::NXDomain && !bogusOnNXD)) {
1915 uint8_t bestDigestType = 0;
1916
1917 bool gotCNAME = false;
1918 for (const auto& record : dsrecords) {
1919 if (record.d_type == QType::DS) {
1920 const auto dscontent = getRR<DSRecordContent>(record);
1921 if (dscontent && isSupportedDS(*dscontent)) {
1922 // Make GOST a lower prio than SHA256
1923 if (dscontent->d_digesttype == DNSSECKeeper::GOST && bestDigestType == DNSSECKeeper::SHA256) {
1924 continue;
1925 }
1926 if (dscontent->d_digesttype > bestDigestType || (bestDigestType == DNSSECKeeper::GOST && dscontent->d_digesttype == DNSSECKeeper::SHA256)) {
1927 bestDigestType = dscontent->d_digesttype;
1928 }
1929 ds.insert(*dscontent);
1930 }
1931 }
1932 else if (record.d_type == QType::CNAME && record.d_name == zone) {
1933 gotCNAME = true;
1934 }
1935 }
1936
1937 /* RFC 4509 section 3: "Validator implementations SHOULD ignore DS RRs containing SHA-1
1938 * digests if DS RRs with SHA-256 digests are present in the DS RRset."
1939 * As SHA348 is specified as well, the spirit of the this line is "use the best algorithm".
1940 */
1941 for (auto dsrec = ds.begin(); dsrec != ds.end(); ) {
1942 if (dsrec->d_digesttype != bestDigestType) {
1943 dsrec = ds.erase(dsrec);
1944 }
1945 else {
1946 ++dsrec;
1947 }
1948 }
1949
1950 if (rcode == RCode::NoError) {
1951 if (ds.empty()) {
1952 /* we have no DS, it's either:
1953 - a delegation to a non-DNSSEC signed zone
1954 - no delegation, we stay in the same zone
1955 */
1956 if (gotCNAME || denialProvesNoDelegation(zone, dsrecords)) {
1957 /* we are still inside the same zone */
1958
1959 if (foundCut) {
1960 *foundCut = false;
1961 }
1962 return state;
1963 }
1964
1965 /* delegation with no DS, might be Secure -> Insecure */
1966 if (foundCut) {
1967 *foundCut = true;
1968 }
1969
1970 /* a delegation with no DS is either:
1971 - a signed zone (Secure) to an unsigned one (Insecure)
1972 - an unsigned zone to another unsigned one (Insecure stays Insecure, Bogus stays Bogus)
1973 */
1974 return state == Secure ? Insecure : state;
1975 } else {
1976 /* we have a DS */
1977 if (foundCut) {
1978 *foundCut = true;
1979 }
1980 }
1981 }
1982
1983 return state;
1984 }
1985
1986 LOG(d_prefix<<": returning Bogus state from "<<__func__<<"("<<zone<<")"<<endl);
1987 return Bogus;
1988 }
1989
1990 bool SyncRes::haveExactValidationStatus(const DNSName& domain)
1991 {
1992 if (!shouldValidate()) {
1993 return false;
1994 }
1995 const auto& it = d_cutStates.find(domain);
1996 if (it != d_cutStates.cend()) {
1997 return true;
1998 }
1999 return false;
2000 }
2001
2002 vState SyncRes::getValidationStatus(const DNSName& subdomain, bool allowIndeterminate)
2003 {
2004 vState result = Indeterminate;
2005
2006 if (!shouldValidate()) {
2007 return result;
2008 }
2009 DNSName name(subdomain);
2010 do {
2011 const auto& it = d_cutStates.find(name);
2012 if (it != d_cutStates.cend()) {
2013 if (allowIndeterminate || it->second != Indeterminate) {
2014 LOG(d_prefix<<": got status "<<vStates[it->second]<<" for name "<<subdomain<<" (from "<<name<<")"<<endl);
2015 return it->second;
2016 }
2017 }
2018 }
2019 while (name.chopOff());
2020
2021 return result;
2022 }
2023
2024 bool SyncRes::lookForCut(const DNSName& qname, unsigned int depth, const vState existingState, vState& newState)
2025 {
2026 bool foundCut = false;
2027 dsmap_t ds;
2028 vState dsState = getDSRecords(qname, ds, newState == Bogus || existingState == Insecure || existingState == Bogus, depth, false, &foundCut);
2029
2030 if (dsState != Indeterminate) {
2031 newState = dsState;
2032 }
2033
2034 return foundCut;
2035 }
2036
2037 void SyncRes::computeZoneCuts(const DNSName& begin, const DNSName& end, unsigned int depth)
2038 {
2039 if(!begin.isPartOf(end)) {
2040 LOG(d_prefix<<" "<<begin.toLogString()<<" is not part of "<<end.toLogString()<<endl);
2041 throw PDNSException(begin.toLogString() + " is not part of " + end.toLogString());
2042 }
2043
2044 if (d_cutStates.count(begin) != 0) {
2045 return;
2046 }
2047
2048 dsmap_t ds;
2049 vState cutState = getDSRecords(end, ds, false, depth);
2050 LOG(d_prefix<<": setting cut state for "<<end<<" to "<<vStates[cutState]<<endl);
2051 d_cutStates[end] = cutState;
2052
2053 if (!shouldValidate()) {
2054 return;
2055 }
2056
2057 DNSName qname(end);
2058 std::vector<string> labelsToAdd = begin.makeRelative(end).getRawLabels();
2059
2060 bool oldSkipCNAME = d_skipCNAMECheck;
2061 d_skipCNAMECheck = true;
2062
2063 while(qname != begin) {
2064 if (labelsToAdd.empty())
2065 break;
2066
2067 qname.prependRawLabel(labelsToAdd.back());
2068 labelsToAdd.pop_back();
2069 LOG(d_prefix<<": - Looking for a cut at "<<qname<<endl);
2070
2071 const auto cutIt = d_cutStates.find(qname);
2072 if (cutIt != d_cutStates.cend()) {
2073 if (cutIt->second != Indeterminate) {
2074 LOG(d_prefix<<": - Cut already known at "<<qname<<endl);
2075 cutState = cutIt->second;
2076 continue;
2077 }
2078 }
2079
2080 /* no need to look for NS and DS if we are already insecure or bogus,
2081 just look for (N)TA
2082 */
2083 if (cutState == Insecure || cutState == Bogus) {
2084 dsmap_t cutDS;
2085 vState newState = getDSRecords(qname, cutDS, true, depth);
2086 if (newState == Indeterminate) {
2087 continue;
2088 }
2089
2090 LOG(d_prefix<<": New state for "<<qname<<" is "<<vStates[newState]<<endl);
2091 cutState = newState;
2092
2093 d_cutStates[qname] = cutState;
2094
2095 continue;
2096 }
2097
2098 vState newState = Indeterminate;
2099 /* temporarily mark as Indeterminate, so that we won't enter an endless loop
2100 trying to determine that zone cut again. */
2101 d_cutStates[qname] = newState;
2102 bool foundCut = lookForCut(qname, depth + 1, cutState, newState);
2103 if (foundCut) {
2104 LOG(d_prefix<<": - Found cut at "<<qname<<endl);
2105 if (newState != Indeterminate) {
2106 cutState = newState;
2107 }
2108 LOG(d_prefix<<": New state for "<<qname<<" is "<<vStates[cutState]<<endl);
2109 d_cutStates[qname] = cutState;
2110 }
2111 else {
2112 /* remove the temporary cut */
2113 LOG(d_prefix<<qname<<": removing cut state for "<<qname<<endl);
2114 d_cutStates.erase(qname);
2115 }
2116 }
2117
2118 d_skipCNAMECheck = oldSkipCNAME;
2119
2120 LOG(d_prefix<<": list of cuts from "<<begin<<" to "<<end<<endl);
2121 for (const auto& cut : d_cutStates) {
2122 if (cut.first.isRoot() || (begin.isPartOf(cut.first) && cut.first.isPartOf(end))) {
2123 LOG(" - "<<cut.first<<": "<<vStates[cut.second]<<endl);
2124 }
2125 }
2126 }
2127
2128 vState SyncRes::validateDNSKeys(const DNSName& zone, const std::vector<DNSRecord>& dnskeys, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures, unsigned int depth)
2129 {
2130 dsmap_t ds;
2131 if (!signatures.empty()) {
2132 DNSName signer = getSigner(signatures);
2133
2134 if (!signer.empty() && zone.isPartOf(signer)) {
2135 vState state = getDSRecords(signer, ds, false, depth);
2136
2137 if (state != Secure) {
2138 return state;
2139 }
2140 }
2141 }
2142
2143 skeyset_t tentativeKeys;
2144 std::vector<shared_ptr<DNSRecordContent> > toSign;
2145
2146 for (const auto& dnskey : dnskeys) {
2147 if (dnskey.d_type == QType::DNSKEY) {
2148 auto content = getRR<DNSKEYRecordContent>(dnskey);
2149 if (content) {
2150 tentativeKeys.insert(content);
2151 toSign.push_back(content);
2152 }
2153 }
2154 }
2155
2156 LOG(d_prefix<<": trying to validate "<<std::to_string(tentativeKeys.size())<<" DNSKEYs with "<<std::to_string(ds.size())<<" DS"<<endl);
2157 skeyset_t validatedKeys;
2158 validateDNSKeysAgainstDS(d_now.tv_sec, zone, ds, tentativeKeys, toSign, signatures, validatedKeys);
2159
2160 LOG(d_prefix<<": we now have "<<std::to_string(validatedKeys.size())<<" DNSKEYs"<<endl);
2161
2162 /* if we found at least one valid RRSIG covering the set,
2163 all tentative keys are validated keys. Otherwise it means
2164 we haven't found at least one DNSKEY and a matching RRSIG
2165 covering this set, this looks Bogus. */
2166 if (validatedKeys.size() != tentativeKeys.size()) {
2167 LOG(d_prefix<<": returning Bogus state from "<<__func__<<"("<<zone<<")"<<endl);
2168 return Bogus;
2169 }
2170
2171 return Secure;
2172 }
2173
2174 vState SyncRes::getDNSKeys(const DNSName& signer, skeyset_t& keys, unsigned int depth)
2175 {
2176 std::vector<DNSRecord> records;
2177 std::set<GetBestNSAnswer> beenthere;
2178 LOG(d_prefix<<"Retrieving DNSKeys for "<<signer<<endl);
2179
2180 vState state = Indeterminate;
2181 /* following CNAME might lead to us to the wrong DNSKEY */
2182 bool oldSkipCNAME = d_skipCNAMECheck;
2183 d_skipCNAMECheck = true;
2184 int rcode = doResolve(signer, QType(QType::DNSKEY), records, depth + 1, beenthere, state);
2185 d_skipCNAMECheck = oldSkipCNAME;
2186
2187 if (rcode == RCode::NoError) {
2188 if (state == Secure) {
2189 for (const auto& key : records) {
2190 if (key.d_type == QType::DNSKEY) {
2191 auto content = getRR<DNSKEYRecordContent>(key);
2192 if (content) {
2193 keys.insert(content);
2194 }
2195 }
2196 }
2197 }
2198 LOG(d_prefix<<"Retrieved "<<keys.size()<<" DNSKeys for "<<signer<<", state is "<<vStates[state]<<endl);
2199 return state;
2200 }
2201
2202 LOG(d_prefix<<"Returning Bogus state from "<<__func__<<"("<<signer<<")"<<endl);
2203 return Bogus;
2204 }
2205
2206 vState SyncRes::validateRecordsWithSigs(unsigned int depth, const DNSName& qname, const QType& qtype, const DNSName& name, const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<RRSIGRecordContent> >& signatures)
2207 {
2208 skeyset_t keys;
2209 if (!signatures.empty()) {
2210 const DNSName signer = getSigner(signatures);
2211 if (!signer.empty() && name.isPartOf(signer)) {
2212 if ((qtype == QType::DNSKEY || qtype == QType::DS) && signer == qname) {
2213 /* we are already retrieving those keys, sorry */
2214 if (qtype == QType::DS) {
2215 /* something is very wrong */
2216 LOG(d_prefix<<"The DS for "<<qname<<" is signed by itself, going Bogus"<<endl);
2217 return Bogus;
2218 }
2219 return Indeterminate;
2220 }
2221 vState state = getDNSKeys(signer, keys, depth);
2222 if (state != Secure) {
2223 return state;
2224 }
2225 }
2226 } else {
2227 LOG(d_prefix<<"Bogus!"<<endl);
2228 return Bogus;
2229 }
2230
2231 std::vector<std::shared_ptr<DNSRecordContent> > recordcontents;
2232 for (const auto& record : records) {
2233 recordcontents.push_back(record.d_content);
2234 }
2235
2236 LOG(d_prefix<<"Going to validate "<<recordcontents.size()<< " record contents with "<<signatures.size()<<" sigs and "<<keys.size()<<" keys for "<<name<<endl);
2237 if (validateWithKeySet(d_now.tv_sec, name, recordcontents, signatures, keys, false)) {
2238 LOG(d_prefix<<"Secure!"<<endl);
2239 return Secure;
2240 }
2241
2242 LOG(d_prefix<<"Bogus!"<<endl);
2243 return Bogus;
2244 }
2245
2246 static bool allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec)
2247 {
2248 switch(rec.d_type) {
2249 case QType::MX:
2250 {
2251 if (auto mxContent = getRR<MXRecordContent>(rec)) {
2252 allowedAdditionals.insert(mxContent->d_mxname);
2253 }
2254 return true;
2255 }
2256 case QType::NS:
2257 {
2258 if (auto nsContent = getRR<NSRecordContent>(rec)) {
2259 allowedAdditionals.insert(nsContent->getNS());
2260 }
2261 return true;
2262 }
2263 case QType::SRV:
2264 {
2265 if (auto srvContent = getRR<SRVRecordContent>(rec)) {
2266 allowedAdditionals.insert(srvContent->d_target);
2267 }
2268 return true;
2269 }
2270 default:
2271 return false;
2272 }
2273 }
2274
2275 void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType& qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
2276 {
2277 const bool wasForwardRecurse = wasForwarded && rdQuery;
2278 /* list of names for which we will allow A and AAAA records in the additional section
2279 to remain */
2280 std::unordered_set<DNSName> allowedAdditionals = { qname };
2281 bool haveAnswers = false;
2282 bool isNXDomain = false;
2283 bool isNXQType = false;
2284
2285 for(auto rec = lwr.d_records.begin(); rec != lwr.d_records.end(); ) {
2286
2287 if (rec->d_type == QType::OPT) {
2288 ++rec;
2289 continue;
2290 }
2291
2292 if (rec->d_class != QClass::IN) {
2293 LOG(prefix<<"Removing non internet-classed data received from "<<auth<<endl);
2294 rec = lwr.d_records.erase(rec);
2295 continue;
2296 }
2297
2298 if (rec->d_type == QType::ANY) {
2299 LOG(prefix<<"Removing 'ANY'-typed data received from "<<auth<<endl);
2300 rec = lwr.d_records.erase(rec);
2301 continue;
2302 }
2303
2304 if (!rec->d_name.isPartOf(auth)) {
2305 LOG(prefix<<"Removing record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2306 rec = lwr.d_records.erase(rec);
2307 continue;
2308 }
2309
2310 /* dealing with the records in answer */
2311 if (!(lwr.d_aabit || wasForwardRecurse) && rec->d_place == DNSResourceRecord::ANSWER) {
2312 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2313 are sending such responses */
2314 if (!(rec->d_type == QType::CNAME && qname == rec->d_name)) {
2315 LOG(prefix<<"Removing record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the answer section without the AA bit set received from "<<auth<<endl);
2316 rec = lwr.d_records.erase(rec);
2317 continue;
2318 }
2319 }
2320
2321 if (rec->d_type == QType::DNAME && (rec->d_place != DNSResourceRecord::ANSWER || !qname.isPartOf(rec->d_name))) {
2322 LOG(prefix<<"Removing invalid DNAME record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2323 rec = lwr.d_records.erase(rec);
2324 continue;
2325 }
2326
2327 if (rec->d_place == DNSResourceRecord::ANSWER && (qtype != QType::ANY && rec->d_type != qtype.getCode() && s_redirectionQTypes.count(rec->d_type) == 0 && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG)) {
2328 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2329 rec = lwr.d_records.erase(rec);
2330 continue;
2331 }
2332
2333 if (rec->d_place == DNSResourceRecord::ANSWER && !haveAnswers) {
2334 haveAnswers = true;
2335 }
2336
2337 if (rec->d_place == DNSResourceRecord::ANSWER) {
2338 allowAdditionalEntry(allowedAdditionals, *rec);
2339 }
2340
2341 /* dealing with the records in authority */
2342 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type != QType::NS && rec->d_type != QType::DS && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG && rec->d_type != QType::NSEC && rec->d_type != QType::NSEC3) {
2343 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2344 rec = lwr.d_records.erase(rec);
2345 continue;
2346 }
2347
2348 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::SOA) {
2349 if (!qname.isPartOf(rec->d_name)) {
2350 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2351 rec = lwr.d_records.erase(rec);
2352 continue;
2353 }
2354
2355 if (!(lwr.d_aabit || wasForwardRecurse)) {
2356 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2357 rec = lwr.d_records.erase(rec);
2358 continue;
2359 }
2360
2361 if (!haveAnswers) {
2362 if (lwr.d_rcode == RCode::NXDomain) {
2363 isNXDomain = true;
2364 }
2365 else if (lwr.d_rcode == RCode::NoError) {
2366 isNXQType = true;
2367 }
2368 }
2369 }
2370
2371 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && (isNXDomain || isNXQType)) {
2372 /* we don't want to pick up NS records in AUTHORITY or ADDITIONAL sections of NXDomain answers
2373 because they are somewhat easy to insert into a large, fragmented UDP response
2374 for an off-path attacker by injecting spoofed UDP fragments.
2375 */
2376 LOG(prefix<<"Removing NS record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section of a "<<(isNXDomain ? "NXD" : "NXQTYPE")<<" response received from "<<auth<<endl);
2377 rec = lwr.d_records.erase(rec);
2378 continue;
2379 }
2380
2381 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS) {
2382 allowAdditionalEntry(allowedAdditionals, *rec);
2383 }
2384
2385 /* dealing with the records in additional */
2386 if (rec->d_place == DNSResourceRecord::ADDITIONAL && rec->d_type != QType::A && rec->d_type != QType::AAAA && rec->d_type != QType::RRSIG) {
2387 LOG(prefix<<"Removing irrelevant record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2388 rec = lwr.d_records.erase(rec);
2389 continue;
2390 }
2391
2392 if (rec->d_place == DNSResourceRecord::ADDITIONAL && allowedAdditionals.count(rec->d_name) == 0) {
2393 LOG(prefix<<"Removing irrelevant additional record '"<<rec->d_name<<"|"<<DNSRecordContent::NumberToType(rec->d_type)<<"|"<<rec->d_content->getZoneRepresentation()<<"' in the "<<(int)rec->d_place<<" section received from "<<auth<<endl);
2394 rec = lwr.d_records.erase(rec);
2395 continue;
2396 }
2397
2398 ++rec;
2399 }
2400 }
2401
2402 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType& qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery)
2403 {
2404 bool wasForwardRecurse = wasForwarded && rdQuery;
2405 tcache_t tcache;
2406
2407 string prefix;
2408 if(doLog()) {
2409 prefix=d_prefix;
2410 prefix.append(depth, ' ');
2411 }
2412
2413 sanitizeRecords(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
2414
2415 std::vector<std::shared_ptr<DNSRecord>> authorityRecs;
2416 const unsigned int labelCount = qname.countLabels();
2417 bool isCNAMEAnswer = false;
2418 bool isDNAMEAnswer = false;
2419 for(const auto& rec : lwr.d_records) {
2420 if (rec.d_class != QClass::IN) {
2421 continue;
2422 }
2423
2424 if(!isCNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::CNAME && (!(qtype==QType(QType::CNAME))) && rec.d_name == qname && !isDNAMEAnswer) {
2425 isCNAMEAnswer = true;
2426 }
2427 if(!isDNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::DNAME && qtype != QType(QType::DNAME) && qname.isPartOf(rec.d_name)) {
2428 isDNAMEAnswer = true;
2429 isCNAMEAnswer = false;
2430 }
2431
2432 /* if we have a positive answer synthetized from a wildcard,
2433 we need to store the corresponding NSEC/NSEC3 records proving
2434 that the exact name did not exist in the negative cache */
2435 if(gatherWildcardProof) {
2436 if (nsecTypes.count(rec.d_type)) {
2437 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
2438 }
2439 else if (rec.d_type == QType::RRSIG) {
2440 auto rrsig = getRR<RRSIGRecordContent>(rec);
2441 if (rrsig && nsecTypes.count(rrsig->d_type)) {
2442 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
2443 }
2444 }
2445 }
2446 if(rec.d_type == QType::RRSIG) {
2447 auto rrsig = getRR<RRSIGRecordContent>(rec);
2448 if (rrsig) {
2449 /* As illustrated in rfc4035's Appendix B.6, the RRSIG label
2450 count can be lower than the name's label count if it was
2451 synthetized from the wildcard. Note that the difference might
2452 be > 1. */
2453 if (rec.d_name == qname && isWildcardExpanded(labelCount, rrsig)) {
2454 gatherWildcardProof = true;
2455 if (!isWildcardExpandedOntoItself(rec.d_name, labelCount, rrsig)) {
2456 /* if we have a wildcard expanded onto itself, we don't need to prove
2457 that the exact name doesn't exist because it actually does.
2458 We still want to gather the corresponding NSEC/NSEC3 records
2459 to pass them to our client in case it wants to validate by itself.
2460 */
2461 LOG(prefix<<qname<<": RRSIG indicates the name was synthetized from a wildcard, we need a wildcard proof"<<endl);
2462 needWildcardProof = true;
2463 }
2464 else {
2465 LOG(prefix<<qname<<": RRSIG indicates the name was synthetized from a wildcard expanded onto itself, we need to gather wildcard proof"<<endl);
2466 }
2467 wildcardLabelsCount = rrsig->d_labels;
2468 }
2469
2470 // cerr<<"Got an RRSIG for "<<DNSRecordContent::NumberToType(rrsig->d_type)<<" with name '"<<rec.d_name<<"'"<<endl;
2471 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
2472 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL = std::min(tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL, rec.d_ttl);
2473 }
2474 }
2475 }
2476
2477 // reap all answers from this packet that are acceptable
2478 for(auto& rec : lwr.d_records) {
2479 if(rec.d_type == QType::OPT) {
2480 LOG(prefix<<qname<<": OPT answer '"<<rec.d_name<<"' from '"<<auth<<"' nameservers" <<endl);
2481 continue;
2482 }
2483 LOG(prefix<<qname<<": accept answer '"<<rec.d_name<<"|"<<DNSRecordContent::NumberToType(rec.d_type)<<"|"<<rec.d_content->getZoneRepresentation()<<"' from '"<<auth<<"' nameservers? ttl="<<rec.d_ttl<<", place="<<(int)rec.d_place<<" ");
2484 if(rec.d_type == QType::ANY) {
2485 LOG("NO! - we don't accept 'ANY'-typed data"<<endl);
2486 continue;
2487 }
2488
2489 if(rec.d_class != QClass::IN) {
2490 LOG("NO! - we don't accept records for any other class than 'IN'"<<endl);
2491 continue;
2492 }
2493
2494 if (!(lwr.d_aabit || wasForwardRecurse) && rec.d_place == DNSResourceRecord::ANSWER) {
2495 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2496 are sending such responses */
2497 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
2498 LOG("NO! - we don't accept records in the answers section without the AA bit set"<<endl);
2499 continue;
2500 }
2501 }
2502
2503 if(rec.d_name.isPartOf(auth)) {
2504 if(rec.d_type == QType::RRSIG) {
2505 LOG("RRSIG - separate"<<endl);
2506 }
2507 else if(lwr.d_aabit && lwr.d_rcode==RCode::NoError && rec.d_place==DNSResourceRecord::ANSWER && ((rec.d_type != QType::DNSKEY && rec.d_type != QType::DS) || rec.d_name != auth) && s_delegationOnly.count(auth)) {
2508 LOG("NO! Is from delegation-only zone"<<endl);
2509 s_nodelegated++;
2510 return RCode::NXDomain;
2511 }
2512 else {
2513 bool haveLogged = false;
2514 if (isDNAMEAnswer && rec.d_type == QType::CNAME) {
2515 LOG("NO - we already have a DNAME answer for this domain");
2516 continue;
2517 }
2518 if (!t_sstorage.domainmap->empty()) {
2519 // Check if we are authoritative for a zone in this answer
2520 DNSName tmp_qname(rec.d_name);
2521 auto auth_domain_iter=getBestAuthZone(&tmp_qname);
2522 if(auth_domain_iter!=t_sstorage.domainmap->end() &&
2523 auth.countLabels() <= auth_domain_iter->first.countLabels()) {
2524 if (auth_domain_iter->first != auth) {
2525 LOG("NO! - we are authoritative for the zone "<<auth_domain_iter->first<<endl);
2526 continue;
2527 } else {
2528 LOG("YES! - This answer was ");
2529 if (!wasForwarded) {
2530 LOG("retrieved from the local auth store.");
2531 } else {
2532 LOG("received from a server we forward to.");
2533 }
2534 haveLogged = true;
2535 LOG(endl);
2536 }
2537 }
2538 }
2539 if (!haveLogged) {
2540 LOG("YES!"<<endl);
2541 }
2542
2543 rec.d_ttl=min(s_maxcachettl, rec.d_ttl);
2544
2545 DNSRecord dr(rec);
2546 dr.d_ttl += d_now.tv_sec;
2547 dr.d_place=DNSResourceRecord::ANSWER;
2548 tcache[{rec.d_name,rec.d_type,rec.d_place}].records.push_back(dr);
2549 }
2550 }
2551 else
2552 LOG("NO!"<<endl);
2553 }
2554
2555 // supplant
2556 for(tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
2557 if((i->second.records.size() + i->second.signatures.size()) > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
2558 uint32_t lowestTTD=computeLowestTTD(i->second.records, i->second.signatures, i->second.signaturesTTL);
2559
2560 for(auto& record : i->second.records)
2561 record.d_ttl = lowestTTD; // boom
2562 }
2563
2564 // cout<<"Have "<<i->second.records.size()<<" records and "<<i->second.signatures.size()<<" signatures for "<<i->first.name;
2565 // cout<<'|'<<DNSRecordContent::NumberToType(i->first.type)<<endl;
2566 }
2567
2568 for(tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
2569
2570 if(i->second.records.empty()) // this happens when we did store signatures, but passed on the records themselves
2571 continue;
2572
2573 /* Even if the AA bit is set, additional data cannot be considered
2574 as authoritative. This is especially important during validation
2575 because keeping records in the additional section is allowed even
2576 if the corresponding RRSIGs are not included, without setting the TC
2577 bit, as stated in rfc4035's section 3.1.1. Including RRSIG RRs in a Response:
2578 "When placing a signed RRset in the Additional section, the name
2579 server MUST also place its RRSIG RRs in the Additional section.
2580 If space does not permit inclusion of both the RRset and its
2581 associated RRSIG RRs, the name server MAY retain the RRset while
2582 dropping the RRSIG RRs. If this happens, the name server MUST NOT
2583 set the TC bit solely because these RRSIG RRs didn't fit."
2584 */
2585 bool isAA = lwr.d_aabit && i->first.place != DNSResourceRecord::ADDITIONAL;
2586 /* if we forwarded the query to a recursor, we can expect the answer to be signed,
2587 even if the answer is not AA. Of course that's not only true inside a Secure
2588 zone, but we check that below. */
2589 bool expectSignature = i->first.place == DNSResourceRecord::ANSWER || ((lwr.d_aabit || wasForwardRecurse) && i->first.place != DNSResourceRecord::ADDITIONAL);
2590 if (isCNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::CNAME || i->first.name != qname)) {
2591 /*
2592 rfc2181 states:
2593 Note that the answer section of an authoritative answer normally
2594 contains only authoritative data. However when the name sought is an
2595 alias (see section 10.1.1) only the record describing that alias is
2596 necessarily authoritative. Clients should assume that other records
2597 may have come from the server's cache. Where authoritative answers
2598 are required, the client should query again, using the canonical name
2599 associated with the alias.
2600 */
2601 isAA = false;
2602 expectSignature = false;
2603 }
2604
2605 if (isCNAMEAnswer && i->first.place == DNSResourceRecord::AUTHORITY && i->first.type == QType::NS && auth == i->first.name) {
2606 /* These NS can't be authoritative since we have a CNAME answer for which (see above) only the
2607 record describing that alias is necessarily authoritative.
2608 But if we allow the current auth, which might be serving the child zone, to raise the TTL
2609 of non-authoritative NS in the cache, they might be able to keep a "ghost" zone alive forever,
2610 even after the delegation is gone from the parent.
2611 So let's just do nothing with them, we can fetch them directly if we need them.
2612 */
2613 LOG(d_prefix<<": skipping authority NS from '"<<auth<<"' nameservers in CNAME answer "<<i->first.name<<"|"<<DNSRecordContent::NumberToType(i->first.type)<<endl);
2614 continue;
2615 }
2616
2617 vState recordState = getValidationStatus(i->first.name, false);
2618 LOG(d_prefix<<": got initial zone status "<<vStates[recordState]<<" for record "<<i->first.name<<"|"<<DNSRecordContent::NumberToType(i->first.type)<<endl);
2619
2620 if (shouldValidate() && recordState == Secure) {
2621 vState initialState = recordState;
2622
2623 if (expectSignature) {
2624 if (i->first.place != DNSResourceRecord::ADDITIONAL) {
2625 /* the additional entries can be insecure,
2626 like glue:
2627 "Glue address RRsets associated with delegations MUST NOT be signed"
2628 */
2629 if (i->first.type == QType::DNSKEY && i->first.place == DNSResourceRecord::ANSWER) {
2630 LOG(d_prefix<<"Validating DNSKEY for "<<i->first.name<<endl);
2631 recordState = validateDNSKeys(i->first.name, i->second.records, i->second.signatures, depth);
2632 }
2633 else {
2634 /*
2635 * RFC 6672 section 5.3.1
2636 * In any response, a signed DNAME RR indicates a non-terminal
2637 * redirection of the query. There might or might not be a server-
2638 * synthesized CNAME in the answer section; if there is, the CNAME will
2639 * never be signed. For a DNSSEC validator, verification of the DNAME
2640 * RR and then that the CNAME was properly synthesized is sufficient
2641 * proof.
2642 *
2643 * We do the synthesis check in processRecords, here we make sure we
2644 * don't validate the CNAME.
2645 */
2646 if (!(isDNAMEAnswer && i->first.type == QType::CNAME)) {
2647 LOG(d_prefix<<"Validating non-additional record for "<<i->first.name<<endl);
2648 recordState = validateRecordsWithSigs(depth, qname, qtype, i->first.name, i->second.records, i->second.signatures);
2649 /* we might have missed a cut (zone cut within the same auth servers), causing the NS query for an Insecure zone to seem Bogus during zone cut determination */
2650 if (qtype == QType::NS && i->second.signatures.empty() && recordState == Bogus && haveExactValidationStatus(i->first.name) && getValidationStatus(i->first.name) == Indeterminate) {
2651 recordState = Indeterminate;
2652 }
2653 }
2654 }
2655 }
2656 }
2657 else {
2658 recordState = Indeterminate;
2659
2660 /* in a non authoritative answer, we only care about the DS record (or lack of) */
2661 if ((i->first.type == QType::DS || i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && i->first.place == DNSResourceRecord::AUTHORITY) {
2662 LOG(d_prefix<<"Validating DS record for "<<i->first.name<<endl);
2663 recordState = validateRecordsWithSigs(depth, qname, qtype, i->first.name, i->second.records, i->second.signatures);
2664 }
2665 }
2666
2667 if (initialState == Secure && state != recordState && expectSignature) {
2668 updateValidationState(state, recordState);
2669 }
2670 }
2671 else {
2672 if (shouldValidate()) {
2673 LOG(d_prefix<<"Skipping validation because the current state is "<<vStates[recordState]<<endl);
2674 }
2675 }
2676
2677 if (recordState == Bogus) {
2678 /* this is a TTD by now, be careful */
2679 for(auto& record : i->second.records) {
2680 record.d_ttl = std::min(record.d_ttl, static_cast<uint32_t>(s_maxbogusttl + d_now.tv_sec));
2681 }
2682 }
2683
2684 /* We don't need to store NSEC3 records in the positive cache because:
2685 - we don't allow direct NSEC3 queries
2686 - denial of existence proofs in wildcard expanded positive responses are stored in authorityRecs
2687 - denial of existence proofs for negative responses are stored in the negative cache
2688 We also don't want to cache non-authoritative data except for:
2689 - records coming from non forward-recurse servers (those will never be AA)
2690 - DS (special case)
2691 - NS, A and AAAA (used for infra queries)
2692 */
2693 if (i->first.type != QType::NSEC3 && (i->first.type == QType::DS || i->first.type == QType::NS || i->first.type == QType::A || i->first.type == QType::AAAA || isAA || wasForwardRecurse)) {
2694
2695 bool doCache = true;
2696 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
2697 // If ednsmask is relevant, we do not want to cache if the scope prefix length is large and TTL is small
2698 if (SyncRes::s_ecscachelimitttl > 0) {
2699 bool manyMaskBits = (ednsmask->isIpv4() && ednsmask->getBits() > SyncRes::s_ecsipv4cachelimit) ||
2700 (ednsmask->isIpv6() && ednsmask->getBits() > SyncRes::s_ecsipv6cachelimit);
2701
2702 if (manyMaskBits) {
2703 uint32_t minttl = UINT32_MAX;
2704 for (const auto &it : i->second.records) {
2705 if (it.d_ttl < minttl)
2706 minttl = it.d_ttl;
2707 }
2708 bool ttlIsSmall = minttl < SyncRes::s_ecscachelimitttl + d_now.tv_sec;
2709 if (ttlIsSmall) {
2710 // Case: many bits and ttlIsSmall
2711 doCache = false;
2712 }
2713 }
2714 }
2715 }
2716 if (doCache) {
2717 t_RC->replace(d_now.tv_sec, i->first.name, QType(i->first.type), i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, recordState);
2718 }
2719 }
2720
2721 if(i->first.place == DNSResourceRecord::ANSWER && ednsmask)
2722 d_wasVariable=true;
2723 }
2724
2725 return RCode::NoError;
2726 }
2727
2728 void SyncRes::updateDenialValidationState(vState& neValidationState, const DNSName& neName, vState& state, const dState denialState, const dState expectedState, bool allowOptOut)
2729 {
2730 if (denialState == expectedState) {
2731 neValidationState = Secure;
2732 }
2733 else {
2734 if (denialState == OPTOUT && allowOptOut) {
2735 LOG(d_prefix<<"OPT-out denial found for "<<neName<<endl);
2736 neValidationState = Secure;
2737 return;
2738 }
2739 else if (denialState == INSECURE) {
2740 LOG(d_prefix<<"Insecure denial found for "<<neName<<", returning Insecure"<<endl);
2741 neValidationState = Insecure;
2742 }
2743 else {
2744 LOG(d_prefix<<"Invalid denial found for "<<neName<<", returning Bogus, res="<<denialState<<", expectedState="<<expectedState<<endl);
2745 neValidationState = Bogus;
2746 }
2747 updateValidationState(state, neValidationState);
2748 }
2749 }
2750
2751 dState SyncRes::getDenialValidationState(const NegCache::NegCacheEntry& ne, const vState state, const dState expectedState, bool referralToUnsigned)
2752 {
2753 cspmap_t csp = harvestCSPFromNE(ne);
2754 return getDenial(csp, ne.d_name, ne.d_qtype.getCode(), referralToUnsigned, expectedState == NXQTYPE);
2755 }
2756
2757 bool SyncRes::processRecords(const std::string& prefix, const DNSName& qname, const QType& qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherWildcardProof, const unsigned int wildcardLabelsCount)
2758 {
2759 bool done = false;
2760 DNSName dnameTarget, dnameOwner;
2761 uint32_t dnameTTL = 0;
2762
2763 for(auto& rec : lwr.d_records) {
2764 if (rec.d_type!=QType::OPT && rec.d_class!=QClass::IN)
2765 continue;
2766
2767 if (rec.d_place==DNSResourceRecord::ANSWER && !(lwr.d_aabit || sendRDQuery)) {
2768 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
2769 are sending such responses */
2770 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
2771 continue;
2772 }
2773 }
2774
2775 if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::SOA &&
2776 lwr.d_rcode==RCode::NXDomain && qname.isPartOf(rec.d_name) && rec.d_name.isPartOf(auth)) {
2777 LOG(prefix<<qname<<": got negative caching indication for name '"<<qname<<"' (accept="<<rec.d_name.isPartOf(auth)<<"), newtarget='"<<newtarget<<"'"<<endl);
2778
2779 rec.d_ttl = min(rec.d_ttl, s_maxnegttl);
2780 if(newtarget.empty()) // only add a SOA if we're not going anywhere after this
2781 ret.push_back(rec);
2782
2783 NegCache::NegCacheEntry ne;
2784
2785 uint32_t lowestTTL = rec.d_ttl;
2786 /* if we get an NXDomain answer with a CNAME, the name
2787 does exist but the target does not */
2788 ne.d_name = newtarget.empty() ? qname : newtarget;
2789 ne.d_qtype = QType(0); // this encodes 'whole record'
2790 ne.d_auth = rec.d_name;
2791 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2792
2793 if (state == Secure) {
2794 dState denialState = getDenialValidationState(ne, state, NXDOMAIN, false);
2795 updateDenialValidationState(ne.d_validationState, ne.d_name, state, denialState, NXDOMAIN, false);
2796 }
2797 else {
2798 ne.d_validationState = state;
2799 }
2800
2801 if (ne.d_validationState == Bogus) {
2802 lowestTTL = min(lowestTTL, s_maxbogusttl);
2803 }
2804
2805 ne.d_ttd = d_now.tv_sec + lowestTTL;
2806 /* if we get an NXDomain answer with a CNAME, let's not cache the
2807 target, even the server was authoritative for it,
2808 and do an additional query for the CNAME target.
2809 We have a regression test making sure we do exactly that.
2810 */
2811 if(!wasVariable() && newtarget.empty()) {
2812 t_sstorage.negcache.add(ne);
2813 if(s_rootNXTrust && ne.d_auth.isRoot() && auth.isRoot() && lwr.d_aabit) {
2814 ne.d_name = ne.d_name.getLastLabel();
2815 t_sstorage.negcache.add(ne);
2816 }
2817 }
2818
2819 negindic=true;
2820 }
2821 else if(rec.d_place==DNSResourceRecord::ANSWER && s_redirectionQTypes.count(rec.d_type) > 0 && // CNAME or DNAME answer
2822 s_redirectionQTypes.count(qtype.getCode()) == 0) { // But not in response to a CNAME or DNAME query
2823 if (rec.d_type == QType::CNAME && rec.d_name == qname) {
2824 if (!dnameOwner.empty()) { // We synthesize ourselves
2825 continue;
2826 }
2827 ret.push_back(rec);
2828 if (auto content = getRR<CNAMERecordContent>(rec)) {
2829 newtarget=content->getTarget();
2830 }
2831 } else if (rec.d_type == QType::DNAME && qname.isPartOf(rec.d_name)) { // DNAME
2832 ret.push_back(rec);
2833 if (auto content = getRR<DNAMERecordContent>(rec)) {
2834 dnameOwner = rec.d_name;
2835 dnameTarget = content->getTarget();
2836 dnameTTL = rec.d_ttl;
2837 if (!newtarget.empty()) { // We had a CNAME before, remove it from ret so we don't cache it
2838 ret.erase(std::remove_if(
2839 ret.begin(),
2840 ret.end(),
2841 [&qname](DNSRecord& rr) {
2842 return (rr.d_place == DNSResourceRecord::ANSWER && rr.d_type == QType::CNAME && rr.d_name == qname);
2843 }),
2844 ret.end());
2845 }
2846 try {
2847 newtarget = qname.makeRelative(dnameOwner) + dnameTarget;
2848 } catch (const std::exception &e) {
2849 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
2850 // But there is no way to set the RCODE from this function
2851 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + dnameOwner.toLogString() +
2852 "', DNAME target: '" + dnameTarget.toLogString() + "', substituted name: '" +
2853 qname.makeRelative(dnameOwner).toLogString() + "." + dnameTarget.toLogString() +
2854 "' : " + e.what());
2855 }
2856 }
2857 }
2858 }
2859 /* if we have a positive answer synthetized from a wildcard, we need to
2860 return the corresponding NSEC/NSEC3 records from the AUTHORITY section
2861 proving that the exact name did not exist */
2862 else if(gatherWildcardProof && (rec.d_type==QType::RRSIG || rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && rec.d_place==DNSResourceRecord::AUTHORITY) {
2863 ret.push_back(rec); // enjoy your DNSSEC
2864 }
2865 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
2866 else if(rec.d_place==DNSResourceRecord::ANSWER && rec.d_name == qname &&
2867 (
2868 rec.d_type==qtype.getCode() || ((lwr.d_aabit || sendRDQuery) && qtype == QType(QType::ANY))
2869 )
2870 )
2871 {
2872 LOG(prefix<<qname<<": answer is in: resolved to '"<< rec.d_content->getZoneRepresentation()<<"|"<<DNSRecordContent::NumberToType(rec.d_type)<<"'"<<endl);
2873
2874 done=true;
2875
2876 if (state == Secure && needWildcardProof) {
2877 /* We have a positive answer synthetized from a wildcard, we need to check that we have
2878 proof that the exact name doesn't exist so the wildcard can be used,
2879 as described in section 5.3.4 of RFC 4035 and 5.3 of FRC 7129.
2880 */
2881 NegCache::NegCacheEntry ne;
2882
2883 uint32_t lowestTTL = rec.d_ttl;
2884 ne.d_name = qname;
2885 ne.d_qtype = QType(0); // this encodes 'whole record'
2886 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2887
2888 cspmap_t csp = harvestCSPFromNE(ne);
2889 dState res = getDenial(csp, qname, ne.d_qtype.getCode(), false, false, false, wildcardLabelsCount);
2890 if (res != NXDOMAIN) {
2891 vState st = Bogus;
2892 if (res == INSECURE) {
2893 /* Some part could not be validated, for example a NSEC3 record with a too large number of iterations,
2894 this is not enough to warrant a Bogus, but go Insecure. */
2895 st = Insecure;
2896 LOG(d_prefix<<"Unable to validate denial in wildcard expanded positive response found for "<<qname<<", returning Insecure, res="<<res<<endl);
2897 }
2898 else {
2899 LOG(d_prefix<<"Invalid denial in wildcard expanded positive response found for "<<qname<<", returning Bogus, res="<<res<<endl);
2900 rec.d_ttl = std::min(rec.d_ttl, s_maxbogusttl);
2901 }
2902
2903 updateValidationState(state, st);
2904 /* we already stored the record with a different validation status, let's fix it */
2905 updateValidationStatusInCache(qname, qtype, lwr.d_aabit, st);
2906 }
2907 }
2908 ret.push_back(rec);
2909 }
2910 else if((rec.d_type==QType::RRSIG || rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && rec.d_place==DNSResourceRecord::ANSWER) {
2911 if(rec.d_type != QType::RRSIG || rec.d_name == qname) {
2912 ret.push_back(rec); // enjoy your DNSSEC
2913 } else if(rec.d_type == QType::RRSIG && qname.isPartOf(rec.d_name)) {
2914 auto rrsig = getRR<RRSIGRecordContent>(rec);
2915 if (rrsig != nullptr && rrsig->d_type == QType::DNAME) {
2916 ret.push_back(rec);
2917 }
2918 }
2919 }
2920 else if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::NS && qname.isPartOf(rec.d_name)) {
2921 if(moreSpecificThan(rec.d_name,auth)) {
2922 newauth=rec.d_name;
2923 LOG(prefix<<qname<<": got NS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"'"<<endl);
2924 realreferral=true;
2925 }
2926 else {
2927 LOG(prefix<<qname<<": got upwards/level NS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"', had '"<<auth<<"'"<<endl);
2928 }
2929 if (auto content = getRR<NSRecordContent>(rec)) {
2930 nsset.insert(content->getNS());
2931 }
2932 }
2933 else if(rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::DS && qname.isPartOf(rec.d_name)) {
2934 LOG(prefix<<qname<<": got DS record '"<<rec.d_name<<"' -> '"<<rec.d_content->getZoneRepresentation()<<"'"<<endl);
2935 }
2936 else if(realreferral && rec.d_place==DNSResourceRecord::AUTHORITY && (rec.d_type==QType::NSEC || rec.d_type==QType::NSEC3) && newauth.isPartOf(auth)) {
2937 /* we might have received a denial of the DS, let's check */
2938 if (state == Secure) {
2939 NegCache::NegCacheEntry ne;
2940 ne.d_auth = auth;
2941 ne.d_name = newauth;
2942 ne.d_qtype = QType::DS;
2943 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
2944 uint32_t lowestTTL = rec.d_ttl;
2945 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2946
2947 dState denialState = getDenialValidationState(ne, state, NXQTYPE, true);
2948
2949 if (denialState == NXQTYPE || denialState == OPTOUT || denialState == INSECURE) {
2950 ne.d_ttd = lowestTTL + d_now.tv_sec;
2951 ne.d_validationState = Secure;
2952 LOG(prefix<<qname<<": got negative indication of DS record for '"<<newauth<<"'"<<endl);
2953
2954 if(!wasVariable()) {
2955 t_sstorage.negcache.add(ne);
2956 }
2957
2958 if (qname == newauth && qtype == QType::DS) {
2959 /* we are actually done! */
2960 negindic=true;
2961 nsset.clear();
2962 }
2963 }
2964 }
2965 }
2966 else if(!done && rec.d_place==DNSResourceRecord::AUTHORITY && rec.d_type==QType::SOA &&
2967 lwr.d_rcode==RCode::NoError && qname.isPartOf(rec.d_name)) {
2968 LOG(prefix<<qname<<": got negative caching indication for '"<< qname<<"|"<<qtype.getName()<<"'"<<endl);
2969
2970 if(!newtarget.empty()) {
2971 LOG(prefix<<qname<<": Hang on! Got a redirect to '"<<newtarget<<"' already"<<endl);
2972 }
2973 else {
2974 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
2975
2976 NegCache::NegCacheEntry ne;
2977 ne.d_auth = rec.d_name;
2978 uint32_t lowestTTL = rec.d_ttl;
2979 ne.d_name = qname;
2980 ne.d_qtype = qtype;
2981 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
2982
2983 if (state == Secure) {
2984 dState denialState = getDenialValidationState(ne, state, NXQTYPE, false);
2985 updateDenialValidationState(ne.d_validationState, ne.d_name, state, denialState, NXQTYPE, qtype == QType::DS);
2986 } else {
2987 ne.d_validationState = state;
2988 }
2989
2990 if (ne.d_validationState == Bogus) {
2991 lowestTTL = min(lowestTTL, s_maxbogusttl);
2992 rec.d_ttl = min(rec.d_ttl, s_maxbogusttl);
2993 }
2994 ne.d_ttd = d_now.tv_sec + lowestTTL;
2995
2996 if(!wasVariable()) {
2997 if(qtype.getCode()) { // prevents us from blacking out a whole domain
2998 t_sstorage.negcache.add(ne);
2999 }
3000 }
3001
3002 ret.push_back(rec);
3003 negindic=true;
3004 }
3005 }
3006 }
3007
3008 if (!dnameTarget.empty()) {
3009 // Synthesize a CNAME
3010 auto cnamerec = DNSRecord();
3011 cnamerec.d_name = qname;
3012 cnamerec.d_type = QType::CNAME;
3013 cnamerec.d_ttl = dnameTTL;
3014 cnamerec.d_content = std::make_shared<CNAMERecordContent>(CNAMERecordContent(newtarget));
3015 ret.push_back(cnamerec);
3016 }
3017 return done;
3018 }
3019
3020 bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType& qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool* truncated)
3021 {
3022 bool chained = false;
3023 int resolveret = RCode::NoError;
3024 s_outqueries++;
3025 d_outqueries++;
3026
3027 if(d_outqueries + d_throttledqueries > s_maxqperq) {
3028 throw ImmediateServFailException("more than "+std::to_string(s_maxqperq)+" (max-qperq) queries sent while resolving "+qname.toLogString());
3029 }
3030
3031 if(s_maxtotusec && d_totUsec > s_maxtotusec) {
3032 throw ImmediateServFailException("Too much time waiting for "+qname.toLogString()+"|"+qtype.getName()+", timeouts: "+std::to_string(d_timeouts) +", throttles: "+std::to_string(d_throttledqueries) + ", queries: "+std::to_string(d_outqueries)+", "+std::to_string(d_totUsec/1000)+"msec");
3033 }
3034
3035 if(doTCP) {
3036 LOG(prefix<<qname<<": using TCP with "<< remoteIP.toStringWithPort() <<endl);
3037 s_tcpoutqueries++;
3038 d_tcpoutqueries++;
3039 }
3040
3041 if(d_pdl && d_pdl->preoutquery(remoteIP, d_requestor, qname, qtype, doTCP, lwr.d_records, resolveret)) {
3042 LOG(prefix<<qname<<": query handled by Lua"<<endl);
3043 }
3044 else {
3045 ednsmask=getEDNSSubnetMask(qname, remoteIP);
3046 if(ednsmask) {
3047 LOG(prefix<<qname<<": Adding EDNS Client Subnet Mask "<<ednsmask->toString()<<" to query"<<endl);
3048 s_ecsqueries++;
3049 }
3050 resolveret = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, auth, qtype.getCode(),
3051 doTCP, sendRDQuery, &d_now, ednsmask, &lwr, &chained); // <- we go out on the wire!
3052 if(ednsmask) {
3053 s_ecsresponses++;
3054 LOG(prefix<<qname<<": Received EDNS Client Subnet Mask "<<ednsmask->toString()<<" on response"<<endl);
3055 if (ednsmask->getBits() > 0) {
3056 if (ednsmask->isIpv4()) {
3057 ++SyncRes::s_ecsResponsesBySubnetSize4.at(ednsmask->getBits()-1);
3058 }
3059 else {
3060 ++SyncRes::s_ecsResponsesBySubnetSize6.at(ednsmask->getBits()-1);
3061 }
3062 }
3063 }
3064 }
3065
3066 /* preoutquery killed the query by setting dq.rcode to -3 */
3067 if(resolveret==-3) {
3068 throw ImmediateServFailException("Query killed by policy");
3069 }
3070
3071 d_totUsec += lwr.d_usec;
3072 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
3073
3074 bool dontThrottle = false;
3075 {
3076 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3077 auto dontThrottleNetmasks = g_dontThrottleNetmasks.getLocal();
3078 dontThrottle = dontThrottleNames->check(nsName) || dontThrottleNetmasks->match(remoteIP);
3079 }
3080
3081 if(resolveret != 1) {
3082 /* Error while resolving */
3083 if(resolveret == 0) {
3084 /* Time out */
3085
3086 LOG(prefix<<qname<<": timeout resolving after "<<lwr.d_usec/1000.0<<"msec "<< (doTCP ? "over TCP" : "")<<endl);
3087 d_timeouts++;
3088 s_outgoingtimeouts++;
3089
3090 if(remoteIP.sin4.sin_family == AF_INET)
3091 s_outgoing4timeouts++;
3092 else
3093 s_outgoing6timeouts++;
3094
3095 if(t_timeouts)
3096 t_timeouts->push_back(remoteIP);
3097 }
3098 else if(resolveret == -2) {
3099 /* OS resource limit reached */
3100 LOG(prefix<<qname<<": hit a local resource limit resolving"<< (doTCP ? " over TCP" : "")<<", probable error: "<<stringerror()<<endl);
3101 g_stats.resourceLimits++;
3102 }
3103 else {
3104 /* -1 means server unreachable */
3105 s_unreachables++;
3106 d_unreachables++;
3107 LOG(prefix<<qname<<": error resolving from "<<remoteIP.toString()<< (doTCP ? " over TCP" : "") <<", possible error: "<<strerror(errno)<< endl);
3108 }
3109
3110 if(resolveret != -2 && !chained && !dontThrottle) {
3111 // don't account for resource limits, they are our own fault
3112 // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
3113 t_sstorage.nsSpeeds[nsName.empty()? DNSName(remoteIP.toStringWithPort()) : nsName].submit(remoteIP, 1000000, &d_now); // 1 sec
3114
3115 // code below makes sure we don't filter COM or the root
3116 if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && t_sstorage.fails.incr(remoteIP) >= s_serverdownmaxfails) {
3117 LOG(prefix<<qname<<": Max fails reached resolving on "<< remoteIP.toString() <<". Going full throttle for "<< s_serverdownthrottletime <<" seconds" <<endl);
3118 // mark server as down
3119 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, "", 0), s_serverdownthrottletime, 10000);
3120 }
3121 else if (resolveret == -1) {
3122 // unreachable, 1 minute or 100 queries
3123 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 100);
3124 }
3125 else {
3126 // timeout, 10 seconds or 5 queries
3127 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 10, 5);
3128 }
3129 }
3130
3131 return false;
3132 }
3133
3134 /* we got an answer */
3135 if(lwr.d_rcode==RCode::ServFail || lwr.d_rcode==RCode::Refused) {
3136 LOG(prefix<<qname<<": "<<nsName<<" ("<<remoteIP.toString()<<") returned a "<< (lwr.d_rcode==RCode::ServFail ? "ServFail" : "Refused") << ", trying sibling IP or NS"<<endl);
3137 if (!chained && !dontThrottle) {
3138 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 3);
3139 }
3140 return false;
3141 }
3142
3143 /* this server sent a valid answer, mark it backup up if it was down */
3144 if(s_serverdownmaxfails > 0) {
3145 t_sstorage.fails.clear(remoteIP);
3146 }
3147
3148 if(lwr.d_tcbit) {
3149 *truncated = true;
3150
3151 if (doTCP) {
3152 LOG(prefix<<qname<<": truncated bit set, over TCP?"<<endl);
3153 if (!dontThrottle) {
3154 /* let's treat that as a ServFail answer from this server */
3155 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 60, 3);
3156 }
3157 return false;
3158 }
3159 LOG(prefix<<qname<<": truncated bit set, over UDP"<<endl);
3160
3161 return true;
3162 }
3163
3164 return true;
3165 }
3166
3167 bool SyncRes::processAnswer(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType& qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, bool sendRDQuery, NsSet &nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state)
3168 {
3169 string prefix;
3170 if(doLog()) {
3171 prefix=d_prefix;
3172 prefix.append(depth, ' ');
3173 }
3174
3175 if(s_minimumTTL) {
3176 for(auto& rec : lwr.d_records) {
3177 rec.d_ttl = max(rec.d_ttl, s_minimumTTL);
3178 }
3179 }
3180
3181 /* if the answer is ECS-specific, a minimum TTL is set for this kind of answers
3182 and it's higher than the global minimum TTL */
3183 if (ednsmask && s_minimumECSTTL > 0 && (s_minimumTTL == 0 || s_minimumECSTTL > s_minimumTTL)) {
3184 for(auto& rec : lwr.d_records) {
3185 if (rec.d_place == DNSResourceRecord::ANSWER) {
3186 rec.d_ttl = max(rec.d_ttl, s_minimumECSTTL);
3187 }
3188 }
3189 }
3190
3191 bool needWildcardProof = false;
3192 bool gatherWildcardProof = false;
3193 unsigned int wildcardLabelsCount;
3194 *rcode = updateCacheFromRecords(depth, lwr, qname, qtype, auth, wasForwarded, ednsmask, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, sendRDQuery);
3195 if (*rcode != RCode::NoError) {
3196 return true;
3197 }
3198
3199 LOG(prefix<<qname<<": determining status after receiving this packet"<<endl);
3200
3201 set<DNSName> nsset;
3202 bool realreferral=false, negindic=false;
3203 DNSName newauth;
3204 DNSName newtarget;
3205
3206 bool done = processRecords(prefix, qname, qtype, auth, lwr, sendRDQuery, ret, nsset, newtarget, newauth, realreferral, negindic, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount);
3207
3208 if(done){
3209 LOG(prefix<<qname<<": status=got results, this level of recursion done"<<endl);
3210 LOG(prefix<<qname<<": validation status is "<<vStates[state]<<endl);
3211 *rcode = RCode::NoError;
3212 return true;
3213 }
3214
3215 if(!newtarget.empty()) {
3216 if(newtarget == qname) {
3217 LOG(prefix<<qname<<": status=got a CNAME referral to self, returning SERVFAIL"<<endl);
3218 *rcode = RCode::ServFail;
3219 return true;
3220 }
3221
3222 if(depth > 10) {
3223 LOG(prefix<<qname<<": status=got a CNAME referral, but recursing too deep, returning SERVFAIL"<<endl);
3224 *rcode = RCode::ServFail;
3225 return true;
3226 }
3227
3228 if (qtype == QType::DS) {
3229 LOG(prefix<<qname<<": status=got a CNAME referral, but we are looking for a DS"<<endl);
3230
3231 if(d_doDNSSEC)
3232 addNXNSECS(ret, lwr.d_records);
3233
3234 *rcode = RCode::NoError;
3235 return true;
3236 }
3237 else {
3238 LOG(prefix<<qname<<": status=got a CNAME referral, starting over with "<<newtarget<<endl);
3239
3240 set<GetBestNSAnswer> beenthere2;
3241 vState cnameState = Indeterminate;
3242 *rcode = doResolve(newtarget, qtype, ret, depth + 1, beenthere2, cnameState);
3243 LOG(prefix<<qname<<": updating validation state for response to "<<qname<<" from "<<vStates[state]<<" with the state from the CNAME quest: "<<vStates[cnameState]<<endl);
3244 updateValidationState(state, cnameState);
3245 return true;
3246 }
3247 }
3248
3249 if(lwr.d_rcode == RCode::NXDomain) {
3250 LOG(prefix<<qname<<": status=NXDOMAIN, we are done "<<(negindic ? "(have negative SOA)" : "")<<endl);
3251
3252 if(d_doDNSSEC)
3253 addNXNSECS(ret, lwr.d_records);
3254
3255 *rcode = RCode::NXDomain;
3256 return true;
3257 }
3258
3259 if(nsset.empty() && !lwr.d_rcode && (negindic || lwr.d_aabit || sendRDQuery)) {
3260 LOG(prefix<<qname<<": status=noerror, other types may exist, but we are done "<<(negindic ? "(have negative SOA) " : "")<<(lwr.d_aabit ? "(have aa bit) " : "")<<endl);
3261
3262 if(state == Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
3263 updateValidationState(state, Bogus);
3264 }
3265
3266 if(d_doDNSSEC)
3267 addNXNSECS(ret, lwr.d_records);
3268
3269 *rcode = RCode::NoError;
3270 return true;
3271 }
3272
3273 if(realreferral) {
3274 LOG(prefix<<qname<<": status=did not resolve, got "<<(unsigned int)nsset.size()<<" NS, ");
3275
3276 nameservers.clear();
3277 for (auto const &nameserver : nsset) {
3278 if (d_wantsRPZ) {
3279 d_appliedPolicy = dfe.getProcessingPolicy(nameserver, d_discardedPolicies);
3280 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3281 LOG("however "<<nameserver<<" was blocked by RPZ policy '"<<(d_appliedPolicy.d_name ? *d_appliedPolicy.d_name : "")<<"'"<<endl);
3282 *rcode = -2;
3283 return true;
3284 }
3285 }
3286 nameservers.insert({nameserver, {{}, false}});
3287 }
3288 LOG("looping to them"<<endl);
3289 *gotNewServers = true;
3290 auth=newauth;
3291
3292 return false;
3293 }
3294
3295 return false;
3296 }
3297
3298 /** returns:
3299 * -1 in case of no results
3300 * -2 when a FilterEngine Policy was hit
3301 * rcode otherwise
3302 */
3303 int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, const QType &qtype,
3304 vector<DNSRecord>&ret,
3305 unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation)
3306 {
3307 auto luaconfsLocal = g_luaconfs.getLocal();
3308 string prefix;
3309 if(doLog()) {
3310 prefix=d_prefix;
3311 prefix.append(depth, ' ');
3312 }
3313
3314 LOG(prefix<<qname<<": Cache consultations done, have "<<(unsigned int)nameservers.size()<<" NS to contact");
3315
3316 if (nameserversBlockedByRPZ(luaconfsLocal->dfe, nameservers)) {
3317 return -2;
3318 }
3319
3320 LOG(endl);
3321
3322 for(;;) { // we may get more specific nameservers
3323 auto rnameservers = shuffleInSpeedOrder(nameservers, doLog() ? (prefix+qname.toString()+": ") : string() );
3324
3325 for(auto tns=rnameservers.cbegin();;++tns) {
3326 if(tns==rnameservers.cend()) {
3327 LOG(prefix<<qname<<": Failed to resolve via any of the "<<(unsigned int)rnameservers.size()<<" offered NS at level '"<<auth<<"'"<<endl);
3328 if(!auth.isRoot() && flawedNSSet) {
3329 LOG(prefix<<qname<<": Ageing nameservers for level '"<<auth<<"', next query might succeed"<<endl);
3330
3331 if(t_RC->doAgeCache(d_now.tv_sec, auth, QType::NS, 10))
3332 g_stats.nsSetInvalidations++;
3333 }
3334 return -1;
3335 }
3336
3337 bool cacheOnly = false;
3338 // this line needs to identify the 'self-resolving' behaviour
3339 if(qname == tns->first && (qtype.getCode() == QType::A || qtype.getCode() == QType::AAAA)) {
3340 /* we might have a glue entry in cache so let's try this NS
3341 but only if we have enough in the cache to know how to reach it */
3342 LOG(prefix<<qname<<": Using NS to resolve itself, but only using what we have in cache ("<<(1+tns-rnameservers.cbegin())<<"/"<<rnameservers.size()<<")"<<endl);
3343 cacheOnly = true;
3344 }
3345
3346 typedef vector<ComboAddress> remoteIPs_t;
3347 remoteIPs_t remoteIPs;
3348 remoteIPs_t::const_iterator remoteIP;
3349 bool pierceDontQuery=false;
3350 bool sendRDQuery=false;
3351 boost::optional<Netmask> ednsmask;
3352 LWResult lwr;
3353 const bool wasForwarded = tns->first.empty() && (!nameservers[tns->first].first.empty());
3354 int rcode = RCode::NoError;
3355 bool gotNewServers = false;
3356
3357 if(tns->first.empty() && !wasForwarded) {
3358 LOG(prefix<<qname<<": Domain is out-of-band"<<endl);
3359 /* setting state to indeterminate since validation is disabled for local auth zone,
3360 and Insecure would be misleading. */
3361 state = Indeterminate;
3362 d_wasOutOfBand = doOOBResolve(qname, qtype, lwr.d_records, depth, lwr.d_rcode);
3363 lwr.d_tcbit=false;
3364 lwr.d_aabit=true;
3365
3366 /* we have received an answer, are we done ? */
3367 bool done = processAnswer(depth, lwr, qname, qtype, auth, false, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, state);
3368 if (done) {
3369 return rcode;
3370 }
3371 if (gotNewServers) {
3372 if (stopAtDelegation && *stopAtDelegation == Stop) {
3373 *stopAtDelegation = Stopped;
3374 return rcode;
3375 }
3376 break;
3377 }
3378 }
3379 else {
3380 /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */
3381 remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly);
3382
3383 if(remoteIPs.empty()) {
3384 LOG(prefix<<qname<<": Failed to get IP for NS "<<tns->first<<", trying next if available"<<endl);
3385 flawedNSSet=true;
3386 continue;
3387 }
3388 else {
3389 bool hitPolicy{false};
3390 LOG(prefix<<qname<<": Resolved '"<<auth<<"' NS "<<tns->first<<" to: ");
3391 for(remoteIP = remoteIPs.cbegin(); remoteIP != remoteIPs.cend(); ++remoteIP) {
3392 if(remoteIP != remoteIPs.cbegin()) {
3393 LOG(", ");
3394 }
3395 LOG(remoteIP->toString());
3396 if(nameserverIPBlockedByRPZ(luaconfsLocal->dfe, *remoteIP)) {
3397 hitPolicy = true;
3398 }
3399 }
3400 LOG(endl);
3401 if (hitPolicy) //implies d_wantsRPZ
3402 return -2;
3403 }
3404
3405 for(remoteIP = remoteIPs.cbegin(); remoteIP != remoteIPs.cend(); ++remoteIP) {
3406 LOG(prefix<<qname<<": Trying IP "<< remoteIP->toStringWithPort() <<", asking '"<<qname<<"|"<<qtype.getName()<<"'"<<endl);
3407
3408 if (throttledOrBlocked(prefix, *remoteIP, qname, qtype, pierceDontQuery)) {
3409 continue;
3410 }
3411
3412 bool truncated = false;
3413 bool gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery,
3414 tns->first, *remoteIP, false, &truncated);
3415 if (gotAnswer && truncated ) {
3416 /* retry, over TCP this time */
3417 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery,
3418 tns->first, *remoteIP, true, &truncated);
3419 }
3420
3421 if (!gotAnswer) {
3422 continue;
3423 }
3424
3425 LOG(prefix<<qname<<": Got "<<(unsigned int)lwr.d_records.size()<<" answers from "<<tns->first<<" ("<< remoteIP->toString() <<"), rcode="<<lwr.d_rcode<<" ("<<RCode::to_s(lwr.d_rcode)<<"), aa="<<lwr.d_aabit<<", in "<<lwr.d_usec/1000<<"ms"<<endl);
3426
3427 /* // for you IPv6 fanatics :-)
3428 if(remoteIP->sin4.sin_family==AF_INET6)
3429 lwr.d_usec/=3;
3430 */
3431 // cout<<"msec: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
3432
3433 t_sstorage.nsSpeeds[tns->first.empty()? DNSName(remoteIP->toStringWithPort()) : tns->first].submit(*remoteIP, lwr.d_usec, &d_now);
3434
3435 /* we have received an answer, are we done ? */
3436 bool done = processAnswer(depth, lwr, qname, qtype, auth, wasForwarded, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, state);
3437 if (done) {
3438 return rcode;
3439 }
3440 if (gotNewServers) {
3441 if (stopAtDelegation && *stopAtDelegation == Stop) {
3442 *stopAtDelegation = Stopped;
3443 return rcode;
3444 }
3445 break;
3446 }
3447 /* was lame */
3448 t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(*remoteIP, qname, qtype.getCode()), 60, 100);
3449 }
3450
3451 if (gotNewServers) {
3452 break;
3453 }
3454
3455 if(remoteIP == remoteIPs.cend()) // we tried all IP addresses, none worked
3456 continue;
3457
3458 }
3459 }
3460 }
3461 return -1;
3462 }
3463
3464 void SyncRes::setQuerySource(const ComboAddress& requestor, boost::optional<const EDNSSubnetOpts&> incomingECS)
3465 {
3466 d_requestor = requestor;
3467
3468 if (incomingECS && incomingECS->source.getBits() > 0) {
3469 d_cacheRemote = incomingECS->source.getMaskedNetwork();
3470 uint8_t bits = std::min(incomingECS->source.getBits(), (incomingECS->source.isIpv4() ? s_ecsipv4limit : s_ecsipv6limit));
3471 ComboAddress trunc = incomingECS->source.getNetwork();
3472 trunc.truncate(bits);
3473 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
3474 } else {
3475 d_cacheRemote = d_requestor;
3476 if(!incomingECS && s_ednslocalsubnets.match(d_requestor)) {
3477 ComboAddress trunc = d_requestor;
3478 uint8_t bits = d_requestor.isIPv4() ? 32 : 128;
3479 bits = std::min(bits, (trunc.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
3480 trunc.truncate(bits);
3481 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
3482 } else if (s_ecsScopeZero.source.getBits() > 0) {
3483 /* RFC7871 says we MUST NOT send any ECS if the source scope is 0.
3484 But using an empty ECS in that case would mean inserting
3485 a non ECS-specific entry into the cache, preventing any further
3486 ECS-specific query to be sent.
3487 So instead we use the trick described in section 7.1.2:
3488 "The subsequent Recursive Resolver query to the Authoritative Nameserver
3489 will then either not include an ECS option or MAY optionally include
3490 its own address information, which is what the Authoritative
3491 Nameserver will almost certainly use to generate any Tailored
3492 Response in lieu of an option. This allows the answer to be handled
3493 by the same caching mechanism as other queries, with an explicit
3494 indicator of the applicable scope. Subsequent Stub Resolver queries
3495 for /0 can then be answered from this cached response.
3496 */
3497 d_outgoingECSNetwork = boost::optional<Netmask>(s_ecsScopeZero.source.getMaskedNetwork());
3498 d_cacheRemote = s_ecsScopeZero.source.getNetwork();
3499 } else {
3500 // ECS disabled because no scope-zero address could be derived.
3501 d_outgoingECSNetwork = boost::none;
3502 }
3503 }
3504 }
3505
3506 boost::optional<Netmask> SyncRes::getEDNSSubnetMask(const DNSName& dn, const ComboAddress& rem)
3507 {
3508 if(d_outgoingECSNetwork && (s_ednsdomains.check(dn) || s_ednsremotesubnets.match(rem))) {
3509 return d_outgoingECSNetwork;
3510 }
3511 return boost::none;
3512 }
3513
3514 void SyncRes::parseEDNSSubnetWhitelist(const std::string& wlist)
3515 {
3516 vector<string> parts;
3517 stringtok(parts, wlist, ",; ");
3518 for(const auto& a : parts) {
3519 try {
3520 s_ednsremotesubnets.addMask(Netmask(a));
3521 }
3522 catch(...) {
3523 s_ednsdomains.add(DNSName(a));
3524 }
3525 }
3526 }
3527
3528 void SyncRes::parseEDNSSubnetAddFor(const std::string& subnetlist)
3529 {
3530 vector<string> parts;
3531 stringtok(parts, subnetlist, ",; ");
3532 for(const auto& a : parts) {
3533 s_ednslocalsubnets.addMask(a);
3534 }
3535 }
3536
3537 // used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_ercursor.cc
3538 int directResolve(const DNSName& qname, const QType& qtype, int qclass, vector<DNSRecord>& ret)
3539 {
3540 struct timeval now;
3541 gettimeofday(&now, 0);
3542
3543 SyncRes sr(now);
3544 int res = -1;
3545 try {
3546 res = sr.beginResolve(qname, QType(qtype), qclass, ret);
3547 }
3548 catch(const PDNSException& e) {
3549 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got pdns exception: "<<e.reason<<endl;
3550 ret.clear();
3551 }
3552 catch(const ImmediateServFailException& e) {
3553 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got ImmediateServFailException: "<<e.reason<<endl;
3554 ret.clear();
3555 }
3556 catch(const std::exception& e) {
3557 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got STL error: "<<e.what()<<endl;
3558 ret.clear();
3559 }
3560 catch(...) {
3561 g_log<<Logger::Error<<"Failed to resolve "<<qname.toLogString()<<", got an exception"<<endl;
3562 ret.clear();
3563 }
3564
3565 return res;
3566 }
3567
3568 int SyncRes::getRootNS(struct timeval now, asyncresolve_t asyncCallback) {
3569 SyncRes sr(now);
3570 sr.setDoEDNS0(true);
3571 sr.setUpdatingRootNS();
3572 sr.setDoDNSSEC(g_dnssecmode != DNSSECMode::Off);
3573 sr.setDNSSECValidationRequested(g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate);
3574 sr.setAsyncCallback(asyncCallback);
3575
3576 vector<DNSRecord> ret;
3577 int res=-1;
3578 try {
3579 res=sr.beginResolve(g_rootdnsname, QType(QType::NS), 1, ret);
3580 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
3581 auto state = sr.getValidationState();
3582 if (state == Bogus)
3583 throw PDNSException("Got Bogus validation result for .|NS");
3584 }
3585 return res;
3586 }
3587 catch(const PDNSException& e) {
3588 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3589 }
3590 catch(const ImmediateServFailException& e) {
3591 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3592 }
3593 catch(const std::exception& e) {
3594 g_log<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
3595 }
3596 catch(...) {
3597 g_log<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
3598 }
3599
3600 if(!res) {
3601 g_log<<Logger::Notice<<"Refreshed . records"<<endl;
3602 }
3603 else
3604 g_log<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
3605
3606 return res;
3607 }