]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/recursordist/syncres.cc
Merge pull request #12674 from rgacogne/less-shared-ptr
[thirdparty/pdns.git] / pdns / recursordist / syncres.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "arguments.hh"
27 #include "aggressive_nsec.hh"
28 #include "cachecleaner.hh"
29 #include "dns_random.hh"
30 #include "dnsparser.hh"
31 #include "dnsrecords.hh"
32 #include "ednssubnet.hh"
33 #include "logger.hh"
34 #include "lua-recursor4.hh"
35 #include "rec-lua-conf.hh"
36 #include "syncres.hh"
37 #include "dnsseckeeper.hh"
38 #include "validate-recursor.hh"
39 #include "rec-taskqueue.hh"
40
// Global counters object, followed by the per-thread counters instance that is
// constructed from it (each thread gets its own t_Counters because of thread_local).
41 rec::GlobalCounters g_Counters;
42 thread_local rec::TCounters t_Counters(g_Counters);
43
44 template <class T>
45 class fails_t : public boost::noncopyable
46 {
47 public:
48 typedef uint64_t counter_t;
49 struct value_t
50 {
51 value_t(const T& a) :
52 key(a) {}
53 T key;
54 mutable counter_t value{0};
55 time_t last{0};
56 };
57
58 typedef multi_index_container<value_t,
59 indexed_by<
60 ordered_unique<tag<T>, member<value_t, T, &value_t::key>>,
61 ordered_non_unique<tag<time_t>, member<value_t, time_t, &value_t::last>>>>
62 cont_t;
63
64 cont_t getMapCopy() const
65 {
66 return d_cont;
67 }
68
69 counter_t value(const T& t) const
70 {
71 auto i = d_cont.find(t);
72
73 if (i == d_cont.end()) {
74 return 0;
75 }
76 return i->value;
77 }
78
79 counter_t incr(const T& key, const struct timeval& now)
80 {
81 auto i = d_cont.insert(key).first;
82
83 if (i->value < std::numeric_limits<counter_t>::max()) {
84 i->value++;
85 }
86 auto& ind = d_cont.template get<T>();
87 time_t tm = now.tv_sec;
88 ind.modify(i, [tm](value_t& val) { val.last = tm; });
89 return i->value;
90 }
91
92 void clear(const T& a)
93 {
94 d_cont.erase(a);
95 }
96
97 void clear()
98 {
99 d_cont.clear();
100 }
101
102 size_t size() const
103 {
104 return d_cont.size();
105 }
106
107 void prune(time_t cutoff)
108 {
109 auto& ind = d_cont.template get<time_t>();
110 ind.erase(ind.begin(), ind.upper_bound(cutoff));
111 }
112
113 private:
114 cont_t d_cont;
115 };
116
117 /** Class that implements a decaying EWMA.
118 This class keeps an exponentially weighted moving average which, additionally, decays over time.
119 The decaying is only done on get.
120 */
121
122 //! This represents a number of decaying Ewmas, used to store performance per nameserver-name.
123 /** Modelled to work mostly like the underlying DecayingEwma */
124 class DecayingEwmaCollection
125 {
126 private:
127 struct DecayingEwma
128 {
129 public:
130 void submit(int arg, const struct timeval& last, const struct timeval& now)
131 {
132 d_last = arg;
133 auto val = static_cast<float>(arg);
134 if (d_val == 0) {
135 d_val = val;
136 }
137 else {
138 auto diff = makeFloat(last - now);
139 auto factor = expf(diff) / 2.0f; // might be '0.5', or 0.0001
140 d_val = (1.0f - factor) * val + factor * d_val;
141 }
142 }
143
144 float get(float factor)
145 {
146 return d_val *= factor;
147 }
148
149 float peek(void) const
150 {
151 return d_val;
152 }
153
154 int last(void) const
155 {
156 return d_last;
157 }
158
159 float d_val{0};
160 int d_last{0};
161 };
162
163 public:
164 DecayingEwmaCollection(const DNSName& name, const struct timeval ts = {0, 0}) :
165 d_name(name), d_lastget(ts)
166 {
167 }
168
169 void submit(const ComboAddress& remote, int usecs, const struct timeval& now) const
170 {
171 d_collection[remote].submit(usecs, d_lastget, now);
172 }
173
174 float getFactor(const struct timeval& now) const
175 {
176 float diff = makeFloat(d_lastget - now);
177 return expf(diff / 60.0f); // is 1.0 or less
178 }
179
180 bool stale(time_t limit) const
181 {
182 return limit > d_lastget.tv_sec;
183 }
184
185 void purge(const std::map<ComboAddress, float>& keep) const
186 {
187 for (auto iter = d_collection.begin(); iter != d_collection.end();) {
188 if (keep.find(iter->first) != keep.end()) {
189 ++iter;
190 }
191 else {
192 iter = d_collection.erase(iter);
193 }
194 }
195 }
196
197 // d_collection is the modifyable part of the record, we index on DNSName and timeval, and DNSName never changes
198 mutable std::map<ComboAddress, DecayingEwma> d_collection;
199 const DNSName d_name;
200 struct timeval d_lastget;
201 };
202
203 class nsspeeds_t : public multi_index_container<DecayingEwmaCollection,
204 indexed_by<
205 hashed_unique<tag<DNSName>, member<DecayingEwmaCollection, const DNSName, &DecayingEwmaCollection::d_name>>,
206 ordered_non_unique<tag<timeval>, member<DecayingEwmaCollection, timeval, &DecayingEwmaCollection::d_lastget>>>>
207 {
208 public:
209 const auto& find_or_enter(const DNSName& name, const struct timeval& now)
210 {
211 const auto it = insert(DecayingEwmaCollection{name, now}).first;
212 return *it;
213 }
214
215 const auto& find_or_enter(const DNSName& name)
216 {
217 const auto it = insert(DecayingEwmaCollection{name}).first;
218 return *it;
219 }
220
221 float fastest(const DNSName& name, const struct timeval& now)
222 {
223 auto& ind = get<DNSName>();
224 auto it = insert(DecayingEwmaCollection{name, now}).first;
225 if (it->d_collection.empty()) {
226 return 0;
227 }
228 // This could happen if find(DNSName) entered an entry; it's used only by test code
229 if (it->d_lastget.tv_sec == 0 && it->d_lastget.tv_usec == 0) {
230 ind.modify(it, [&](DecayingEwmaCollection& d) { d.d_lastget = now; });
231 }
232
233 float ret = std::numeric_limits<float>::max();
234 const float factor = it->getFactor(now);
235 for (auto& entry : it->d_collection) {
236 if (float tmp = entry.second.get(factor); tmp < ret) {
237 ret = tmp;
238 }
239 }
240 ind.modify(it, [&](DecayingEwmaCollection& d) { d.d_lastget = now; });
241 return ret;
242 }
243 };
244
// File-local nameserver speed table (an nsspeeds_t) wrapped in LockGuarded.
245 static LockGuarded<nsspeeds_t> s_nsSpeeds;
246
247 template <class Thing>
248 class Throttle : public boost::noncopyable
249 {
250 public:
251 struct entry_t
252 {
253 entry_t(const Thing& thing_, time_t ttd_, unsigned int count_) :
254 thing(thing_), ttd(ttd_), count(count_)
255 {
256 }
257 Thing thing;
258 time_t ttd;
259 mutable unsigned int count;
260 };
261 typedef multi_index_container<entry_t,
262 indexed_by<
263 ordered_unique<tag<Thing>, member<entry_t, Thing, &entry_t::thing>>,
264 ordered_non_unique<tag<time_t>, member<entry_t, time_t, &entry_t::ttd>>>>
265 cont_t;
266
267 bool shouldThrottle(time_t now, const Thing& t)
268 {
269 auto i = d_cont.find(t);
270 if (i == d_cont.end()) {
271 return false;
272 }
273 if (now > i->ttd || i->count == 0) {
274 d_cont.erase(i);
275 return false;
276 }
277 i->count--;
278
279 return true; // still listed, still blocked
280 }
281
282 void throttle(time_t now, const Thing& t, time_t ttl, unsigned int count)
283 {
284 auto i = d_cont.find(t);
285 time_t ttd = now + ttl;
286 if (i == d_cont.end()) {
287 d_cont.emplace(t, ttd, count);
288 }
289 else if (ttd > i->ttd || count > i->count) {
290 ttd = std::max(i->ttd, ttd);
291 count = std::max(i->count, count);
292 auto& ind = d_cont.template get<Thing>();
293 ind.modify(i, [ttd, count](entry_t& e) { e.ttd = ttd; e.count = count; });
294 }
295 }
296
297 size_t size() const
298 {
299 return d_cont.size();
300 }
301
302 cont_t getThrottleMap() const
303 {
304 return d_cont;
305 }
306
307 void clear()
308 {
309 d_cont.clear();
310 }
311
312 void prune(time_t now)
313 {
314 auto& ind = d_cont.template get<time_t>();
315 ind.erase(ind.begin(), ind.upper_bound(now));
316 }
317
318 private:
319 cont_t d_cont;
320 };
321
// File-local throttle table keyed on a (ComboAddress, DNSName, QType) tuple, wrapped in LockGuarded.
322 static LockGuarded<Throttle<std::tuple<ComboAddress, DNSName, QType>>> s_throttle;
323
324 struct SavedParentEntry
325 {
326 SavedParentEntry(const DNSName& name, map<DNSName, vector<ComboAddress>>&& nsAddresses, time_t ttd) :
327 d_domain(name), d_nsAddresses(nsAddresses), d_ttd(ttd)
328 {
329 }
330 DNSName d_domain;
331 map<DNSName, vector<ComboAddress>> d_nsAddresses;
332 time_t d_ttd;
333 mutable uint64_t d_count{0};
334 };
335
336 typedef multi_index_container<
337 SavedParentEntry,
338 indexed_by<ordered_unique<tag<DNSName>, member<SavedParentEntry, DNSName, &SavedParentEntry::d_domain>>,
339 ordered_non_unique<tag<time_t>, member<SavedParentEntry, time_t, &SavedParentEntry::d_ttd>>>>
340 SavedParentNSSetBase;
341
342 class SavedParentNSSet : public SavedParentNSSetBase
343 {
344 public:
345 void prune(time_t now)
346 {
347 auto& ind = get<time_t>();
348 ind.erase(ind.begin(), ind.upper_bound(now));
349 }
350 void inc(const DNSName& name)
351 {
352 auto it = find(name);
353 if (it != end()) {
354 ++(*it).d_count;
355 }
356 }
357 SavedParentNSSet getMapCopy() const
358 {
359 return *this;
360 }
361 };
362
// File-local table of saved parent NS sets, wrapped in LockGuarded.
363 static LockGuarded<SavedParentNSSet> s_savedParentNSSet;
364
// Per-thread state: each thread has its own SyncRes storage and its own timeouts ring buffer pointer.
365 thread_local SyncRes::ThreadLocalStorage SyncRes::t_sstorage;
366 thread_local std::unique_ptr<addrringbuf_t> t_timeouts;
367
// Definitions of SyncRes static data members. Apart from s_dontQuery (null) and
// s_redirectionQTypes (CNAME and DNAME) no value is assigned here; presumably they are
// filled in by the startup/configuration code - confirm against the callers.
368 std::unique_ptr<NetmaskGroup> SyncRes::s_dontQuery{nullptr};
369 NetmaskGroup SyncRes::s_ednslocalsubnets;
370 NetmaskGroup SyncRes::s_ednsremotesubnets;
371 SuffixMatchNode SyncRes::s_ednsdomains;
372 EDNSSubnetOpts SyncRes::s_ecsScopeZero;
373 string SyncRes::s_serverID;
374 SyncRes::LogMode SyncRes::s_lm;
375 const std::unordered_set<QType> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};
// File-local failure counters, one table keyed on address and one keyed on name, each wrapped in LockGuarded.
376 static LockGuarded<fails_t<ComboAddress>> s_fails;
377 static LockGuarded<fails_t<DNSName>> s_nonresolving;
378
379 struct DoTStatus
380 {
381 DoTStatus(const ComboAddress& ip, const DNSName& auth, time_t ttd) :
382 d_address(ip), d_auth(auth), d_ttd(ttd)
383 {
384 }
385 enum Status : uint8_t
386 {
387 Unknown,
388 Busy,
389 Bad,
390 Good
391 };
392 const ComboAddress d_address;
393 const DNSName d_auth;
394 time_t d_ttd;
395 mutable uint64_t d_count{0};
396 mutable Status d_status{Unknown};
397 std::string toString() const
398 {
399 const std::array<std::string, 4> n{"Unknown", "Busy", "Bad", "Good"};
400 unsigned int v = static_cast<unsigned int>(d_status);
401 return v >= n.size() ? "?" : n[v];
402 }
403 };
404
405 struct DoTMap
406 {
407 multi_index_container<DoTStatus,
408 indexed_by<
409 ordered_unique<tag<ComboAddress>, member<DoTStatus, const ComboAddress, &DoTStatus::d_address>>,
410 ordered_non_unique<tag<time_t>, member<DoTStatus, time_t, &DoTStatus::d_ttd>>>>
411 d_map;
412 uint64_t d_numBusy{0};
413
414 void prune(time_t cutoff)
415 {
416 auto& ind = d_map.template get<time_t>();
417 ind.erase(ind.begin(), ind.upper_bound(cutoff));
418 }
419 };
420
// File-local DoT status table, wrapped in LockGuarded.
421 static LockGuarded<DoTMap> s_dotMap;
422
// 24 * 3600 and 3 * 24 * 3600: one day and three days if the unit is seconds
// (presumably, as these are time_t values - confirm at the points of use).
423 static const time_t dotFailWait = 24 * 3600;
424 static const time_t dotSuccessWait = 3 * 24 * 3600;
// Forward declaration; the definition is not part of this section of the file.
425 static bool shouldDoDoT(ComboAddress address, time_t now);
426
// Definitions of the SyncRes static data members (limits, TTL bounds, ECS settings, feature
// switches and ECS statistics). None is given an explicit value here; presumably they are
// set by the startup/configuration code - confirm against the code that assigns them.
427 unsigned int SyncRes::s_maxnegttl;
428 unsigned int SyncRes::s_maxbogusttl;
429 unsigned int SyncRes::s_maxcachettl;
430 unsigned int SyncRes::s_maxqperq;
431 unsigned int SyncRes::s_maxnsperresolve;
432 unsigned int SyncRes::s_maxnsaddressqperq;
433 unsigned int SyncRes::s_maxtotusec;
434 unsigned int SyncRes::s_maxdepth;
435 unsigned int SyncRes::s_minimumTTL;
436 unsigned int SyncRes::s_minimumECSTTL;
437 unsigned int SyncRes::s_packetcachettl;
438 unsigned int SyncRes::s_packetcacheservfailttl;
439 unsigned int SyncRes::s_serverdownmaxfails;
440 unsigned int SyncRes::s_serverdownthrottletime;
441 unsigned int SyncRes::s_nonresolvingnsmaxfails;
442 unsigned int SyncRes::s_nonresolvingnsthrottletime;
443 unsigned int SyncRes::s_ecscachelimitttl;
444 pdns::stat_t SyncRes::s_ecsqueries;
445 pdns::stat_t SyncRes::s_ecsresponses;
// Two separate ECS response counter maps keyed on a uint8_t (by the member names, one per
// address family and keyed on subnet size).
446 std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize4;
447 std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize6;
448
449 uint8_t SyncRes::s_ecsipv4limit;
450 uint8_t SyncRes::s_ecsipv6limit;
451 uint8_t SyncRes::s_ecsipv4cachelimit;
452 uint8_t SyncRes::s_ecsipv6cachelimit;
453 bool SyncRes::s_ecsipv4nevercache;
454 bool SyncRes::s_ecsipv6nevercache;
455
456 bool SyncRes::s_doIPv4;
457 bool SyncRes::s_doIPv6;
458 bool SyncRes::s_rootNXTrust;
459 bool SyncRes::s_noEDNS;
// Copied into d_qNameMinimization by the SyncRes constructor.
460 bool SyncRes::s_qnameminimization;
461 SyncRes::HardenNXD SyncRes::s_hardenNXD;
462 unsigned int SyncRes::s_refresh_ttlperc;
463 unsigned int SyncRes::s_locked_ttlperc;
464 int SyncRes::s_tcp_fast_open;
465 bool SyncRes::s_tcp_fast_open_connect;
466 bool SyncRes::s_dot_to_port_853;
467 int SyncRes::s_event_trace_enabled;
468 bool SyncRes::s_save_parent_ns_set;
469 unsigned int SyncRes::s_max_busy_dot_probes;
470 bool SyncRes::s_addExtendedResolutionDNSErrors;
471
// Logging helper for SyncRes code. When d_lm is Log, x is streamed to g_log at Warning level;
// when d_lm is Store, a timestamp is added via addTraceTS() and x is streamed to d_trace;
// for any other d_lm value nothing happens and x is not evaluated.
// The expansion needs d_lm, d_fixednow and d_trace to be in scope.
// NOTE: this expands to a bare if / else-if chain (it is not wrapped in do { } while (0)),
// so using it as the unbraced body of an outer if that has its own else changes which
// if that else belongs to. Always use it inside braces.
472 #define LOG(x) \
473 if (d_lm == Log) { \
474 g_log << Logger::Warning << x; \
475 } \
476 else if (d_lm == Store) { \
477 addTraceTS(d_fixednow, d_trace); \
478 d_trace << x; \
479 }
480
481 OptLog SyncRes::LogObject(const string& prefix)
482 {
483 OptLog ret;
484 if (d_lm == Log) {
485 ret = {prefix, d_fixednow, &g_log};
486 }
487 else if (d_lm == Store) {
488 ret = {prefix, d_fixednow, &d_trace};
489 }
490 return ret;
491 }
492
493 // A helper function to print a double with specific printf format.
494 // Not using boost::format since it is not thread safe while calling
495 // into locale handling code according to tsan.
496 // This allocates a string, but that's nothing compared to what
497 // boost::format is doing and may even be optimized away anyway.
// Formats f with two decimals ("%0.2f"). Returns "?" when snprintf fails or when the
// result does not fit in the 20-byte buffer (terminating NUL included).
static inline std::string fmtfloat(double f)
{
  char formatted[20];
  const int written = snprintf(formatted, sizeof(formatted), "%0.2f", f);
  // snprintf reports an error as a negative value and truncation as a length that does not fit
  const bool fits = written >= 0 && written < static_cast<int>(sizeof(formatted));
  if (!fits) {
    return "?";
  }
  return std::string(formatted, written);
}
507
508 static inline void accountAuthLatency(uint64_t usec, int family)
509 {
510 if (family == AF_INET) {
511 t_Counters.at(rec::Histogram::auth4Answers)(usec);
512 t_Counters.at(rec::Histogram::cumulativeAuth4Answers)(usec);
513 }
514 else {
515 t_Counters.at(rec::Histogram::auth6Answers)(usec);
516 t_Counters.at(rec::Histogram::cumulativeAuth6Answers)(usec);
517 }
518 }
519
520 SyncRes::SyncRes(const struct timeval& now) :
521 d_authzonequeries(0), d_outqueries(0), d_tcpoutqueries(0), d_dotoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0), d_totUsec(0), d_fixednow(now), d_now(now), d_cacheonly(false), d_doDNSSEC(false), d_doEDNS0(false), d_qNameMinimization(s_qnameminimization), d_lm(s_lm)
522
523 {
524 }
525
526 static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec);
527
528 void SyncRes::resolveAdditionals(const DNSName& qname, QType qtype, AdditionalMode mode, std::vector<DNSRecord>& additionals, unsigned int depth, bool& additionalsNotInCache)
529 {
530 vector<DNSRecord> addRecords;
531
532 Context context;
533 switch (mode) {
534 case AdditionalMode::ResolveImmediately: {
535 set<GetBestNSAnswer> beenthere;
536 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
537 if (res != 0) {
538 return;
539 }
540 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
541 // validation is required.
542 if (vStateIsBogus(context.state)) {
543 return;
544 }
545 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
546 return;
547 }
548 for (auto& rec : addRecords) {
549 if (rec.d_place == DNSResourceRecord::ANSWER) {
550 additionals.push_back(std::move(rec));
551 }
552 }
553 break;
554 }
555 case AdditionalMode::CacheOnly:
556 case AdditionalMode::CacheOnlyRequireAuth: {
557 // Peek into cache
558 MemRecursorCache::Flags flags = mode == AdditionalMode::CacheOnlyRequireAuth ? MemRecursorCache::RequireAuth : MemRecursorCache::None;
559 if (g_recCache->get(d_now.tv_sec, qname, qtype, flags, &addRecords, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, &context.state) <= 0) {
560 return;
561 }
562 // See the comment for the ResolveImmediately case
563 if (vStateIsBogus(context.state)) {
564 return;
565 }
566 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
567 return;
568 }
569 for (auto& rec : addRecords) {
570 if (rec.d_place == DNSResourceRecord::ANSWER) {
571 rec.d_ttl -= d_now.tv_sec;
572 additionals.push_back(std::move(rec));
573 }
574 }
575 break;
576 }
577 case AdditionalMode::ResolveDeferred: {
578 const bool oldCacheOnly = setCacheOnly(true);
579 set<GetBestNSAnswer> beenthere;
580 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
581 setCacheOnly(oldCacheOnly);
582 if (res == 0 && addRecords.size() > 0) {
583 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
584 // validation is required.
585 if (vStateIsBogus(context.state)) {
586 return;
587 }
588 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
589 return;
590 }
591 bool found = false;
592 for (auto& rec : addRecords) {
593 if (rec.d_place == DNSResourceRecord::ANSWER) {
594 found = true;
595 additionals.push_back(std::move(rec));
596 }
597 }
598 if (found) {
599 return;
600 }
601 }
602 // Not found in cache, check negcache and push task if also not in negcache
603 NegCache::NegCacheEntry ne;
604 bool inNegCache = g_negCache->get(qname, qtype, d_now, ne, false);
605 if (!inNegCache) {
606 // There are a few cases where an answer is neither stored in the record cache nor in the neg cache.
607 // An example is a SOA-less NODATA response. Rate limiting will kick in if those tasks are pushed too often.
608 // We might want to fix these cases (and always either store positive or negative) some day.
609 pushResolveTask(qname, qtype, d_now.tv_sec, d_now.tv_sec + 60);
610 additionalsNotInCache = true;
611 }
612 break;
613 }
614 case AdditionalMode::Ignore:
615 break;
616 }
617 }
618
619 // The main (recursive) function to add additionals
620 // qtype: the original query type to expand
621 // start: records to start from
622 // This function uses two state sets to avoid infinite recursion and allow deduplication
623 // depth is the main recursion depth
624 // additionaldepth is the depth for addAdditionals itself
625 void SyncRes::addAdditionals(QType qtype, const vector<DNSRecord>& start, vector<DNSRecord>& additionals, std::set<std::pair<DNSName, QType>>& uniqueCalls, std::set<std::tuple<DNSName, QType, QType>>& uniqueResults, unsigned int depth, unsigned additionaldepth, bool& additionalsNotInCache)
626 {
627 if (additionaldepth >= 5 || start.empty()) {
628 return;
629 }
630
631 auto luaLocal = g_luaconfs.getLocal();
632 const auto it = luaLocal->allowAdditionalQTypes.find(qtype);
633 if (it == luaLocal->allowAdditionalQTypes.end()) {
634 return;
635 }
636 std::unordered_set<DNSName> addnames;
637 for (const auto& rec : start) {
638 if (rec.d_place == DNSResourceRecord::ANSWER) {
639 // currently, this function only knows about names, we could also take the target types that are dependent on
640 // record contents into account
641 // e.g. for NAPTR records, go only for SRV for flag value "s", or A/AAAA for flag value "a"
642 allowAdditionalEntry(addnames, rec);
643 }
644 }
645
646 // We maintain two sets for deduplication:
647 // - uniqueCalls makes sure we never resolve a qname/qtype twice
648 // - uniqueResults makes sure we never add the same qname/qytype RRSet to the result twice,
649 // but note that that set might contain multiple elements.
650
651 auto mode = it->second.second;
652 for (const auto& targettype : it->second.first) {
653 for (const auto& addname : addnames) {
654 std::vector<DNSRecord> records;
655 bool inserted = uniqueCalls.emplace(addname, targettype).second;
656 if (inserted) {
657 resolveAdditionals(addname, targettype, mode, records, depth, additionalsNotInCache);
658 }
659 if (!records.empty()) {
660 for (auto r = records.begin(); r != records.end();) {
661 QType covered = QType::ENT;
662 if (r->d_type == QType::RRSIG) {
663 if (auto rsig = getRR<RRSIGRecordContent>(*r); rsig != nullptr) {
664 covered = rsig->d_type;
665 }
666 }
667 if (uniqueResults.count(std::tuple(r->d_name, QType(r->d_type), covered)) > 0) {
668 // A bit expensive for vectors, but they are small
669 r = records.erase(r);
670 }
671 else {
672 ++r;
673 }
674 }
675 for (const auto& r : records) {
676 additionals.push_back(r);
677 QType covered = QType::ENT;
678 if (r.d_type == QType::RRSIG) {
679 if (auto rsig = getRR<RRSIGRecordContent>(r); rsig != nullptr) {
680 covered = rsig->d_type;
681 }
682 }
683 uniqueResults.emplace(r.d_name, r.d_type, covered);
684 }
685 addAdditionals(targettype, records, additionals, uniqueCalls, uniqueResults, depth, additionaldepth + 1, additionalsNotInCache);
686 }
687 }
688 }
689 }
690
691 // The entry point for other code
692 bool SyncRes::addAdditionals(QType qtype, vector<DNSRecord>& ret, unsigned int depth)
693 {
694 // The additional records of interest
695 std::vector<DNSRecord> additionals;
696
697 // We only call resolve for a specific name/type combo once
698 std::set<std::pair<DNSName, QType>> uniqueCalls;
699
700 // Collect multiple name/qtype from a single resolve but do not add a new set from new resolve calls
701 // For RRSIGs, the type covered is stored in the second Qtype
702 std::set<std::tuple<DNSName, QType, QType>> uniqueResults;
703
704 bool additionalsNotInCache = false;
705 addAdditionals(qtype, ret, additionals, uniqueCalls, uniqueResults, depth, 0, additionalsNotInCache);
706
707 for (auto& rec : additionals) {
708 rec.d_place = DNSResourceRecord::ADDITIONAL;
709 ret.push_back(std::move(rec));
710 }
711 return additionalsNotInCache;
712 }
713
714 /** everything begins here - this is the entry point just after receiving a packet */
715 int SyncRes::beginResolve(const DNSName& qname, const QType qtype, QClass qclass, vector<DNSRecord>& ret, unsigned int depth)
716 {
717 d_eventTrace.add(RecEventTrace::SyncRes);
718 t_Counters.at(rec::Counter::syncresqueries)++;
719 d_wasVariable = false;
720 d_wasOutOfBand = false;
721 d_cutStates.clear();
722
723 if (doSpecialNamesResolve(qname, qtype, qclass, ret)) {
724 d_queryValidationState = vState::Insecure; // this could fool our stats into thinking a validation took place
725 return 0; // so do check before updating counters (we do now)
726 }
727
728 if (isUnsupported(qtype)) {
729 return -1;
730 }
731
732 if (qclass == QClass::ANY)
733 qclass = QClass::IN;
734 else if (qclass != QClass::IN)
735 return -1;
736
737 if (qtype == QType::DS) {
738 d_externalDSQuery = qname;
739 }
740 else {
741 d_externalDSQuery.clear();
742 }
743
744 set<GetBestNSAnswer> beenthere;
745 Context context;
746 int res = doResolve(qname, qtype, ret, depth, beenthere, context);
747 d_queryValidationState = context.state;
748 d_extendedError = context.extendedError;
749
750 if (shouldValidate()) {
751 if (d_queryValidationState != vState::Indeterminate) {
752 t_Counters.at(rec::Counter::dnssecValidations)++;
753 }
754 auto xdnssec = g_xdnssec.getLocal();
755 if (xdnssec->check(qname)) {
756 increaseXDNSSECStateCounter(d_queryValidationState);
757 }
758 else {
759 increaseDNSSECStateCounter(d_queryValidationState);
760 }
761 }
762
763 // Avoid calling addAdditionals() if we know we won't find anything
764 auto luaLocal = g_luaconfs.getLocal();
765 if (res == 0 && qclass == QClass::IN && luaLocal->allowAdditionalQTypes.find(qtype) != luaLocal->allowAdditionalQTypes.end()) {
766 bool additionalsNotInCache = addAdditionals(qtype, ret, depth);
767 if (additionalsNotInCache) {
768 d_wasVariable = true;
769 }
770 }
771 d_eventTrace.add(RecEventTrace::SyncRes, res, false);
772 return res;
773 }
774
775 /*! Handles all special, built-in names
776 * Fills ret with an answer and returns true if it handled the query.
777 *
778 * Handles the following queries (and their ANY variants):
779 *
780 * - localhost. IN A
781 * - localhost. IN AAAA
782 * - 1.0.0.127.in-addr.arpa. IN PTR
783 * - 1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. IN PTR
784 * - version.bind. CH TXT
785 * - version.pdns. CH TXT
786 * - id.server. CH TXT
787 * - trustanchor.server CH TXT
788 * - negativetrustanchor.server CH TXT
789 */
790 bool SyncRes::doSpecialNamesResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret)
791 {
792 static const DNSName arpa("1.0.0.127.in-addr.arpa."), ip6_arpa("1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa."),
793 localhost("localhost."), versionbind("version.bind."), idserver("id.server."), versionpdns("version.pdns."), trustanchorserver("trustanchor.server."),
794 negativetrustanchorserver("negativetrustanchor.server.");
795
796 bool handled = false;
797 vector<pair<QType::typeenum, string>> answers;
798
799 if ((qname == arpa || qname == ip6_arpa) && qclass == QClass::IN) {
800 handled = true;
801 if (qtype == QType::PTR || qtype == QType::ANY)
802 answers.emplace_back(QType::PTR, "localhost.");
803 }
804
805 if (qname.isPartOf(localhost) && qclass == QClass::IN) {
806 handled = true;
807 if (qtype == QType::A || qtype == QType::ANY)
808 answers.emplace_back(QType::A, "127.0.0.1");
809 if (qtype == QType::AAAA || qtype == QType::ANY)
810 answers.emplace_back(QType::AAAA, "::1");
811 }
812
813 if ((qname == versionbind || qname == idserver || qname == versionpdns) && qclass == QClass::CHAOS) {
814 handled = true;
815 if (qtype == QType::TXT || qtype == QType::ANY) {
816 if (qname == versionbind || qname == versionpdns)
817 answers.emplace_back(QType::TXT, "\"" + ::arg()["version-string"] + "\"");
818 else if (s_serverID != "disabled")
819 answers.emplace_back(QType::TXT, "\"" + s_serverID + "\"");
820 }
821 }
822
823 if (qname == trustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
824 handled = true;
825 if (qtype == QType::TXT || qtype == QType::ANY) {
826 auto luaLocal = g_luaconfs.getLocal();
827 for (auto const& dsAnchor : luaLocal->dsAnchors) {
828 ostringstream ans;
829 ans << "\"";
830 ans << dsAnchor.first.toString(); // Explicit toString to have a trailing dot
831 for (auto const& dsRecord : dsAnchor.second) {
832 ans << " ";
833 ans << dsRecord.d_tag;
834 }
835 ans << "\"";
836 answers.emplace_back(QType::TXT, ans.str());
837 }
838 }
839 }
840
841 if (qname == negativetrustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
842 handled = true;
843 if (qtype == QType::TXT || qtype == QType::ANY) {
844 auto luaLocal = g_luaconfs.getLocal();
845 for (auto const& negAnchor : luaLocal->negAnchors) {
846 ostringstream ans;
847 ans << "\"";
848 ans << negAnchor.first.toString(); // Explicit toString to have a trailing dot
849 if (negAnchor.second.length())
850 ans << " " << negAnchor.second;
851 ans << "\"";
852 answers.emplace_back(QType::TXT, ans.str());
853 }
854 }
855 }
856
857 if (handled && !answers.empty()) {
858 ret.clear();
859 d_wasOutOfBand = true;
860
861 DNSRecord dr;
862 dr.d_name = qname;
863 dr.d_place = DNSResourceRecord::ANSWER;
864 dr.d_class = qclass;
865 dr.d_ttl = 86400;
866 for (const auto& ans : answers) {
867 dr.d_type = ans.first;
868 dr.setContent(DNSRecordContent::mastermake(ans.first, qclass, ans.second));
869 ret.push_back(dr);
870 }
871 }
872
873 return handled;
874 }
875
876 //! This is the 'out of band resolver', in other words, the authoritative server
877 void SyncRes::AuthDomain::addSOA(std::vector<DNSRecord>& records) const
878 {
879 SyncRes::AuthDomain::records_t::const_iterator ziter = d_records.find(std::make_tuple(getName(), QType::SOA));
880 if (ziter != d_records.end()) {
881 DNSRecord dr = *ziter;
882 dr.d_place = DNSResourceRecord::AUTHORITY;
883 records.push_back(dr);
884 }
885 }
886
887 bool SyncRes::AuthDomain::operator==(const AuthDomain& rhs) const
888 {
889 return d_records == rhs.d_records
890 && d_servers == rhs.d_servers
891 && d_name == rhs.d_name
892 && d_rdForward == rhs.d_rdForward;
893 }
894
895 [[nodiscard]] std::string SyncRes::AuthDomain::print(const std::string& indent,
896 const std::string& indentLevel) const
897 {
898 std::stringstream s;
899 s << indent << "DNSName = " << d_name << std::endl;
900 s << indent << "rdForward = " << d_rdForward << std::endl;
901 s << indent << "Records {" << std::endl;
902 auto recordContentIndentation = indent;
903 recordContentIndentation += indentLevel;
904 recordContentIndentation += indentLevel;
905 for (const auto& record : d_records) {
906 s << indent << indentLevel << "Record `" << record.d_name << "` {" << std::endl;
907 s << record.print(recordContentIndentation);
908 s << indent << indentLevel << "}" << std::endl;
909 }
910 s << indent << "}" << std::endl;
911 s << indent << "Servers {" << std::endl;
912 for (const auto& server : d_servers) {
913 s << indent << indentLevel << server.toString() << std::endl;
914 }
915 s << indent << "}" << std::endl;
916 return s.str();
917 }
918
919 int SyncRes::AuthDomain::getRecords(const DNSName& qname, const QType qtype, std::vector<DNSRecord>& records) const
920 {
921 int result = RCode::NoError;
922 records.clear();
923
924 // partial lookup
925 std::pair<records_t::const_iterator, records_t::const_iterator> range = d_records.equal_range(std::tie(qname));
926
927 SyncRes::AuthDomain::records_t::const_iterator ziter;
928 bool somedata = false;
929
930 for (ziter = range.first; ziter != range.second; ++ziter) {
931 somedata = true;
932
933 if (qtype == QType::ANY || ziter->d_type == qtype || ziter->d_type == QType::CNAME) {
934 // let rest of nameserver do the legwork on this one
935 records.push_back(*ziter);
936 }
937 else if (ziter->d_type == QType::NS && ziter->d_name.countLabels() > getName().countLabels()) {
938 // we hit a delegation point!
939 DNSRecord dr = *ziter;
940 dr.d_place = DNSResourceRecord::AUTHORITY;
941 records.push_back(dr);
942 }
943 }
944
945 if (!records.empty()) {
946 /* We have found an exact match, we're done */
947 return result;
948 }
949
950 if (somedata) {
951 /* We have records for that name, but not of the wanted qtype */
952 addSOA(records);
953
954 return result;
955 }
956
957 DNSName wcarddomain(qname);
958 while (wcarddomain != getName() && wcarddomain.chopOff()) {
959 range = d_records.equal_range(std::make_tuple(g_wildcarddnsname + wcarddomain));
960 if (range.first == range.second)
961 continue;
962
963 for (ziter = range.first; ziter != range.second; ++ziter) {
964 DNSRecord dr = *ziter;
965 // if we hit a CNAME, just answer that - rest of recursor will do the needful & follow
966 if (dr.d_type == qtype || qtype == QType::ANY || dr.d_type == QType::CNAME) {
967 dr.d_name = qname;
968 dr.d_place = DNSResourceRecord::ANSWER;
969 records.push_back(dr);
970 }
971 }
972
973 if (records.empty()) {
974 addSOA(records);
975 }
976
977 return result;
978 }
979
980 /* Nothing for this name, no wildcard, let's see if there is some NS */
981 DNSName nsdomain(qname);
982 while (nsdomain.chopOff() && nsdomain != getName()) {
983 range = d_records.equal_range(std::make_tuple(nsdomain, QType::NS));
984 if (range.first == range.second)
985 continue;
986
987 for (ziter = range.first; ziter != range.second; ++ziter) {
988 DNSRecord dr = *ziter;
989 dr.d_place = DNSResourceRecord::AUTHORITY;
990 records.push_back(dr);
991 }
992 }
993
994 if (records.empty()) {
995 addSOA(records);
996 result = RCode::NXDomain;
997 }
998
999 return result;
1000 }
1001
1002 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, int& res)
1003 {
1004 d_authzonequeries++;
1005 t_Counters.at(rec::Counter::authzonequeries)++;
1006
1007 res = domain.getRecords(qname, qtype, ret);
1008 return true;
1009 }
1010
1011 bool SyncRes::doOOBResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res)
1012 {
1013 DNSName authdomain(qname);
1014 domainmap_t::const_iterator iter = getBestAuthZone(&authdomain);
1015 if (iter == t_sstorage.domainmap->end() || !iter->second.isAuth()) {
1016 LOG(prefix << qname << ": Auth storage has no zone for this query!" << endl);
1017 return false;
1018 }
1019
1020 LOG(prefix << qname << ": Auth storage has data, zone='" << authdomain << "'" << endl);
1021 return doOOBResolve(iter->second, qname, qtype, ret, res);
1022 }
1023
1024 bool SyncRes::isRecursiveForwardOrAuth(const DNSName& qname) const
1025 {
1026 DNSName authname(qname);
1027 domainmap_t::const_iterator iter = getBestAuthZone(&authname);
1028 return iter != t_sstorage.domainmap->end() && (iter->second.isAuth() || iter->second.shouldRecurse());
1029 }
1030
1031 bool SyncRes::isForwardOrAuth(const DNSName& qname) const
1032 {
1033 DNSName authname(qname);
1034 domainmap_t::const_iterator iter = getBestAuthZone(&authname);
1035 return iter != t_sstorage.domainmap->end();
1036 }
1037
/* Format 'tv' as local time "YYYY-MM-DDTHH:MM:SS.mmm" into 'buf'.
 *
 * \param tv  the time to format
 * \param buf destination buffer
 * \param sz  size of 'buf' in bytes
 * \return 'buf'
 *
 * If the broken-down time cannot be produced or does not fit in 'buf', the raw
 * number of seconds is written instead; if that does not fit either, 'buf' is
 * set to the empty string (when sz > 0). The ".mmm" suffix is only appended
 * when there is room for it.
 */
const char* isoDateTimeMillis(const struct timeval& tv, char* buf, size_t sz)
{
  static const char* const format = "%Y-%m-%dT%T";

  struct tm parts;
  // localtime_r() returns nullptr when the time cannot be represented as a
  // struct tm; handing that to strftime() would be undefined behaviour, so
  // treat it like a formatting failure and use the numeric fallback.
  const struct tm* broken = localtime_r(&tv.tv_sec, &parts);
  size_t len = broken != nullptr ? strftime(buf, sz, format, broken) : 0;

  if (len == 0) {
    const int ret = snprintf(buf, sz, "%lld", static_cast<long long>(tv.tv_sec));
    if (ret < 0 || static_cast<size_t>(ret) >= sz) {
      // Not even the plain seconds fit: hand back an empty string
      if (sz > 0) {
        buf[0] = '\0';
      }
      return buf;
    }
    len = static_cast<size_t>(ret);
  }

  // '.' + three digits + terminating NUL must fit after what we have so far
  if (sz > len + 4) {
    snprintf(buf + len, sz - len, ".%03ld", static_cast<long>(tv.tv_usec) / 1000);
  }
  return buf;
}
1059
/* Format 't' as local time "YYYY-MM-DDTHH:MM:SS" into 'buf'.
 *
 * \param t   the time to format
 * \param buf destination buffer
 * \param sz  size of 'buf' in bytes
 * \return 'buf'
 *
 * If the broken-down time cannot be produced or does not fit in 'buf', the raw
 * number of seconds is written instead; if that does not fit either, 'buf' is
 * set to the empty string (when sz > 0).
 */
static const char* timestamp(time_t t, char* buf, size_t sz)
{
  static const char* const format = "%Y-%m-%dT%T";

  struct tm parts;
  // localtime_r() returns nullptr when the time cannot be represented as a
  // struct tm; never pass that on to strftime().
  const struct tm* broken = localtime_r(&t, &parts);
  const size_t len = broken != nullptr ? strftime(buf, sz, format, broken) : 0;

  if (len == 0) {
    const int ret = snprintf(buf, sz, "%lld", static_cast<long long>(t));
    if (ret < 0 || static_cast<size_t>(ret) >= sz) {
      // Not even the plain seconds fit: hand back an empty string
      if (sz > 0) {
        buf[0] = '\0';
      }
    }
  }
  return buf;
}
1075
1076 struct ednsstatus_t : public multi_index_container<SyncRes::EDNSStatus,
1077 indexed_by<
1078 ordered_unique<tag<ComboAddress>, member<SyncRes::EDNSStatus, ComboAddress, &SyncRes::EDNSStatus::address>>,
1079 ordered_non_unique<tag<time_t>, member<SyncRes::EDNSStatus, time_t, &SyncRes::EDNSStatus::ttd>>>>
1080 {
1081 // Get a copy
1082 ednsstatus_t getMap() const
1083 {
1084 return *this;
1085 }
1086
1087 void setMode(index<ComboAddress>::type& ind, iterator it, SyncRes::EDNSStatus::EDNSMode mode, time_t ts)
1088 {
1089 if (it->mode != mode || it->ttd == 0) {
1090 ind.modify(it, [=](SyncRes::EDNSStatus& s) { s.mode = mode; s.ttd = ts + Expire; });
1091 }
1092 }
1093
1094 void prune(time_t now)
1095 {
1096 auto& ind = get<time_t>();
1097 ind.erase(ind.begin(), ind.upper_bound(now));
1098 }
1099
1100 static const time_t Expire = 7200;
1101 };
1102
// File-local table of EDNS status entries; it is wrapped in LockGuarded and
// every access in this file goes through s_ednsstatus.lock().
static LockGuarded<ednsstatus_t> s_ednsstatus;
1104
1105 SyncRes::EDNSStatus::EDNSMode SyncRes::getEDNSStatus(const ComboAddress& server)
1106 {
1107 auto lock = s_ednsstatus.lock();
1108 const auto& it = lock->find(server);
1109 if (it == lock->end()) {
1110 return EDNSStatus::EDNSOK;
1111 }
1112 return it->mode;
1113 }
1114
1115 uint64_t SyncRes::getEDNSStatusesSize()
1116 {
1117 return s_ednsstatus.lock()->size();
1118 }
1119
1120 void SyncRes::clearEDNSStatuses()
1121 {
1122 s_ednsstatus.lock()->clear();
1123 }
1124
1125 void SyncRes::pruneEDNSStatuses(time_t cutoff)
1126 {
1127 s_ednsstatus.lock()->prune(cutoff);
1128 }
1129
1130 uint64_t SyncRes::doEDNSDump(int fd)
1131 {
1132 int newfd = dup(fd);
1133 if (newfd == -1) {
1134 return 0;
1135 }
1136 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1137 if (!fp) {
1138 close(newfd);
1139 return 0;
1140 }
1141 uint64_t count = 0;
1142
1143 fprintf(fp.get(), "; edns dump follows\n; ip\tstatus\tttd\n");
1144 const auto copy = s_ednsstatus.lock()->getMap();
1145 for (const auto& eds : copy) {
1146 count++;
1147 char tmp[26];
1148 fprintf(fp.get(), "%s\t%s\t%s\n", eds.address.toString().c_str(), eds.toString().c_str(), timestamp(eds.ttd, tmp, sizeof(tmp)));
1149 }
1150 return count;
1151 }
1152
1153 void SyncRes::pruneNSSpeeds(time_t limit)
1154 {
1155 auto lock = s_nsSpeeds.lock();
1156 auto& ind = lock->get<timeval>();
1157 ind.erase(ind.begin(), ind.upper_bound(timeval{limit, 0}));
1158 }
1159
1160 uint64_t SyncRes::getNSSpeedsSize()
1161 {
1162 return s_nsSpeeds.lock()->size();
1163 }
1164
1165 void SyncRes::submitNSSpeed(const DNSName& server, const ComboAddress& ca, uint32_t usec, const struct timeval& now)
1166 {
1167 auto lock = s_nsSpeeds.lock();
1168 lock->find_or_enter(server, now).submit(ca, usec, now);
1169 }
1170
1171 void SyncRes::clearNSSpeeds()
1172 {
1173 s_nsSpeeds.lock()->clear();
1174 }
1175
1176 float SyncRes::getNSSpeed(const DNSName& server, const ComboAddress& ca)
1177 {
1178 auto lock = s_nsSpeeds.lock();
1179 return lock->find_or_enter(server).d_collection[ca].peek();
1180 }
1181
1182 uint64_t SyncRes::doDumpNSSpeeds(int fd)
1183 {
1184 int newfd = dup(fd);
1185 if (newfd == -1) {
1186 return 0;
1187 }
1188 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1189 if (!fp) {
1190 close(newfd);
1191 return 0;
1192 }
1193
1194 fprintf(fp.get(), "; nsspeed dump follows\n; nsname\ttimestamp\t[ip/decaying-ms/last-ms...]\n");
1195 uint64_t count = 0;
1196
1197 // Create a copy to avoid holding the lock while doing I/O
1198 for (const auto& i : *s_nsSpeeds.lock()) {
1199 count++;
1200
1201 // an <empty> can appear hear in case of authoritative (hosted) zones
1202 char tmp[26];
1203 fprintf(fp.get(), "%s\t%s\t", i.d_name.toLogString().c_str(), isoDateTimeMillis(i.d_lastget, tmp, sizeof(tmp)));
1204 bool first = true;
1205 for (const auto& j : i.d_collection) {
1206 fprintf(fp.get(), "%s%s/%.3f/%.3f", first ? "" : "\t", j.first.toStringWithPortExcept(53).c_str(), j.second.peek() / 1000.0f, j.second.last() / 1000.0f);
1207 first = false;
1208 }
1209 fprintf(fp.get(), "\n");
1210 }
1211 return count;
1212 }
1213
1214 uint64_t SyncRes::getThrottledServersSize()
1215 {
1216 return s_throttle.lock()->size();
1217 }
1218
1219 void SyncRes::pruneThrottledServers(time_t now)
1220 {
1221 s_throttle.lock()->prune(now);
1222 }
1223
1224 void SyncRes::clearThrottle()
1225 {
1226 s_throttle.lock()->clear();
1227 }
1228
1229 bool SyncRes::isThrottled(time_t now, const ComboAddress& server, const DNSName& target, QType qtype)
1230 {
1231 return s_throttle.lock()->shouldThrottle(now, std::make_tuple(server, target, qtype));
1232 }
1233
1234 bool SyncRes::isThrottled(time_t now, const ComboAddress& server)
1235 {
1236 return s_throttle.lock()->shouldThrottle(now, std::make_tuple(server, g_rootdnsname, 0));
1237 }
1238
1239 void SyncRes::doThrottle(time_t now, const ComboAddress& server, time_t duration, unsigned int tries)
1240 {
1241 s_throttle.lock()->throttle(now, std::make_tuple(server, g_rootdnsname, 0), duration, tries);
1242 }
1243
1244 void SyncRes::doThrottle(time_t now, const ComboAddress& server, const DNSName& name, QType qtype, time_t duration, unsigned int tries)
1245 {
1246 s_throttle.lock()->throttle(now, std::make_tuple(server, name, qtype), duration, tries);
1247 }
1248
1249 uint64_t SyncRes::doDumpThrottleMap(int fd)
1250 {
1251 int newfd = dup(fd);
1252 if (newfd == -1) {
1253 return 0;
1254 }
1255 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1256 if (!fp) {
1257 close(newfd);
1258 return 0;
1259 }
1260 fprintf(fp.get(), "; throttle map dump follows\n");
1261 fprintf(fp.get(), "; remote IP\tqname\tqtype\tcount\tttd\n");
1262 uint64_t count = 0;
1263
1264 // Get a copy to avoid holding the lock while doing I/O
1265 const auto throttleMap = s_throttle.lock()->getThrottleMap();
1266 for (const auto& i : throttleMap) {
1267 count++;
1268 char tmp[26];
1269 // remote IP, dns name, qtype, count, ttd
1270 fprintf(fp.get(), "%s\t%s\t%s\t%u\t%s\n", std::get<0>(i.thing).toString().c_str(), std::get<1>(i.thing).toLogString().c_str(), std::get<2>(i.thing).toString().c_str(), i.count, timestamp(i.ttd, tmp, sizeof(tmp)));
1271 }
1272
1273 return count;
1274 }
1275
1276 uint64_t SyncRes::getFailedServersSize()
1277 {
1278 return s_fails.lock()->size();
1279 }
1280
1281 void SyncRes::clearFailedServers()
1282 {
1283 s_fails.lock()->clear();
1284 }
1285
1286 void SyncRes::pruneFailedServers(time_t cutoff)
1287 {
1288 s_fails.lock()->prune(cutoff);
1289 }
1290
1291 unsigned long SyncRes::getServerFailsCount(const ComboAddress& server)
1292 {
1293 return s_fails.lock()->value(server);
1294 }
1295
1296 uint64_t SyncRes::doDumpFailedServers(int fd)
1297 {
1298 int newfd = dup(fd);
1299 if (newfd == -1) {
1300 return 0;
1301 }
1302 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1303 if (!fp) {
1304 close(newfd);
1305 return 0;
1306 }
1307 fprintf(fp.get(), "; failed servers dump follows\n");
1308 fprintf(fp.get(), "; remote IP\tcount\ttimestamp\n");
1309 uint64_t count = 0;
1310
1311 // We get a copy, so the I/O does not need to happen while holding the lock
1312 for (const auto& i : s_fails.lock()->getMapCopy()) {
1313 count++;
1314 char tmp[26];
1315 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.key.toString().c_str(), i.value, timestamp(i.last, tmp, sizeof(tmp)));
1316 }
1317
1318 return count;
1319 }
1320
1321 uint64_t SyncRes::getNonResolvingNSSize()
1322 {
1323 return s_nonresolving.lock()->size();
1324 }
1325
1326 void SyncRes::clearNonResolvingNS()
1327 {
1328 s_nonresolving.lock()->clear();
1329 }
1330
1331 void SyncRes::pruneNonResolving(time_t cutoff)
1332 {
1333 s_nonresolving.lock()->prune(cutoff);
1334 }
1335
1336 uint64_t SyncRes::doDumpNonResolvingNS(int fd)
1337 {
1338 int newfd = dup(fd);
1339 if (newfd == -1) {
1340 return 0;
1341 }
1342 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1343 if (!fp) {
1344 close(newfd);
1345 return 0;
1346 }
1347 fprintf(fp.get(), "; non-resolving nameserver dump follows\n");
1348 fprintf(fp.get(), "; name\tcount\ttimestamp\n");
1349 uint64_t count = 0;
1350
1351 // We get a copy, so the I/O does not need to happen while holding the lock
1352 for (const auto& i : s_nonresolving.lock()->getMapCopy()) {
1353 count++;
1354 char tmp[26];
1355 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.key.toString().c_str(), i.value, timestamp(i.last, tmp, sizeof(tmp)));
1356 }
1357
1358 return count;
1359 }
1360
1361 void SyncRes::clearSaveParentsNSSets()
1362 {
1363 s_savedParentNSSet.lock()->clear();
1364 }
1365
1366 size_t SyncRes::getSaveParentsNSSetsSize()
1367 {
1368 return s_savedParentNSSet.lock()->size();
1369 }
1370
1371 void SyncRes::pruneSaveParentsNSSets(time_t now)
1372 {
1373 s_savedParentNSSet.lock()->prune(now);
1374 }
1375
1376 uint64_t SyncRes::doDumpSavedParentNSSets(int fd)
1377 {
1378 int newfd = dup(fd);
1379 if (newfd == -1) {
1380 return 0;
1381 }
1382 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1383 if (!fp) {
1384 close(newfd);
1385 return 0;
1386 }
1387 fprintf(fp.get(), "; dump of saved parent nameserver sets succesfully used follows\n");
1388 fprintf(fp.get(), "; total entries: %zu\n", s_savedParentNSSet.lock()->size());
1389 fprintf(fp.get(), "; domain\tsuccess\tttd\n");
1390 uint64_t count = 0;
1391
1392 // We get a copy, so the I/O does not need to happen while holding the lock
1393 for (const auto& i : s_savedParentNSSet.lock()->getMapCopy()) {
1394 if (i.d_count == 0) {
1395 continue;
1396 }
1397 count++;
1398 char tmp[26];
1399 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.d_domain.toString().c_str(), i.d_count, timestamp(i.d_ttd, tmp, sizeof(tmp)));
1400 }
1401 return count;
1402 }
1403
1404 void SyncRes::pruneDoTProbeMap(time_t cutoff)
1405 {
1406 auto lock = s_dotMap.lock();
1407 auto& ind = lock->d_map.get<time_t>();
1408
1409 for (auto i = ind.begin(); i != ind.end();) {
1410 if (i->d_ttd >= cutoff) {
1411 // We're done as we loop ordered by d_ttd
1412 break;
1413 }
1414 if (i->d_status == DoTStatus::Status::Busy) {
1415 lock->d_numBusy--;
1416 }
1417 i = ind.erase(i);
1418 }
1419 }
1420
1421 uint64_t SyncRes::doDumpDoTProbeMap(int fd)
1422 {
1423 int newfd = dup(fd);
1424 if (newfd == -1) {
1425 return 0;
1426 }
1427 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1428 if (!fp) {
1429 close(newfd);
1430 return 0;
1431 }
1432 fprintf(fp.get(), "; DoT probing map follows\n");
1433 fprintf(fp.get(), "; ip\tdomain\tcount\tstatus\tttd\n");
1434 uint64_t count = 0;
1435
1436 // We get a copy, so the I/O does not need to happen while holding the lock
1437 DoTMap copy;
1438 {
1439 copy = *s_dotMap.lock();
1440 }
1441 fprintf(fp.get(), "; %" PRIu64 " Busy entries\n", copy.d_numBusy);
1442 for (const auto& i : copy.d_map) {
1443 count++;
1444 char tmp[26];
1445 fprintf(fp.get(), "%s\t%s\t%" PRIu64 "\t%s\t%s\n", i.d_address.toString().c_str(), i.d_auth.toString().c_str(), i.d_count, i.toString().c_str(), timestamp(i.d_ttd, tmp, sizeof(tmp)));
1446 }
1447 return count;
1448 }
1449
1450 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
1451 to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
1452 so that if there are RRSIGs for a name, we'll have them.
1453
1454 However, some hosts simply can't answer questions which ask for DNSSEC. This can manifest itself as:
1455 * No answer
1456 * FormErr
1457 * Nonsense answer
1458
1459 The cause of "No answer" may be fragmentation, and it is tempting to probe if smaller answers would get through.
1460 Another cause of "No answer" may simply be a network condition.
1461 Nonsense answers are a clearer indication this host won't be able to do DNSSEC evah.
1462
1463 Previous implementations have suffered from turning off DNSSEC questions for an authoritative server based on timeouts.
1464 A clever idea is to only turn off DNSSEC if we know a domain isn't signed anyhow. The problem with that really
1465 clever idea however is that at this point in PowerDNS, we may simply not know that yet. All the DNSSEC thinking happens
1466 elsewhere. It may not have happened yet.
1467
1468 For now this means we can't be clever, but will turn off DNSSEC if you reply with FormError or gibberish.
1469 */
1470
1471 LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsMANDATORY, const DNSName& domain, const DNSName& auth, int type, bool doTCP, bool sendRDQuery, struct timeval* now, boost::optional<Netmask>& srcmask, LWResult* res, bool* chained, const DNSName& nsName) const
1472 {
1473 /* what is your QUEST?
1474 the goal is to get as many remotes as possible on the best level of EDNS support
1475 The levels are:
1476
1477 1) EDNSOK: Honors EDNS0, absent from table
1478 2) EDNSIGNORANT: Ignores EDNS0, gives replies without EDNS0
1479 3) NOEDNS: Generates FORMERR on EDNS queries
1480
1481 Everybody starts out assumed to be EDNSOK.
1482 If EDNSOK, send out EDNS0
1483 If you FORMERR us, go to NOEDNS,
1484 If no EDNS in response, go to EDNSIGNORANT
1485 If EDNSIGNORANT, keep on including EDNS0, see what happens
1486 Same behaviour as EDNSOK
1487 If NOEDNS, send bare queries
1488 */
1489
1490 // Read current status, defaulting to OK
1491 SyncRes::EDNSStatus::EDNSMode mode = EDNSStatus::EDNSOK;
1492 {
1493 auto lock = s_ednsstatus.lock();
1494 auto ednsstatus = lock->find(ip); // does this include port? YES
1495 if (ednsstatus != lock->end()) {
1496 if (ednsstatus->ttd && ednsstatus->ttd < d_now.tv_sec) {
1497 lock->erase(ednsstatus);
1498 }
1499 else {
1500 mode = ednsstatus->mode;
1501 }
1502 }
1503 }
1504
1505 int EDNSLevel = 0;
1506 auto luaconfsLocal = g_luaconfs.getLocal();
1507 ResolveContext ctx;
1508 ctx.d_initialRequestId = d_initialRequestId;
1509 ctx.d_nsName = nsName;
1510 #ifdef HAVE_FSTRM
1511 ctx.d_auth = auth;
1512 #endif
1513
1514 LWResult::Result ret;
1515
1516 for (int tries = 0; tries < 2; ++tries) {
1517
1518 if (mode == EDNSStatus::NOEDNS) {
1519 t_Counters.at(rec::Counter::noEdnsOutQueries)++;
1520 EDNSLevel = 0; // level != mode
1521 }
1522 else if (ednsMANDATORY || mode != EDNSStatus::NOEDNS) {
1523 EDNSLevel = 1;
1524 }
1525
1526 DNSName sendQname(domain);
1527 if (g_lowercaseOutgoing) {
1528 sendQname.makeUsLowerCase();
1529 }
1530
1531 if (d_asyncResolve) {
1532 ret = d_asyncResolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, res, chained);
1533 }
1534 else {
1535 ret = asyncresolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
1536 }
1537
1538 if (ret == LWResult::Result::PermanentError || ret == LWResult::Result::OSLimitError || ret == LWResult::Result::Spoofed) {
1539 break; // transport error, nothing to learn here
1540 }
1541
1542 if (ret == LWResult::Result::Timeout) { // timeout, not doing anything with it now
1543 break;
1544 }
1545
1546 if (EDNSLevel == 1) {
1547 // We sent out with EDNS
1548 // ret is LWResult::Result::Success
1549 // ednsstatus in table might be pruned or changed by another request/thread, so do a new lookup/insert if needed
1550 auto lock = s_ednsstatus.lock(); // all three branches below need a lock
1551
1552 // Determine new mode
1553 if (res->d_validpacket && !res->d_haveEDNS && res->d_rcode == RCode::FormErr) {
1554 mode = EDNSStatus::NOEDNS;
1555 auto ednsstatus = lock->insert(ip).first;
1556 auto& ind = lock->get<ComboAddress>();
1557 lock->setMode(ind, ednsstatus, mode, d_now.tv_sec);
1558 // This is the only path that re-iterates the loop
1559 continue;
1560 }
1561 else if (!res->d_haveEDNS) {
1562 auto ednsstatus = lock->insert(ip).first;
1563 auto& ind = lock->get<ComboAddress>();
1564 lock->setMode(ind, ednsstatus, EDNSStatus::EDNSIGNORANT, d_now.tv_sec);
1565 }
1566 else {
1567 // New status is EDNSOK
1568 lock->erase(ip);
1569 }
1570 }
1571
1572 break;
1573 }
1574 return ret;
1575 }
1576
/* The parameters from rfc9156. */
/* maximum number of QNAME minimisation iterations */
static const unsigned int s_max_minimise_count = 10;
/* number of queries that should only have one label appended */
static const unsigned int s_minimise_one_lab = 4;

/* Compute how many labels the next minimised query name should have.
 *
 * \param labels   number of labels the current name has
 * \param qnamelen number of labels of the full query name
 * \param i        iteration count, starting at 0
 * \return the target number of labels, never more than qnamelen
 *
 * The first s_minimise_one_lab iterations add a single label. The iterations
 * after that, up to s_max_minimise_count, spread the remaining labels evenly
 * over the iterations that are left (adding at least one label). From
 * s_max_minimise_count on, all remaining labels are added at once.
 */
static unsigned int qmStepLen(unsigned int labels, unsigned int qnamelen, unsigned int i)
{
  // Nothing left to add; this also keeps the unsigned subtraction below from
  // wrapping around when labels is larger than qnamelen.
  if (labels >= qnamelen) {
    return qnamelen;
  }

  const unsigned int remaining = qnamelen - labels;
  unsigned int step = remaining;

  if (i < s_minimise_one_lab) {
    step = 1;
  }
  else if (i < s_max_minimise_count) {
    step = std::max(1U, remaining / (s_max_minimise_count - i));
  }

  return std::min(labels + step, qnamelen);
}
1600 int SyncRes::doResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context)
1601 {
1602 auto prefix = getPrefix(depth);
1603 auto luaconfsLocal = g_luaconfs.getLocal();
1604
1605 /* Apply qname (including CNAME chain) filtering policies */
1606 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
1607 if (luaconfsLocal->dfe.getQueryPolicy(qname, d_discardedPolicies, d_appliedPolicy)) {
1608 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1609 bool done = false;
1610 int rcode = RCode::NoError;
1611 handlePolicyHit(prefix, qname, qtype, ret, done, rcode, depth);
1612 if (done) {
1613 return rcode;
1614 }
1615 }
1616 }
1617
1618 initZoneCutsFromTA(qname, prefix);
1619
1620 // In the auth or recursive forward case, it does not make sense to do qname-minimization
1621 if (!getQNameMinimization() || isRecursiveForwardOrAuth(qname)) {
1622 return doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
1623 }
1624
1625 // The qname minimization algorithm is a simplified version of the one in RFC 7816 (bis).
1626 // It could be simplified because the cache maintenance (both positive and negative)
1627 // is already done by doResolveNoQNameMinimization().
1628 //
1629 // Sketch of algorithm:
1630 // Check cache
1631 // If result found: done
1632 // Otherwise determine closes ancestor from cache data
1633 // Repeat querying A, adding more labels of the original qname
1634 // If we get a delegation continue at ancestor determination
1635 // Until we have the full name.
1636 //
1637 // The algorithm starts with adding a single label per iteration, and
1638 // moves to three labels per iteration after three iterations.
1639
1640 DNSName child;
1641 prefix.append(string("QM "));
1642
1643 LOG(prefix << qname << ": doResolve" << endl);
1644
1645 // Look in cache only
1646 vector<DNSRecord> retq;
1647 bool old = setCacheOnly(true);
1648 bool fromCache = false;
1649 // For cache peeking, we tell doResolveNoQNameMinimization not to consider the (non-recursive) forward case.
1650 // Otherwise all queries in a forward domain will be forwarded, while we want to consult the cache.
1651 int res = doResolveNoQNameMinimization(qname, qtype, retq, depth, beenthere, context, &fromCache, nullptr);
1652 setCacheOnly(old);
1653 if (fromCache) {
1654 LOG(prefix << qname << ": Step0 Found in cache" << endl);
1655 if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None && (d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NXDOMAIN || d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NODATA)) {
1656 ret.clear();
1657 }
1658 ret.insert(ret.end(), retq.begin(), retq.end());
1659
1660 return res;
1661 }
1662 LOG(prefix << qname << ": Step0 Not cached" << endl);
1663
1664 const unsigned int qnamelen = qname.countLabels();
1665
1666 DNSName fwdomain(qname);
1667 const bool forwarded = getBestAuthZone(&fwdomain) != t_sstorage.domainmap->end();
1668 if (forwarded) {
1669 LOG(prefix << qname << ": Step0 qname is in a forwarded domain " << fwdomain << endl);
1670 }
1671
1672 for (unsigned int i = 0; i <= qnamelen;) {
1673
1674 // Step 1
1675 vector<DNSRecord> bestns;
1676 DNSName nsdomain(qname);
1677 if (qtype == QType::DS) {
1678 nsdomain.chopOff();
1679 }
1680 // the two retries allow getBestNSFromCache&co to reprime the root
1681 // hints, in case they ever go missing
1682 for (int tries = 0; tries < 2 && bestns.empty(); ++tries) {
1683 bool flawedNSSet = false;
1684 set<GetBestNSAnswer> beenthereIgnored;
1685 getBestNSFromCache(nsdomain, qtype, bestns, &flawedNSSet, depth, prefix, beenthereIgnored, boost::make_optional(forwarded, fwdomain));
1686 if (forwarded) {
1687 break;
1688 }
1689 }
1690
1691 if (bestns.size() == 0) {
1692 if (!forwarded) {
1693 // Something terrible is wrong
1694 LOG(prefix << qname << ": Step1 No ancestor found return ServFail" << endl);
1695 return RCode::ServFail;
1696 }
1697 child = fwdomain;
1698 }
1699 else {
1700 LOG(prefix << qname << ": Step1 Ancestor from cache is " << bestns[0].d_name << endl);
1701 if (forwarded) {
1702 child = bestns[0].d_name.isPartOf(fwdomain) ? bestns[0].d_name : fwdomain;
1703 LOG(prefix << qname << ": Step1 Final Ancestor (using forwarding info) is " << child << endl);
1704 }
1705 else {
1706 child = bestns[0].d_name;
1707 }
1708 }
1709 for (; i <= qnamelen; i++) {
1710 // Step 2
1711 unsigned int labels = child.countLabels();
1712 unsigned int targetlen = qmStepLen(labels, qnamelen, i);
1713
1714 while (labels < targetlen) {
1715 child.prependRawLabel(qname.getRawLabel(qnamelen - labels - 1));
1716 labels++;
1717 }
1718 // rfc9156 section-2.3, append labels if they start with an underscore
1719 while (labels < qnamelen) {
1720 auto prependLabel = qname.getRawLabel(qnamelen - labels - 1);
1721 if (prependLabel.at(0) != '_') {
1722 break;
1723 }
1724 child.prependRawLabel(prependLabel);
1725 labels++;
1726 }
1727
1728 LOG(prefix << qname << ": Step2 New child " << child << endl);
1729
1730 // Step 3 resolve
1731 if (child == qname) {
1732 LOG(prefix << qname << ": Step3 Going to do final resolve" << endl);
1733 res = doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
1734 LOG(prefix << qname << ": Step3 Final resolve: " << RCode::to_s(res) << "/" << ret.size() << endl);
1735 return res;
1736 }
1737
1738 // If we have seen this child during resolution already; just skip it. We tried to QM it already or otherwise broken.
1739 bool skipStep4 = false;
1740 for (const auto& visitedNS : beenthere) {
1741 if (visitedNS.qname == child) {
1742 skipStep4 = true;
1743 break;
1744 }
1745 }
1746 if (skipStep4) {
1747 LOG(prefix << ": Step4 Being skipped as visited this child name already" << endl);
1748 continue;
1749 }
1750
1751 // Step 4
1752 LOG(prefix << qname << ": Step4 Resolve A for child " << child << endl);
1753 bool oldFollowCNAME = d_followCNAME;
1754 d_followCNAME = false;
1755 retq.resize(0);
1756 StopAtDelegation stopAtDelegation = Stop;
1757 res = doResolveNoQNameMinimization(child, QType::A, retq, depth, beenthere, context, nullptr, &stopAtDelegation);
1758 d_followCNAME = oldFollowCNAME;
1759 LOG(prefix << qname << ": Step4 Resolve " << child << "|A result is " << RCode::to_s(res) << "/" << retq.size() << "/" << stopAtDelegation << endl);
1760 if (stopAtDelegation == Stopped) {
1761 LOG(prefix << qname << ": Delegation seen, continue at step 1" << endl);
1762 break;
1763 }
1764
1765 if (res != RCode::NoError) {
1766 // Case 5: unexpected answer
1767 LOG(prefix << qname << ": Step5: other rcode, last effort final resolve" << endl);
1768 setQNameMinimization(false);
1769 setQMFallbackMode(true);
1770
1771 res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, context);
1772
1773 if (res == RCode::NoError) {
1774 t_Counters.at(rec::Counter::qnameminfallbacksuccess)++;
1775 }
1776
1777 LOG(prefix << qname << ": Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size() << endl);
1778 return res;
1779 }
1780 }
1781 }
1782
1783 // Should not be reached
1784 LOG(prefix << qname << ": Max iterations reached, return ServFail" << endl);
1785 return RCode::ServFail;
1786 }
1787
1788 unsigned int SyncRes::getAdjustedRecursionBound() const
1789 {
1790 auto bound = s_maxdepth; // 40 is default value of s_maxdepth
1791 if (getQMFallbackMode()) {
1792 // We might have hit a depth level check, but we still want to allow some recursion levels in the fallback
1793 // no-qname-minimization case. This has the effect that a qname minimization fallback case might reach 150% of
1794 // maxdepth, taking care to not repeatedly increase the bound.
1795 bound += s_maxdepth / 2;
1796 }
1797 return bound;
1798 }
1799
1800 /*! This function will check the cache and go out to the internet if the answer is not in cache
1801 *
1802 * \param qname The name we need an answer for
1803 * \param qtype
1804 * \param ret The vector of DNSRecords we need to fill with the answers
1805 * \param depth The recursion depth we are in
1806 * \param beenthere
1807 * \param fromCache tells the caller the result came from the cache, may be nullptr
1808 * \param stopAtDelegation if non-nullptr and pointed-to value is Stop requests the callee to stop at a delegation, if so pointed-to value is set to Stopped
1809 * \return DNS RCODE or -1 (Error)
1810 */
1811 int SyncRes::doResolveNoQNameMinimization(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context, bool* fromCache, StopAtDelegation* stopAtDelegation)
1812 {
1813 auto prefix = getPrefix(depth);
1814
1815 LOG(prefix << qname << ": Wants " << (d_doDNSSEC ? "" : "NO ") << "DNSSEC processing, " << (d_requireAuthData ? "" : "NO ") << "auth data required by query for " << qtype << endl);
1816
1817 if (s_maxdepth > 0) {
1818 auto bound = getAdjustedRecursionBound();
1819 if (depth > bound) {
1820 string msg = "More than " + std::to_string(bound) + " (adjusted max-recursion-depth) levels of recursion needed while resolving " + qname.toLogString();
1821 LOG(prefix << qname << ": " << msg << endl);
1822 throw ImmediateServFailException(msg);
1823 }
1824 }
1825
1826 int res = 0;
1827
1828 const int iterations = !d_refresh && MemRecursorCache::s_maxServedStaleExtensions > 0 ? 2 : 1;
1829 for (int loop = 0; loop < iterations; loop++) {
1830
1831 d_serveStale = loop == 1;
1832
1833 // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS.
1834 if (!(d_updatingRootNS && qtype.getCode() == QType::NS && qname.isRoot())) {
1835 DNSName authname(qname);
1836 const auto iter = getBestAuthZone(&authname);
1837
1838 if (d_cacheonly) {
1839 if (iter != t_sstorage.domainmap->end()) {
1840 if (iter->second.isAuth()) {
1841 LOG(prefix << qname << ": Cache only lookup for '" << qname << "|" << qtype << "', in auth zone" << endl);
1842 ret.clear();
1843 d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, prefix, res);
1844 if (fromCache != nullptr) {
1845 *fromCache = d_wasOutOfBand;
1846 }
1847 return res;
1848 }
1849 }
1850 }
1851
1852 bool wasForwardedOrAuthZone = false;
1853 bool wasAuthZone = false;
1854 bool wasForwardRecurse = false;
1855
1856 if (iter != t_sstorage.domainmap->end()) {
1857 wasForwardedOrAuthZone = true;
1858
1859 if (iter->second.isAuth()) {
1860 wasAuthZone = true;
1861 }
1862 else if (iter->second.shouldRecurse()) {
1863 wasForwardRecurse = true;
1864 }
1865 }
1866
1867 /* When we are looking for a DS, we want to the non-CNAME cache check first
1868 because we can actually have a DS (from the parent zone) AND a CNAME (from
1869 the child zone), and what we really want is the DS */
1870 if (qtype != QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
1871 d_wasOutOfBand = wasAuthZone;
1872 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1873 // are in QM Step0) we might have a CNAME but not the corresponding target.
1874 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1875 // we will get the records from the cache, resulting in a small overhead.
1876 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1877 // RPZ rules will not be evaluated anymore (we already matched).
1878 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1879
1880 if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) {
1881 *fromCache = true;
1882 }
1883 /* Apply Post filtering policies */
1884
1885 if (d_wantsRPZ && !stoppedByPolicyHit) {
1886 auto luaLocal = g_luaconfs.getLocal();
1887 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1888 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1889 bool done = false;
1890 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1891 if (done && fromCache) {
1892 *fromCache = true;
1893 }
1894 }
1895 }
1896 return res;
1897 }
1898
1899 if (doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, prefix, res, context)) {
1900 // we done
1901 d_wasOutOfBand = wasAuthZone;
1902 if (fromCache) {
1903 *fromCache = true;
1904 }
1905
1906 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
1907 auto luaLocal = g_luaconfs.getLocal();
1908 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1909 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1910 bool done = false;
1911 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1912 }
1913 }
1914
1915 return res;
1916 }
1917
1918 /* if we have not found a cached DS (or denial of), now is the time to look for a CNAME */
1919 if (qtype == QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
1920 d_wasOutOfBand = wasAuthZone;
1921 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1922 // are in QM Step0) we might have a CNAME but not the corresponding target.
1923 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1924 // we will get the records from the cache, resulting in a small overhead.
1925 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1926 // RPZ rules will not be evaluated anymore (we already matched).
1927 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1928
1929 if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) {
1930 *fromCache = true;
1931 }
1932 /* Apply Post filtering policies */
1933
1934 if (d_wantsRPZ && !stoppedByPolicyHit) {
1935 auto luaLocal = g_luaconfs.getLocal();
1936 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1937 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1938 bool done = false;
1939 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1940 if (done && fromCache) {
1941 *fromCache = true;
1942 }
1943 }
1944 }
1945
1946 return res;
1947 }
1948 }
1949
1950 if (d_cacheonly) {
1951 return 0;
1952 }
1953
1954 // When trying to serve-stale, we also only look at the cache. Don't look at d_serveStale, it
1955 // might be changed by recursive calls (this should be fixed in a better way!).
1956 if (loop == 1) {
1957 return res;
1958 }
1959
1960 LOG(prefix << qname << ": No cache hit for '" << qname << "|" << qtype << "', trying to find an appropriate NS record" << endl);
1961
1962 DNSName subdomain(qname);
1963 if (qtype == QType::DS)
1964 subdomain.chopOff();
1965
1966 NsSet nsset;
1967 bool flawedNSSet = false;
1968
1969 // the two retries allow getBestNSNamesFromCache&co to reprime the root
1970 // hints, in case they ever go missing
1971 for (int tries = 0; tries < 2 && nsset.empty(); ++tries) {
1972 subdomain = getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, prefix, beenthere); // pass beenthere to both occasions
1973 }
1974
1975 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, nullptr);
1976
1977 if (res == -1 && s_save_parent_ns_set) {
1978 // It did not work out, lets check if we have a saved parent NS set
1979 map<DNSName, vector<ComboAddress>> fallBack;
1980 {
1981 auto lock = s_savedParentNSSet.lock();
1982 auto domainData = lock->find(subdomain);
1983 if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) {
1984 nsset.clear();
1985 // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt
1986 // Take a copy to be able to release the lock, NsSet is actually a map, go figure
1987 for (const auto& ns : domainData->d_nsAddresses) {
1988 nsset.emplace(ns.first, pair(std::vector<ComboAddress>(), false));
1989 fallBack.emplace(ns.first, ns.second);
1990 }
1991 }
1992 }
1993 if (fallBack.size() > 0) {
1994 LOG(prefix << qname << ": Failure, but we have a saved parent NS set, trying that one" << endl);
1995 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, &fallBack);
1996 if (res == 0) {
1997 // It did work out
1998 s_savedParentNSSet.lock()->inc(subdomain);
1999 }
2000 }
2001 }
2002 /* Apply Post filtering policies */
2003 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
2004 auto luaLocal = g_luaconfs.getLocal();
2005 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
2006 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
2007 bool done = false;
2008 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
2009 }
2010 }
2011
2012 if (!res) {
2013 return 0;
2014 }
2015
2016 LOG(prefix << qname << ": Failed (res=" << res << ")" << endl);
2017 if (res >= 0) {
2018 break;
2019 }
2020 }
2021 return res < 0 ? RCode::ServFail : res;
2022 }
2023
#if 0
// for testing purposes
/** Comparator intended to order IPv6 addresses ahead of IPv4 ones.
 *
 * This code is compiled out and only kept as a debugging aid.
 *
 * The previous body compared `a.sin4.sin_family` with itself, so it returned
 * true for every pair of addresses and never looked at `b`; that is not a
 * usable ordering for the standard sorting algorithms. The comparison below is
 * a strict one between the two address families.
 *
 * NOTE(review): this relies on AF_INET6 being numerically greater than
 * AF_INET -- confirm for the platforms this is built on.
 *
 * \param a left-hand address
 * \param b right-hand address
 * \return true when the family of `a` is strictly greater than the family of `b`
 */
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return b.sin4.sin_family < a.sin4.sin_family;
}
#endif
2031
2032 struct speedOrderCA
2033 {
2034 speedOrderCA(std::map<ComboAddress, float>& speeds) :
2035 d_speeds(speeds) {}
2036 bool operator()(const ComboAddress& a, const ComboAddress& b) const
2037 {
2038 return d_speeds[a] < d_speeds[b];
2039 }
2040 std::map<ComboAddress, float>& d_speeds;
2041 };
2042
2043 /** This function explicitly goes out for A or AAAA addresses
2044 */
2045 vector<ComboAddress> SyncRes::getAddrs(const DNSName& qname, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, bool cacheOnly, unsigned int& addressQueriesForNS)
2046 {
2047 typedef vector<DNSRecord> res_t;
2048 typedef vector<ComboAddress> ret_t;
2049 ret_t ret;
2050
2051 bool oldCacheOnly = setCacheOnly(cacheOnly);
2052 bool oldRequireAuthData = d_requireAuthData;
2053 bool oldValidationRequested = d_DNSSECValidationRequested;
2054 bool oldFollowCNAME = d_followCNAME;
2055 bool seenV6 = false;
2056 const unsigned int startqueries = d_outqueries;
2057 d_requireAuthData = false;
2058 d_DNSSECValidationRequested = false;
2059 d_followCNAME = true;
2060
2061 MemRecursorCache::Flags flags = MemRecursorCache::None;
2062 if (d_serveStale) {
2063 flags |= MemRecursorCache::ServeStale;
2064 }
2065 try {
2066 // First look for both A and AAAA in the cache
2067 res_t cset;
2068 if (s_doIPv4 && g_recCache->get(d_now.tv_sec, qname, QType::A, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2069 for (const auto& i : cset) {
2070 if (auto rec = getRR<ARecordContent>(i)) {
2071 ret.push_back(rec->getCA(53));
2072 }
2073 }
2074 }
2075 if (s_doIPv6 && g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2076 for (const auto& i : cset) {
2077 if (auto rec = getRR<AAAARecordContent>(i)) {
2078 seenV6 = true;
2079 ret.push_back(rec->getCA(53));
2080 }
2081 }
2082 }
2083 if (ret.empty()) {
2084 // Neither A nor AAAA in the cache...
2085 Context newContext1;
2086 cset.clear();
2087 // Go out to get A's
2088 if (s_doIPv4 && doResolve(qname, QType::A, cset, depth + 1, beenthere, newContext1) == 0) { // this consults cache, OR goes out
2089 for (auto const& i : cset) {
2090 if (i.d_type == QType::A) {
2091 if (auto rec = getRR<ARecordContent>(i)) {
2092 ret.push_back(rec->getCA(53));
2093 }
2094 }
2095 }
2096 }
2097 if (s_doIPv6) { // s_doIPv6 **IMPLIES** pdns::isQueryLocalAddressFamilyEnabled(AF_INET6) returned true
2098 if (ret.empty()) {
2099 // We only go out immediately to find IPv6 records if we did not find any IPv4 ones.
2100 Context newContext2;
2101 if (doResolve(qname, QType::AAAA, cset, depth + 1, beenthere, newContext2) == 0) { // this consults cache, OR goes out
2102 for (const auto& i : cset) {
2103 if (i.d_type == QType::AAAA) {
2104 if (auto rec = getRR<AAAARecordContent>(i)) {
2105 seenV6 = true;
2106 ret.push_back(rec->getCA(53));
2107 }
2108 }
2109 }
2110 }
2111 }
2112 else {
2113 // We have some IPv4 records, consult the cache, we might have encountered some IPv6 glue
2114 cset.clear();
2115 if (g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2116 for (const auto& i : cset) {
2117 if (auto rec = getRR<AAAARecordContent>(i)) {
2118 seenV6 = true;
2119 ret.push_back(rec->getCA(53));
2120 }
2121 }
2122 }
2123 }
2124 }
2125 }
2126 if (s_doIPv6 && !seenV6 && !cacheOnly) {
2127 // No IPv6 records in cache, check negcache and submit async task if negache does not have the data
2128 // so that the next time the cache or the negcache will have data
2129 NegCache::NegCacheEntry ne;
2130 bool inNegCache = g_negCache->get(qname, QType::AAAA, d_now, ne, false);
2131 if (!inNegCache) {
2132 pushResolveTask(qname, QType::AAAA, d_now.tv_sec, d_now.tv_sec + 60);
2133 }
2134 }
2135 }
2136 catch (const PolicyHitException&) {
2137 // We ignore a policy hit while trying to retrieve the addresses
2138 // of a NS and keep processing the current query
2139 }
2140
2141 if (ret.empty() && d_outqueries > startqueries) {
2142 // We did 1 or more outgoing queries to resolve this NS name but returned empty handed
2143 addressQueriesForNS++;
2144 }
2145 d_requireAuthData = oldRequireAuthData;
2146 d_DNSSECValidationRequested = oldValidationRequested;
2147 setCacheOnly(oldCacheOnly);
2148 d_followCNAME = oldFollowCNAME;
2149
2150 if (s_max_busy_dot_probes > 0 && s_dot_to_port_853) {
2151 for (auto& add : ret) {
2152 if (shouldDoDoT(add, d_now.tv_sec)) {
2153 add.setPort(853);
2154 }
2155 }
2156 }
2157 /* we need to remove from the nsSpeeds collection the existing IPs
2158 for this nameserver that are no longer in the set, even if there
2159 is only one or none at all in the current set.
2160 */
2161 map<ComboAddress, float> speeds;
2162 {
2163 auto lock = s_nsSpeeds.lock();
2164 auto& collection = lock->find_or_enter(qname, d_now);
2165 float factor = collection.getFactor(d_now);
2166 for (const auto& val : ret) {
2167 speeds[val] = collection.d_collection[val].get(factor);
2168 }
2169 collection.purge(speeds);
2170 }
2171
2172 if (ret.size() > 1) {
2173 shuffle(ret.begin(), ret.end(), pdns::dns_random_engine());
2174 speedOrderCA so(speeds);
2175 stable_sort(ret.begin(), ret.end(), so);
2176 }
2177
2178 if (doLog()) {
2179 LOG(prefix << qname << ": Nameserver " << qname << " IPs: ");
2180 bool first = true;
2181 for (const auto& addr : ret) {
2182 if (first) {
2183 first = false;
2184 }
2185 else {
2186 LOG(", ");
2187 }
2188 LOG((addr.toString()) << "(" << fmtfloat(speeds[addr] / 1000.0) << "ms)");
2189 }
2190 LOG(endl);
2191 }
2192
2193 return ret;
2194 }
2195
2196 void SyncRes::getBestNSFromCache(const DNSName& qname, const QType qtype, vector<DNSRecord>& bestns, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, const boost::optional<DNSName>& cutOffDomain)
2197 {
2198 DNSName subdomain(qname);
2199 bestns.clear();
2200 bool brokeloop;
2201 MemRecursorCache::Flags flags = MemRecursorCache::None;
2202 if (d_serveStale) {
2203 flags |= MemRecursorCache::ServeStale;
2204 }
2205 do {
2206 if (cutOffDomain && (subdomain == *cutOffDomain || !subdomain.isPartOf(*cutOffDomain))) {
2207 break;
2208 }
2209 brokeloop = false;
2210 LOG(prefix << qname << ": Checking if we have NS in cache for '" << subdomain << "'" << endl);
2211 vector<DNSRecord> ns;
2212 *flawedNSSet = false;
2213
2214 if (g_recCache->get(d_now.tv_sec, subdomain, QType::NS, flags, &ns, d_cacheRemote, d_routingTag) > 0) {
2215 if (s_maxnsperresolve > 0 && ns.size() > s_maxnsperresolve) {
2216 vector<DNSRecord> selected;
2217 selected.reserve(s_maxnsperresolve);
2218 std::sample(ns.cbegin(), ns.cend(), std::back_inserter(selected), s_maxnsperresolve, pdns::dns_random_engine());
2219 ns = selected;
2220 }
2221 bestns.reserve(ns.size());
2222
2223 for (auto k = ns.cbegin(); k != ns.cend(); ++k) {
2224 if (k->d_ttl > (unsigned int)d_now.tv_sec) {
2225 vector<DNSRecord> aset;
2226 QType nsqt{QType::ADDR};
2227 if (s_doIPv4 && !s_doIPv6) {
2228 nsqt = QType::A;
2229 }
2230 else if (!s_doIPv4 && s_doIPv6) {
2231 nsqt = QType::AAAA;
2232 }
2233
2234 const DNSRecord& dr = *k;
2235 auto nrr = getRR<NSRecordContent>(dr);
2236 if (nrr && (!nrr->getNS().isPartOf(subdomain) || g_recCache->get(d_now.tv_sec, nrr->getNS(), nsqt, flags, doLog() ? &aset : 0, d_cacheRemote, d_routingTag) > 0)) {
2237 bestns.push_back(dr);
2238 LOG(prefix << qname << ": NS (with ip, or non-glue) in cache for '" << subdomain << "' -> '" << nrr->getNS() << "'");
2239 LOG(", within bailiwick: " << nrr->getNS().isPartOf(subdomain));
2240 if (!aset.empty()) {
2241 LOG(", in cache, ttl=" << (unsigned int)(((time_t)aset.begin()->d_ttl - d_now.tv_sec)) << endl);
2242 }
2243 else {
2244 LOG(", not in cache / did not look at cache" << endl);
2245 }
2246 }
2247 else {
2248 *flawedNSSet = true;
2249 LOG(prefix << qname << ": NS in cache for '" << subdomain << "', but needs glue (" << nrr->getNS() << ") which we miss or is expired" << endl);
2250 }
2251 }
2252 }
2253
2254 if (!bestns.empty()) {
2255 GetBestNSAnswer answer;
2256 answer.qname = qname;
2257 answer.qtype = qtype.getCode();
2258 for (const auto& dr : bestns) {
2259 if (auto nsContent = getRR<NSRecordContent>(dr)) {
2260 answer.bestns.emplace(dr.d_name, nsContent->getNS());
2261 }
2262 }
2263
2264 auto insertionPair = beenthere.insert(std::move(answer));
2265 if (!insertionPair.second) {
2266 brokeloop = true;
2267 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' but part of LOOP (already seen " << answer.qname << ")! Trying less specific NS" << endl);
2268 ;
2269 if (doLog())
2270 for (set<GetBestNSAnswer>::const_iterator j = beenthere.begin(); j != beenthere.end(); ++j) {
2271 bool neo = (j == insertionPair.first);
2272 LOG(prefix << qname << ": Beenthere" << (neo ? "*" : "") << ": " << j->qname << "|" << DNSRecordContent::NumberToType(j->qtype) << " (" << (unsigned int)j->bestns.size() << ")" << endl);
2273 }
2274 bestns.clear();
2275 }
2276 else {
2277 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' (flawedNSSet=" << *flawedNSSet << ")" << endl);
2278 return;
2279 }
2280 }
2281 }
2282 LOG(prefix << qname << ": No valid/useful NS in cache for '" << subdomain << "'" << endl);
2283
2284 if (subdomain.isRoot() && !brokeloop) {
2285 // We lost the root NS records
2286 primeHints();
2287 LOG(prefix << qname << ": Reprimed the root" << endl);
2288 /* let's prevent an infinite loop */
2289 if (!d_updatingRootNS) {
2290 auto log = g_slog->withName("housekeeping");
2291 getRootNS(d_now, d_asyncResolve, depth, log);
2292 }
2293 }
2294 } while (subdomain.chopOff());
2295 }
2296
2297 SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(DNSName* qname) const
2298 {
2299 if (t_sstorage.domainmap->empty()) {
2300 return t_sstorage.domainmap->end();
2301 }
2302
2303 SyncRes::domainmap_t::const_iterator ret;
2304 do {
2305 ret = t_sstorage.domainmap->find(*qname);
2306 if (ret != t_sstorage.domainmap->end())
2307 break;
2308 } while (qname->chopOff());
2309 return ret;
2310 }
2311
2312 /** doesn't actually do the work, leaves that to getBestNSFromCache */
2313 DNSName SyncRes::getBestNSNamesFromCache(const DNSName& qname, const QType qtype, NsSet& nsset, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere)
2314 {
2315 DNSName authOrForwDomain(qname);
2316
2317 domainmap_t::const_iterator iter = getBestAuthZone(&authOrForwDomain);
2318 // We have an auth, forwarder of forwarder-recurse
2319 if (iter != t_sstorage.domainmap->end()) {
2320 if (iter->second.isAuth()) {
2321 // this gets picked up in doResolveAt, the empty DNSName, combined with the
2322 // empty vector means 'we are auth for this zone'
2323 nsset.insert({DNSName(), {{}, false}});
2324 return authOrForwDomain;
2325 }
2326 else {
2327 if (iter->second.shouldRecurse()) {
2328 // Again, picked up in doResolveAt. An empty DNSName, combined with a
2329 // non-empty vector of ComboAddresses means 'this is a forwarded domain'
2330 // This is actually picked up in retrieveAddressesForNS called from doResolveAt.
2331 nsset.insert({DNSName(), {iter->second.d_servers, true}});
2332 return authOrForwDomain;
2333 }
2334 }
2335 }
2336
2337 // We might have a (non-recursive) forwarder, but maybe the cache already contains
2338 // a better NS
2339 vector<DNSRecord> bestns;
2340 DNSName nsFromCacheDomain(g_rootdnsname);
2341 getBestNSFromCache(qname, qtype, bestns, flawedNSSet, depth, prefix, beenthere);
2342
2343 // Pick up the auth domain
2344 for (const auto& k : bestns) {
2345 const auto nsContent = getRR<NSRecordContent>(k);
2346 if (nsContent) {
2347 nsFromCacheDomain = k.d_name;
2348 break;
2349 }
2350 }
2351
2352 if (iter != t_sstorage.domainmap->end()) {
2353 if (doLog()) {
2354 LOG(prefix << qname << " authOrForwDomain: " << authOrForwDomain << " nsFromCacheDomain: " << nsFromCacheDomain << " isPartof: " << authOrForwDomain.isPartOf(nsFromCacheDomain) << endl);
2355 }
2356
2357 // If the forwarder is better or equal to what's found in the cache, use forwarder. Note that name.isPartOf(name).
2358 // So queries that get NS for authOrForwDomain itself go to the forwarder
2359 if (authOrForwDomain.isPartOf(nsFromCacheDomain)) {
2360 if (doLog()) {
2361 LOG(prefix << qname << ": Using forwarder as NS" << endl);
2362 }
2363 nsset.insert({DNSName(), {iter->second.d_servers, false}});
2364 return authOrForwDomain;
2365 }
2366 else {
2367 if (doLog()) {
2368 LOG(prefix << qname << ": Using NS from cache" << endl);
2369 }
2370 }
2371 }
2372 for (auto k = bestns.cbegin(); k != bestns.cend(); ++k) {
2373 // The actual resolver code will not even look at the ComboAddress or bool
2374 const auto nsContent = getRR<NSRecordContent>(*k);
2375 if (nsContent) {
2376 nsset.insert({nsContent->getNS(), {{}, false}});
2377 }
2378 }
2379 return nsFromCacheDomain;
2380 }
2381
2382 void SyncRes::updateValidationStatusInCache(const DNSName& qname, const QType qt, bool aa, vState newState) const
2383 {
2384 if (qt == QType::ANY || qt == QType::ADDR) {
2385 // not doing that
2386 return;
2387 }
2388
2389 if (vStateIsBogus(newState)) {
2390 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, d_routingTag, aa, newState, s_maxbogusttl + d_now.tv_sec);
2391 }
2392 else {
2393 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, d_routingTag, aa, newState, boost::none);
2394 }
2395 }
2396
2397 static bool scanForCNAMELoop(const DNSName& name, const vector<DNSRecord>& records)
2398 {
2399 for (const auto& record : records) {
2400 if (record.d_type == QType::CNAME && record.d_place == DNSResourceRecord::ANSWER) {
2401 if (name == record.d_name) {
2402 return true;
2403 }
2404 }
2405 }
2406 return false;
2407 }
2408
2409 bool SyncRes::doCNAMECacheCheck(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context, bool wasAuthZone, bool wasForwardRecurse)
2410 {
2411 // Even if s_maxdepth is zero, we want to have this check
2412 auto bound = std::max(40U, getAdjustedRecursionBound());
2413 // Bounds were > 9 and > 15 originally, now they are derived from s_maxdepth (default 40)
2414 // Apply more strict bound if we see throttling
2415 if ((depth >= bound / 4 && d_outqueries > 10 && d_throttledqueries > 5) || depth > bound * 3 / 8) {
2416 LOG(prefix << qname << ": Recursing (CNAME or other indirection) too deep, depth=" << depth << endl);
2417 res = RCode::ServFail;
2418 return true;
2419 }
2420
2421 vector<DNSRecord> cset;
2422 vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
2423 vector<std::shared_ptr<DNSRecord>> authorityRecs;
2424 bool wasAuth;
2425 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
2426 DNSName foundName;
2427 DNSName authZone;
2428 QType foundQT = QType::ENT;
2429
2430 /* we don't require auth data for forward-recurse lookups */
2431 MemRecursorCache::Flags flags = MemRecursorCache::None;
2432 if (!wasForwardRecurse && d_requireAuthData) {
2433 flags |= MemRecursorCache::RequireAuth;
2434 }
2435 if (d_refresh) {
2436 flags |= MemRecursorCache::Refresh;
2437 }
2438 if (d_serveStale) {
2439 flags |= MemRecursorCache::ServeStale;
2440 }
2441 if (g_recCache->get(d_now.tv_sec, qname, QType::CNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2442 foundName = qname;
2443 foundQT = QType::CNAME;
2444 }
2445
2446 if (foundName.empty() && qname != g_rootdnsname) {
2447 // look for a DNAME cache hit
2448 auto labels = qname.getRawLabels();
2449 DNSName dnameName(g_rootdnsname);
2450
2451 do {
2452 dnameName.prependRawLabel(labels.back());
2453 labels.pop_back();
2454 if (dnameName == qname && qtype != QType::DNAME) { // The client does not want a DNAME, but we've reached the QNAME already. So there is no match
2455 break;
2456 }
2457 if (g_recCache->get(d_now.tv_sec, dnameName, QType::DNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2458 foundName = dnameName;
2459 foundQT = QType::DNAME;
2460 break;
2461 }
2462 } while (!labels.empty());
2463 }
2464
2465 if (foundName.empty()) {
2466 return false;
2467 }
2468
2469 if (qtype == QType::DS && authZone == qname) {
2470 /* CNAME at APEX of the child zone, we can't use that to prove that
2471 there is no DS */
2472 LOG(prefix << qname << ": Found a " << foundQT.toString() << " cache hit of '" << qname << "' from " << authZone << ", but such a record at the apex of the child zone does not prove that there is no DS in the parent zone" << endl);
2473 return false;
2474 }
2475
2476 for (auto const& record : cset) {
2477 if (record.d_class != QClass::IN) {
2478 continue;
2479 }
2480
2481 if (record.d_ttl > (unsigned int)d_now.tv_sec) {
2482
2483 if (!wasAuthZone && shouldValidate() && (wasAuth || wasForwardRecurse) && context.state == vState::Indeterminate && d_requireAuthData) {
2484 /* This means we couldn't figure out the state when this entry was cached */
2485
2486 vState recordState = getValidationStatus(foundName, !signatures.empty(), qtype == QType::DS, depth, prefix);
2487 if (recordState == vState::Secure) {
2488 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, validating.." << endl);
2489 context.state = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, foundName, foundQT, cset, signatures);
2490 if (context.state != vState::Indeterminate) {
2491 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, new validation result is " << context.state << endl);
2492 if (vStateIsBogus(context.state)) {
2493 capTTL = s_maxbogusttl;
2494 }
2495 updateValidationStatusInCache(foundName, foundQT, wasAuth, context.state);
2496 }
2497 }
2498 }
2499
2500 LOG(prefix << qname << ": Found cache " << foundQT.toString() << " hit for '" << foundName << "|" << foundQT.toString() << "' to '" << record.getContent()->getZoneRepresentation() << "', validation state is " << context.state << endl);
2501
2502 DNSRecord dr = record;
2503 dr.d_ttl -= d_now.tv_sec;
2504 dr.d_ttl = std::min(dr.d_ttl, capTTL);
2505 const uint32_t ttl = dr.d_ttl;
2506 ret.reserve(ret.size() + 2 + signatures.size() + authorityRecs.size());
2507 ret.push_back(dr);
2508
2509 for (const auto& signature : signatures) {
2510 DNSRecord sigdr;
2511 sigdr.d_type = QType::RRSIG;
2512 sigdr.d_name = foundName;
2513 sigdr.d_ttl = ttl;
2514 sigdr.setContent(signature);
2515 sigdr.d_place = DNSResourceRecord::ANSWER;
2516 sigdr.d_class = QClass::IN;
2517 ret.push_back(sigdr);
2518 }
2519
2520 for (const auto& rec : authorityRecs) {
2521 DNSRecord authDR(*rec);
2522 authDR.d_ttl = ttl;
2523 ret.push_back(authDR);
2524 }
2525
2526 DNSName newTarget;
2527 if (foundQT == QType::DNAME) {
2528 if (qtype == QType::DNAME && qname == foundName) { // client wanted the DNAME, no need to synthesize a CNAME
2529 res = RCode::NoError;
2530 return true;
2531 }
2532 // Synthesize a CNAME
2533 auto dnameRR = getRR<DNAMERecordContent>(record);
2534 if (dnameRR == nullptr) {
2535 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|DNAME cache entry");
2536 }
2537 const auto& dnameSuffix = dnameRR->getTarget();
2538 DNSName targetPrefix = qname.makeRelative(foundName);
2539 try {
2540 dr.d_type = QType::CNAME;
2541 dr.d_name = targetPrefix + foundName;
2542 newTarget = targetPrefix + dnameSuffix;
2543 dr.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newTarget)));
2544 ret.push_back(dr);
2545 }
2546 catch (const std::exception& e) {
2547 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
2548 // But this is consistent with processRecords
2549 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + foundName.toLogString() + "', DNAME target: '" + dnameSuffix.toLogString() + "', substituted name: '" + targetPrefix.toLogString() + "." + dnameSuffix.toLogString() + "' : " + e.what());
2550 }
2551
2552 LOG(prefix << qname << ": Synthesized " << dr.d_name << "|CNAME " << newTarget << endl);
2553 }
2554
2555 if (qtype == QType::CNAME) { // perhaps they really wanted a CNAME!
2556 res = RCode::NoError;
2557 return true;
2558 }
2559
2560 if (qtype == QType::DS || qtype == QType::DNSKEY) {
2561 res = RCode::NoError;
2562 return true;
2563 }
2564
2565 // We have a DNAME _or_ CNAME cache hit and the client wants something else than those two.
2566 // Let's find the answer!
2567 if (foundQT == QType::CNAME) {
2568 const auto cnameContent = getRR<CNAMERecordContent>(record);
2569 if (cnameContent == nullptr) {
2570 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|CNAME cache entry");
2571 }
2572 newTarget = cnameContent->getTarget();
2573 }
2574
2575 if (qname == newTarget) {
2576 string msg = "Got a CNAME referral (from cache) to self";
2577 LOG(prefix << qname << ": " << msg << endl);
2578 throw ImmediateServFailException(msg);
2579 }
2580
2581 if (newTarget.isPartOf(qname)) {
2582 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
2583 string msg = "Got a CNAME referral (from cache) to child, disabling QM";
2584 LOG(prefix << qname << ": " << msg << endl);
2585 setQNameMinimization(false);
2586 }
2587
2588 if (!d_followCNAME) {
2589 res = RCode::NoError;
2590 return true;
2591 }
2592
2593 // Check to see if we already have seen the new target as a previous target
2594 if (scanForCNAMELoop(newTarget, ret)) {
2595 string msg = "got a CNAME referral (from cache) that causes a loop";
2596 LOG(prefix << qname << ": Status=" << msg << endl);
2597 throw ImmediateServFailException(msg);
2598 }
2599
2600 set<GetBestNSAnswer> beenthere;
2601 Context cnameContext;
2602 // Be aware that going out on the network might be disabled (cache-only), for example because we are in QM Step0,
2603 // so you can't trust that a real lookup will have been made.
2604 res = doResolve(newTarget, qtype, ret, depth + 1, beenthere, cnameContext);
2605 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << context.state << " with the state from the DNAME/CNAME quest: " << cnameContext.state << endl);
2606 updateValidationState(qname, context.state, cnameContext.state, prefix);
2607
2608 return true;
2609 }
2610 }
2611 throw ImmediateServFailException("Could not determine whether or not there was a CNAME or DNAME in cache for '" + qname.toLogString() + "'");
2612 }
2613
2614 namespace
2615 {
2616 struct CacheEntry
2617 {
2618 vector<DNSRecord> records;
2619 vector<shared_ptr<const RRSIGRecordContent>> signatures;
2620 uint32_t signaturesTTL{std::numeric_limits<uint32_t>::max()};
2621 };
2622 struct CacheKey
2623 {
2624 DNSName name;
2625 QType type;
2626 DNSResourceRecord::Place place;
2627 bool operator<(const CacheKey& rhs) const
2628 {
2629 return std::tie(type, place, name) < std::tie(rhs.type, rhs.place, rhs.name);
2630 }
2631 };
2632 using tcache_t = map<CacheKey, CacheEntry>;
2633 }
2634
2635 static void reapRecordsFromNegCacheEntryForValidation(tcache_t& tcache, const vector<DNSRecord>& records)
2636 {
2637 for (const auto& rec : records) {
2638 if (rec.d_type == QType::RRSIG) {
2639 auto rrsig = getRR<RRSIGRecordContent>(rec);
2640 if (rrsig) {
2641 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
2642 }
2643 }
2644 else {
2645 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(rec);
2646 }
2647 }
2648 }
2649
2650 static bool negativeCacheEntryHasSOA(const NegCache::NegCacheEntry& ne)
2651 {
2652 return !ne.authoritySOA.records.empty();
2653 }
2654
2655 static void reapRecordsForValidation(std::map<QType, CacheEntry>& entries, const vector<DNSRecord>& records)
2656 {
2657 for (const auto& rec : records) {
2658 entries[rec.d_type].records.push_back(rec);
2659 }
2660 }
2661
2662 static void reapSignaturesForValidation(std::map<QType, CacheEntry>& entries, const vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
2663 {
2664 for (const auto& sig : signatures) {
2665 entries[sig->d_type].signatures.push_back(sig);
2666 }
2667 }
2668
2669 /*!
2670 * Convenience function to push the records from records into ret with a new TTL
2671 *
2672 * \param records DNSRecords that need to go into ret
2673 * \param ttl The new TTL for these records
2674 * \param ret The vector of DNSRecords that should contain the records with the modified TTL
2675 */
2676 static void addTTLModifiedRecords(vector<DNSRecord>& records, const uint32_t ttl, vector<DNSRecord>& ret)
2677 {
2678 for (auto& rec : records) {
2679 rec.d_ttl = ttl;
2680 ret.push_back(std::move(rec));
2681 }
2682 }
2683
2684 void SyncRes::computeNegCacheValidationStatus(const NegCache::NegCacheEntry& ne, const DNSName& qname, const QType qtype, const int res, vState& state, unsigned int depth, const string& prefix)
2685 {
2686 tcache_t tcache;
2687 reapRecordsFromNegCacheEntryForValidation(tcache, ne.authoritySOA.records);
2688 reapRecordsFromNegCacheEntryForValidation(tcache, ne.authoritySOA.signatures);
2689 reapRecordsFromNegCacheEntryForValidation(tcache, ne.DNSSECRecords.records);
2690 reapRecordsFromNegCacheEntryForValidation(tcache, ne.DNSSECRecords.signatures);
2691
2692 for (const auto& entry : tcache) {
2693 // this happens when we did store signatures, but passed on the records themselves
2694 if (entry.second.records.empty()) {
2695 continue;
2696 }
2697
2698 const DNSName& owner = entry.first.name;
2699
2700 vState recordState = getValidationStatus(owner, !entry.second.signatures.empty(), qtype == QType::DS, depth, prefix);
2701 if (state == vState::Indeterminate) {
2702 state = recordState;
2703 }
2704
2705 if (recordState == vState::Secure) {
2706 recordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, owner, QType(entry.first.type), entry.second.records, entry.second.signatures);
2707 }
2708
2709 if (recordState != vState::Indeterminate && recordState != state) {
2710 updateValidationState(qname, state, recordState, prefix);
2711 if (state != vState::Secure) {
2712 break;
2713 }
2714 }
2715 }
2716
2717 if (state == vState::Secure) {
2718 vState neValidationState = ne.d_validationState;
2719 dState expectedState = res == RCode::NXDomain ? dState::NXDOMAIN : dState::NXQTYPE;
2720 dState denialState = getDenialValidationState(ne, expectedState, false, prefix);
2721 updateDenialValidationState(qname, neValidationState, ne.d_name, state, denialState, expectedState, qtype == QType::DS, depth, prefix);
2722 }
2723 if (state != vState::Indeterminate) {
2724 /* validation succeeded, let's update the cache entry so we don't have to validate again */
2725 boost::optional<time_t> capTTD = boost::none;
2726 if (vStateIsBogus(state)) {
2727 capTTD = d_now.tv_sec + s_maxbogusttl;
2728 }
2729 g_negCache->updateValidationStatus(ne.d_name, ne.d_qtype, state, capTTD);
2730 }
2731 }
2732
/* Tries to answer qname|qtype from already cached data, looking in this order at:
   - the negative cache (root-nx-trust entry, exact entry, then the
     nothing-below-NXDOMAIN hardening walk over the parent names),
   - the record cache,
   - the aggressive NSEC cache.
   On a hit, records are appended to ret, res and context.state are updated
   (context.extendedError too when s_addExtendedResolutionDNSErrors is set on the
   negative and aggressive NSEC paths) and true is returned.
   Returns false when none of the caches produced an answer. */
2733 bool SyncRes::doCacheCheck(const DNSName& qname, const DNSName& authname, bool wasForwardedOrAuthZone, bool wasAuthZone, bool wasForwardRecurse, QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context)
2734 {
2735 bool giveNegative = false;
2736
2737 // sqname and sqtype are used to contain 'higher' names if we have them (e.g. powerdns.com|SOA when we find a negative entry for doesnotexist.powerdns.com|A)
2738 DNSName sqname(qname);
2739 QType sqt(qtype);
2740 uint32_t sttl = 0;
2741 // cout<<"Lookup for '"<<qname<<"|"<<qtype.toString()<<"' -> "<<getLastLabel(qname)<<endl;
// Not initialized here: every branch below that sets giveNegative also assigns it, and
// its address is handed to g_recCache->get() further down (presumably filled in there - TODO confirm)
2742 vState cachedState;
2743 NegCache::NegCacheEntry ne;
2744
// Step 1: negative cache. Three mutually exclusive lookups: root-nx-trust, exact entry, RFC 8020 style parent walk
2745 if (s_rootNXTrust && g_negCache->getRootNXTrust(qname, d_now, ne, d_serveStale, d_refresh) && ne.d_auth.isRoot() && !(wasForwardedOrAuthZone && !authname.isRoot())) { // when forwarding, the root may only neg-cache if it was forwarded to.
2746 sttl = ne.d_ttd - d_now.tv_sec;
2747 LOG(prefix << qname << ": Entire name '" << qname << "', is negatively cached via '" << ne.d_auth << "' & '" << ne.d_name << "' for another " << sttl << " seconds" << endl);
2748 res = RCode::NXDomain;
2749 giveNegative = true;
2750 cachedState = ne.d_validationState;
2751 if (s_addExtendedResolutionDNSErrors) {
2752 context.extendedError = EDNSExtendedError{0, "Result synthesized by root-nx-trust"};
2753 }
2754 }
2755 else if (g_negCache->get(qname, qtype, d_now, ne, false, d_serveStale, d_refresh)) {
2756 /* If we are looking for a DS, discard NXD if auth == qname
2757 and ask for a specific denial instead */
2758 if (qtype != QType::DS || ne.d_qtype.getCode() || ne.d_auth != qname || g_negCache->get(qname, qtype, d_now, ne, true, d_serveStale, d_refresh)) {
2759 /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
2760 and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
2761 query. */
2762 if (qtype == QType::DS && ne.d_qtype.getCode() && !d_externalDSQuery.empty() && qname == d_externalDSQuery && !negativeCacheEntryHasSOA(ne)) {
2763 giveNegative = false;
2764 }
2765 else {
// Start from NXDomain; downgraded to NoError just below when the entry is for a specific type (non-zero d_qtype)
2766 res = RCode::NXDomain;
2767 sttl = ne.d_ttd - d_now.tv_sec;
2768 giveNegative = true;
2769 cachedState = ne.d_validationState;
2770 if (ne.d_qtype.getCode()) {
2771 LOG(prefix << qname << "|" << qtype << ": Is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2772 res = RCode::NoError;
2773 if (s_addExtendedResolutionDNSErrors) {
2774 context.extendedError = EDNSExtendedError{0, "Result from negative cache"};
2775 }
2776 }
2777 else {
2778 LOG(prefix << qname << ": Entire name '" << qname << "' is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2779 if (s_addExtendedResolutionDNSErrors) {
2780 context.extendedError = EDNSExtendedError{0, "Result from negative cache for entire name"};
2781 }
2782 }
2783 }
2784 }
2785 }
2786 else if (s_hardenNXD != HardenNXD::No && !qname.isRoot() && !wasForwardedOrAuthZone) {
// Walk from the top-most label of qname down towards qname (qname itself excluded),
// looking for a cached negative entry of type ENT for one of its parents
2787 auto labels = qname.getRawLabels();
2788 DNSName negCacheName(g_rootdnsname);
2789 negCacheName.prependRawLabel(labels.back());
2790 labels.pop_back();
2791 while (!labels.empty()) {
2792 if (g_negCache->get(negCacheName, QType::ENT, d_now, ne, true, d_serveStale, d_refresh)) {
2793 if (ne.d_validationState == vState::Indeterminate && validationEnabled()) {
2794 // LOG(prefix << negCacheName << " negatively cached and vState::Indeterminate, trying to validate NXDOMAIN" << endl);
2795 // ...
2796 // And get the updated ne struct
2797 // t_sstorage.negcache.get(negCacheName, QType(0), d_now, ne, true);
2798 }
// HardenNXD::Yes accepts any non-Bogus entry, any other enabled setting requires a Secure one
2799 if ((s_hardenNXD == HardenNXD::Yes && !vStateIsBogus(ne.d_validationState)) || ne.d_validationState == vState::Secure) {
2800 res = RCode::NXDomain;
2801 sttl = ne.d_ttd - d_now.tv_sec;
2802 giveNegative = true;
2803 cachedState = ne.d_validationState;
2804 LOG(prefix << qname << ": Name '" << negCacheName << "' and below, is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2805 if (s_addExtendedResolutionDNSErrors) {
2806 context.extendedError = EDNSExtendedError{0, "Result synthesized by nothing-below-nxdomain (RFC8020)"};
2807 }
2808 break;
2809 }
2810 }
2811 negCacheName.prependRawLabel(labels.back());
2812 labels.pop_back();
2813 }
2814 }
2815
// A negative answer was selected above: validate it if still Indeterminate, copy the cached
// SOA (and DNSSEC records when d_doDNSSEC is set) into ret and return
2816 if (giveNegative) {
2817
2818 context.state = cachedState;
2819
2820 if (!wasAuthZone && shouldValidate() && context.state == vState::Indeterminate) {
2821 LOG(prefix << qname << ": Got vState::Indeterminate state for records retrieved from the negative cache, validating.." << endl);
2822 computeNegCacheValidationStatus(ne, qname, qtype, res, context.state, depth, prefix);
2823
// a freshly computed Bogus state caps the TTL handed out to s_maxbogusttl
2824 if (context.state != cachedState && vStateIsBogus(context.state)) {
2825 sttl = std::min(sttl, s_maxbogusttl);
2826 }
2827 }
2828
2829 // Transplant SOA to the returned packet
2830 addTTLModifiedRecords(ne.authoritySOA.records, sttl, ret);
2831 if (d_doDNSSEC) {
2832 addTTLModifiedRecords(ne.authoritySOA.signatures, sttl, ret);
2833 addTTLModifiedRecords(ne.DNSSECRecords.records, sttl, ret);
2834 addTTLModifiedRecords(ne.DNSSECRecords.signatures, sttl, ret);
2835 }
2836
2837 LOG(prefix << qname << ": Updating validation state with negative cache content for " << qname << " to " << context.state << endl);
2838 return true;
2839 }
2840
// Step 2: record cache
2841 vector<DNSRecord> cset;
2842 bool found = false, expired = false;
2843 vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
2844 vector<std::shared_ptr<DNSRecord>> authorityRecs;
2845 uint32_t ttl = 0;
2846 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
2847 bool wasCachedAuth;
2848 MemRecursorCache::Flags flags = MemRecursorCache::None;
2849 if (!wasForwardRecurse && d_requireAuthData) {
2850 flags |= MemRecursorCache::RequireAuth;
2851 }
2852 if (d_serveStale) {
2853 flags |= MemRecursorCache::ServeStale;
2854 }
2855 if (d_refresh) {
2856 flags |= MemRecursorCache::Refresh;
2857 }
2858 if (g_recCache->get(d_now.tv_sec, sqname, sqt, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &cachedState, &wasCachedAuth, nullptr, &d_fromAuthIP) > 0) {
2859
2860 LOG(prefix << sqname << ": Found cache hit for " << sqt.toString() << ": ");
2861
2862 if (!wasAuthZone && shouldValidate() && (wasCachedAuth || wasForwardRecurse) && cachedState == vState::Indeterminate && d_requireAuthData) {
2863
2864 /* This means we couldn't figure out the state when this entry was cached */
2865 vState recordState = getValidationStatus(qname, !signatures.empty(), qtype == QType::DS, depth, prefix);
2866
2867 if (recordState == vState::Secure) {
2868 LOG(prefix << sqname << ": Got vState::Indeterminate state from the cache, validating.." << endl);
// a DNSKEY set signed by its own owner name goes through validateDNSKeys(), everything else through validateRecordsWithSigs()
2869 if (sqt == QType::DNSKEY && sqname == getSigner(signatures)) {
2870 cachedState = validateDNSKeys(sqname, cset, signatures, depth, prefix);
2871 }
2872 else {
2873 if (sqt == QType::ANY) {
// ANY: split the cached records and signatures per type and validate each type on its own
2874 std::map<QType, CacheEntry> types;
2875 reapRecordsForValidation(types, cset);
2876 reapSignaturesForValidation(types, signatures);
2877
2878 for (const auto& type : types) {
2879 vState cachedRecordState;
2880 if (type.first == QType::DNSKEY && sqname == getSigner(type.second.signatures)) {
2881 cachedRecordState = validateDNSKeys(sqname, type.second.records, type.second.signatures, depth, prefix);
2882 }
2883 else {
2884 cachedRecordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, type.first, type.second.records, type.second.signatures);
2885 }
2886 updateDNSSECValidationState(cachedState, cachedRecordState);
2887 }
2888 }
2889 else {
2890 cachedState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, sqt, cset, signatures);
2891 }
2892 }
2893 }
2894 else {
2895 cachedState = recordState;
2896 }
2897
2898 if (cachedState != vState::Indeterminate) {
2899 LOG(prefix << qname << ": Got vState::Indeterminate state from the cache, validation result is " << cachedState << endl);
2900 if (vStateIsBogus(cachedState)) {
2901 capTTL = s_maxbogusttl;
2902 }
// the computed state is not written back to the cache for ANY and ADDR lookups
2903 if (sqt != QType::ANY && sqt != QType::ADDR) {
2904 updateValidationStatusInCache(sqname, sqt, wasCachedAuth, cachedState);
2905 }
2906 }
2907 }
2908
// Copy the cached records to ret; d_ttl is compared against and reduced by 'now' here,
// i.e. it is treated as an absolute expiry time and converted to a remaining TTL
2909 for (auto j = cset.cbegin(); j != cset.cend(); ++j) {
2910
2911 LOG(j->getContent()->getZoneRepresentation());
2912
2913 if (j->d_class != QClass::IN) {
2914 continue;
2915 }
2916
2917 if (j->d_ttl > (unsigned int)d_now.tv_sec) {
2918 DNSRecord dr = *j;
2919 dr.d_ttl -= d_now.tv_sec;
2920 dr.d_ttl = std::min(dr.d_ttl, capTTL);
2921 ttl = dr.d_ttl;
2922 ret.push_back(dr);
2923 LOG("[ttl=" << dr.d_ttl << "] ");
2924 found = true;
2925 }
2926 else {
2927 LOG("[expired] ");
2928 expired = true;
2929 }
2930 }
2931
2932 ret.reserve(ret.size() + signatures.size() + authorityRecs.size());
2933
// signatures and authority records get the TTL of the last non-expired record copied above
2934 for (const auto& signature : signatures) {
2935 DNSRecord dr;
2936 dr.d_type = QType::RRSIG;
2937 dr.d_name = sqname;
2938 dr.d_ttl = ttl;
2939 dr.setContent(signature);
2940 dr.d_place = DNSResourceRecord::ANSWER;
2941 dr.d_class = QClass::IN;
2942 ret.push_back(dr);
2943 }
2944
2945 for (const auto& rec : authorityRecs) {
2946 DNSRecord dr(*rec);
2947 dr.d_ttl = ttl;
2948 ret.push_back(dr);
2949 }
2950
2951 LOG(endl);
2952 if (found && !expired) {
// giveNegative is always false at this point: the negative path returned above
2953 if (!giveNegative)
2954 res = 0;
2955 LOG(prefix << qname << ": Updating validation state with cache content for " << qname << " to " << cachedState << endl);
2956 context.state = cachedState;
2957 return true;
2958 }
2959 else
2960 LOG(prefix << qname << ": Cache had only stale entries" << endl);
2961 }
2962
// Step 3: aggressive NSEC cache, skipped for forwarded and auth zones
2963 /* let's check if we have a NSEC covering that record */
2964 if (g_aggressiveNSECCache && !wasForwardedOrAuthZone) {
2965 if (g_aggressiveNSECCache->getDenial(d_now.tv_sec, qname, qtype, ret, res, d_cacheRemote, d_routingTag, d_doDNSSEC, LogObject(prefix))) {
2966 context.state = vState::Secure;
2967 if (s_addExtendedResolutionDNSErrors) {
2968 context.extendedError = EDNSExtendedError{0, "Result synthesized from aggressive NSEC cache (RFC8198)"};
2969 }
2970 return true;
2971 }
2972 }
2973
2974 return false;
2975 }
2976
2977 bool SyncRes::moreSpecificThan(const DNSName& a, const DNSName& b) const
2978 {
2979 return (a.isPartOf(b) && a.countLabels() > b.countLabels());
2980 }
2981
2982 struct speedOrder
2983 {
2984 bool operator()(const std::pair<DNSName, float>& a, const std::pair<DNSName, float>& b) const
2985 {
2986 return a.second < b.second;
2987 }
2988 };
2989
2990 std::vector<std::pair<DNSName, float>> SyncRes::shuffleInSpeedOrder(const DNSName& qname, NsSet& tnameservers, const string& prefix)
2991 {
2992 std::vector<std::pair<DNSName, float>> rnameservers;
2993 rnameservers.reserve(tnameservers.size());
2994 for (const auto& tns : tnameservers) {
2995 float speed = s_nsSpeeds.lock()->fastest(tns.first, d_now);
2996 rnameservers.emplace_back(tns.first, speed);
2997 if (tns.first.empty()) // this was an authoritative OOB zone, don't pollute the nsSpeeds with that
2998 return rnameservers;
2999 }
3000
3001 shuffle(rnameservers.begin(), rnameservers.end(), pdns::dns_random_engine());
3002 speedOrder so;
3003 stable_sort(rnameservers.begin(), rnameservers.end(), so);
3004
3005 if (doLog()) {
3006 LOG(prefix << qname << ": Nameservers: ");
3007 for (auto i = rnameservers.begin(); i != rnameservers.end(); ++i) {
3008 if (i != rnameservers.begin()) {
3009 LOG(", ");
3010 if (!((i - rnameservers.begin()) % 3)) {
3011 LOG(endl
3012 << prefix << " ");
3013 }
3014 }
3015 LOG(i->first.toLogString() << "(" << fmtfloat(i->second / 1000.0) << "ms)");
3016 }
3017 LOG(endl);
3018 }
3019 return rnameservers;
3020 }
3021
3022 vector<ComboAddress> SyncRes::shuffleForwardSpeed(const DNSName& qname, const vector<ComboAddress>& rnameservers, const string& prefix, const bool wasRd)
3023 {
3024 vector<ComboAddress> nameservers = rnameservers;
3025 map<ComboAddress, float> speeds;
3026
3027 for (const auto& val : nameservers) {
3028 DNSName nsName = DNSName(val.toStringWithPort());
3029 float speed = s_nsSpeeds.lock()->fastest(nsName, d_now);
3030 speeds[val] = speed;
3031 }
3032 shuffle(nameservers.begin(), nameservers.end(), pdns::dns_random_engine());
3033 speedOrderCA so(speeds);
3034 stable_sort(nameservers.begin(), nameservers.end(), so);
3035
3036 if (doLog()) {
3037 LOG(prefix << qname << ": Nameservers: ");
3038 for (vector<ComboAddress>::const_iterator i = nameservers.cbegin(); i != nameservers.cend(); ++i) {
3039 if (i != nameservers.cbegin()) {
3040 LOG(", ");
3041 if (!((i - nameservers.cbegin()) % 3)) {
3042 LOG(endl
3043 << prefix << " ");
3044 }
3045 }
3046 LOG((wasRd ? string("+") : string("-")) << i->toStringWithPort() << "(" << fmtfloat(speeds[*i] / 1000.0) << "ms)");
3047 }
3048 LOG(endl);
3049 }
3050 return nameservers;
3051 }
3052
3053 static uint32_t getRRSIGTTL(const time_t now, const std::shared_ptr<const RRSIGRecordContent>& rrsig)
3054 {
3055 uint32_t res = 0;
3056 if (now < rrsig->d_sigexpire) {
3057 res = static_cast<uint32_t>(rrsig->d_sigexpire) - now;
3058 }
3059 return res;
3060 }
3061
3062 static const set<QType> nsecTypes = {QType::NSEC, QType::NSEC3};
3063
3064 /* Fills the authoritySOA and DNSSECRecords fields from ne with those found in the records
3065 *
3066 * \param records The records to parse for the authority SOA and NSEC(3) records
3067 * \param ne The NegCacheEntry to be filled out (will not be cleared, only appended to
3068 */
3069 static void harvestNXRecords(const vector<DNSRecord>& records, NegCache::NegCacheEntry& ne, const time_t now, uint32_t* lowestTTL)
3070 {
3071 for (const auto& rec : records) {
3072 if (rec.d_place != DNSResourceRecord::AUTHORITY) {
3073 // RFC 4035 section 3.1.3. indicates that NSEC records MUST be placed in
3074 // the AUTHORITY section. Section 3.1.1 indicates that that RRSIGs for
3075 // records MUST be in the same section as the records they cover.
3076 // Hence, we ignore all records outside of the AUTHORITY section.
3077 continue;
3078 }
3079
3080 if (rec.d_type == QType::RRSIG) {
3081 auto rrsig = getRR<RRSIGRecordContent>(rec);
3082 if (rrsig) {
3083 if (rrsig->d_type == QType::SOA) {
3084 ne.authoritySOA.signatures.push_back(rec);
3085 if (lowestTTL && isRRSIGNotExpired(now, *rrsig)) {
3086 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3087 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3088 }
3089 }
3090 if (nsecTypes.count(rrsig->d_type)) {
3091 ne.DNSSECRecords.signatures.push_back(rec);
3092 if (lowestTTL && isRRSIGNotExpired(now, *rrsig)) {
3093 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3094 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3095 }
3096 }
3097 }
3098 continue;
3099 }
3100 if (rec.d_type == QType::SOA) {
3101 ne.authoritySOA.records.push_back(rec);
3102 if (lowestTTL) {
3103 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3104 }
3105 continue;
3106 }
3107 if (nsecTypes.count(rec.d_type)) {
3108 ne.DNSSECRecords.records.push_back(rec);
3109 if (lowestTTL) {
3110 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3111 }
3112 continue;
3113 }
3114 }
3115 }
3116
3117 static cspmap_t harvestCSPFromNE(const NegCache::NegCacheEntry& ne)
3118 {
3119 cspmap_t cspmap;
3120 for (const auto& rec : ne.DNSSECRecords.signatures) {
3121 if (rec.d_type == QType::RRSIG) {
3122 auto rrc = getRR<RRSIGRecordContent>(rec);
3123 if (rrc) {
3124 cspmap[{rec.d_name, rrc->d_type}].signatures.push_back(rrc);
3125 }
3126 }
3127 }
3128 for (const auto& rec : ne.DNSSECRecords.records) {
3129 cspmap[{rec.d_name, rec.d_type}].records.insert(rec.getContent());
3130 }
3131 return cspmap;
3132 }
3133
3134 // TODO remove after processRecords is fixed!
3135 // Adds the RRSIG for the SOA and the NSEC(3) + RRSIGs to ret
3136 static void addNXNSECS(vector<DNSRecord>& ret, const vector<DNSRecord>& records)
3137 {
3138 NegCache::NegCacheEntry ne;
3139 harvestNXRecords(records, ne, 0, nullptr);
3140 ret.insert(ret.end(), ne.authoritySOA.signatures.begin(), ne.authoritySOA.signatures.end());
3141 ret.insert(ret.end(), ne.DNSSECRecords.records.begin(), ne.DNSSECRecords.records.end());
3142 ret.insert(ret.end(), ne.DNSSECRecords.signatures.begin(), ne.DNSSECRecords.signatures.end());
3143 }
3144
3145 static bool rpzHitShouldReplaceContent(const DNSName& qname, const QType qtype, const std::vector<DNSRecord>& records)
3146 {
3147 if (qtype == QType::CNAME) {
3148 return true;
3149 }
3150
3151 for (const auto& record : records) {
3152 if (record.d_type == QType::CNAME) {
3153 if (auto content = getRR<CNAMERecordContent>(record)) {
3154 if (qname == content->getTarget()) {
3155 /* we have a CNAME whose target matches the entry we are about to
3156 generate, so it will complete the current records, not replace
3157 them
3158 */
3159 return false;
3160 }
3161 }
3162 }
3163 }
3164
3165 return true;
3166 }
3167
3168 static void removeConflictingRecord(std::vector<DNSRecord>& records, const DNSName& name, const QType dtype)
3169 {
3170 for (auto it = records.begin(); it != records.end();) {
3171 bool remove = false;
3172
3173 if (it->d_class == QClass::IN && (it->d_type == QType::CNAME || dtype == QType::CNAME || it->d_type == dtype) && it->d_name == name) {
3174 remove = true;
3175 }
3176 else if (it->d_class == QClass::IN && it->d_type == QType::RRSIG && it->d_name == name) {
3177 if (auto rrc = getRR<RRSIGRecordContent>(*it)) {
3178 if (rrc->d_type == QType::CNAME || rrc->d_type == dtype) {
3179 /* also remove any RRSIG that could conflict */
3180 remove = true;
3181 }
3182 }
3183 }
3184
3185 if (remove) {
3186 it = records.erase(it);
3187 }
3188 else {
3189 ++it;
3190 }
3191 }
3192 }
3193
3194 void SyncRes::handlePolicyHit(const std::string& prefix, const DNSName& qname, const QType qtype, std::vector<DNSRecord>& ret, bool& done, int& rcode, unsigned int depth)
3195 {
3196 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
3197 /* reset to no match */
3198 d_appliedPolicy = DNSFilterEngine::Policy();
3199 return;
3200 }
3201
3202 /* don't account truncate actions for TCP queries, since they are not applied */
3203 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !d_queryReceivedOverTCP) {
3204 ++t_Counters.at(rec::PolicyHistogram::policy).at(d_appliedPolicy.d_kind);
3205 ++t_Counters.at(rec::PolicyNameHits::policyName).counts[d_appliedPolicy.getName()];
3206 }
3207
3208 if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
3209 LOG(prefix << qname << "|" << qtype << ':' << d_appliedPolicy.getLogString() << endl);
3210 }
3211
3212 switch (d_appliedPolicy.d_kind) {
3213
3214 case DNSFilterEngine::PolicyKind::NoAction:
3215 return;
3216
3217 case DNSFilterEngine::PolicyKind::Drop:
3218 ++t_Counters.at(rec::Counter::policyDrops);
3219 throw ImmediateQueryDropException();
3220
3221 case DNSFilterEngine::PolicyKind::NXDOMAIN:
3222 ret.clear();
3223 rcode = RCode::NXDomain;
3224 done = true;
3225 return;
3226
3227 case DNSFilterEngine::PolicyKind::NODATA:
3228 ret.clear();
3229 rcode = RCode::NoError;
3230 done = true;
3231 return;
3232
3233 case DNSFilterEngine::PolicyKind::Truncate:
3234 if (!d_queryReceivedOverTCP) {
3235 ret.clear();
3236 rcode = RCode::NoError;
3237 throw SendTruncatedAnswerException();
3238 }
3239 return;
3240
3241 case DNSFilterEngine::PolicyKind::Custom: {
3242 if (rpzHitShouldReplaceContent(qname, qtype, ret)) {
3243 ret.clear();
3244 }
3245
3246 rcode = RCode::NoError;
3247 done = true;
3248 auto spoofed = d_appliedPolicy.getCustomRecords(qname, qtype.getCode());
3249 for (auto& dr : spoofed) {
3250 removeConflictingRecord(ret, dr.d_name, dr.d_type);
3251 }
3252
3253 for (auto& dr : spoofed) {
3254 ret.push_back(dr);
3255
3256 if (dr.d_name == qname && dr.d_type == QType::CNAME && qtype != QType::CNAME) {
3257 if (auto content = getRR<CNAMERecordContent>(dr)) {
3258 vState newTargetState = vState::Indeterminate;
3259 handleNewTarget(prefix, qname, content->getTarget(), qtype.getCode(), ret, rcode, depth, {}, newTargetState);
3260 }
3261 }
3262 }
3263 }
3264 }
3265 }
3266
3267 bool SyncRes::nameserversBlockedByRPZ(const DNSFilterEngine& dfe, const NsSet& nameservers)
3268 {
3269 /* we skip RPZ processing if:
3270 - it was disabled (d_wantsRPZ is false) ;
3271 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3272 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3273 process any further RPZ rules. Except that we need to process rules of higher priority..
3274 */
3275 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3276 for (auto const& ns : nameservers) {
3277 bool match = dfe.getProcessingPolicy(ns.first, d_discardedPolicies, d_appliedPolicy);
3278 if (match) {
3279 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3280 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3281 LOG(", however nameserver " << ns.first << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3282 return true;
3283 }
3284 }
3285
3286 // Traverse all IP addresses for this NS to see if they have an RPN NSIP policy
3287 for (auto const& address : ns.second.first) {
3288 match = dfe.getProcessingPolicy(address, d_discardedPolicies, d_appliedPolicy);
3289 if (match) {
3290 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3291 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3292 LOG(", however nameserver " << ns.first << " IP address " << address.toString() << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3293 return true;
3294 }
3295 }
3296 }
3297 }
3298 }
3299 return false;
3300 }
3301
3302 bool SyncRes::nameserverIPBlockedByRPZ(const DNSFilterEngine& dfe, const ComboAddress& remoteIP)
3303 {
3304 /* we skip RPZ processing if:
3305 - it was disabled (d_wantsRPZ is false) ;
3306 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3307 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3308 process any further RPZ rules. Except that we need to process rules of higher priority..
3309 */
3310 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3311 bool match = dfe.getProcessingPolicy(remoteIP, d_discardedPolicies, d_appliedPolicy);
3312 if (match) {
3313 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3314 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
3315 LOG(" (blocked by RPZ policy '" + d_appliedPolicy.getName() + "')");
3316 return true;
3317 }
3318 }
3319 }
3320 return false;
3321 }
3322
3323 vector<ComboAddress> SyncRes::retrieveAddressesForNS(const std::string& prefix, const DNSName& qname, std::vector<std::pair<DNSName, float>>::const_iterator& tns, const unsigned int depth, set<GetBestNSAnswer>& beenthere, const vector<std::pair<DNSName, float>>& rnameservers, NsSet& nameservers, bool& sendRDQuery, bool& pierceDontQuery, bool& flawedNSSet, bool cacheOnly, unsigned int& nretrieveAddressesForNS)
3324 {
3325 vector<ComboAddress> result;
3326
3327 size_t nonresolvingfails = 0;
3328 if (!tns->first.empty()) {
3329 if (s_nonresolvingnsmaxfails > 0) {
3330 nonresolvingfails = s_nonresolving.lock()->value(tns->first);
3331 if (nonresolvingfails >= s_nonresolvingnsmaxfails) {
3332 LOG(prefix << qname << ": NS " << tns->first << " in non-resolving map, skipping" << endl);
3333 return result;
3334 }
3335 }
3336
3337 LOG(prefix << qname << ": Trying to resolve NS '" << tns->first << "' (" << 1 + tns - rnameservers.begin() << "/" << (unsigned int)rnameservers.size() << ")" << endl);
3338 const unsigned int oldOutQueries = d_outqueries;
3339 try {
3340 result = getAddrs(tns->first, depth, prefix, beenthere, cacheOnly, nretrieveAddressesForNS);
3341 }
3342 // Other exceptions should likely not throttle...
3343 catch (const ImmediateServFailException& ex) {
3344 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3345 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3346 if (!dontThrottleNames->check(tns->first)) {
3347 s_nonresolving.lock()->incr(tns->first, d_now);
3348 }
3349 }
3350 throw ex;
3351 }
3352 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3353 if (result.empty()) {
3354 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3355 if (!dontThrottleNames->check(tns->first)) {
3356 s_nonresolving.lock()->incr(tns->first, d_now);
3357 }
3358 }
3359 else if (nonresolvingfails > 0) {
3360 // Succeeding resolve, clear memory of recent failures
3361 s_nonresolving.lock()->clear(tns->first);
3362 }
3363 }
3364 pierceDontQuery = false;
3365 }
3366 else {
3367 LOG(prefix << qname << ": Domain has hardcoded nameserver");
3368
3369 if (nameservers[tns->first].first.size() > 1) {
3370 LOG("s");
3371 }
3372 LOG(endl);
3373
3374 sendRDQuery = nameservers[tns->first].second;
3375 result = shuffleForwardSpeed(qname, nameservers[tns->first].first, prefix, sendRDQuery);
3376 pierceDontQuery = true;
3377 }
3378 return result;
3379 }
3380
3381 void SyncRes::checkMaxQperQ(const DNSName& qname) const
3382 {
3383 if (d_outqueries + d_throttledqueries > s_maxqperq) {
3384 throw ImmediateServFailException("more than " + std::to_string(s_maxqperq) + " (max-qperq) queries sent or throttled while resolving " + qname.toLogString());
3385 }
3386 }
3387
3388 bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress& remoteIP, const DNSName& qname, const QType qtype, bool pierceDontQuery)
3389 {
3390 if (isThrottled(d_now.tv_sec, remoteIP)) {
3391 LOG(prefix << qname << ": Server throttled " << endl);
3392 t_Counters.at(rec::Counter::throttledqueries)++;
3393 d_throttledqueries++;
3394 return true;
3395 }
3396 else if (isThrottled(d_now.tv_sec, remoteIP, qname, qtype)) {
3397 LOG(prefix << qname << ": Query throttled " << remoteIP.toString() << ", " << qname << "; " << qtype << endl);
3398 t_Counters.at(rec::Counter::throttledqueries)++;
3399 d_throttledqueries++;
3400 return true;
3401 }
3402 else if (!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
3403 // We could have retrieved an NS from the cache in a forwarding domain
3404 // Even in the case of !pierceDontQuery we still want to allow that NS
3405 DNSName forwardCandidate(qname);
3406 auto it = getBestAuthZone(&forwardCandidate);
3407 if (it == t_sstorage.domainmap->end()) {
3408 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3409 t_Counters.at(rec::Counter::dontqueries)++;
3410 return true;
3411 }
3412 else {
3413 // The name (from the cache) is forwarded, but is it forwarded to an IP in known forwarders?
3414 const auto& ips = it->second.d_servers;
3415 if (std::find(ips.cbegin(), ips.cend(), remoteIP) == ips.cend()) {
3416 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3417 t_Counters.at(rec::Counter::dontqueries)++;
3418 return true;
3419 }
3420 else {
3421 LOG(prefix << qname << ": Sending query to " << remoteIP.toString() << ", blocked by 'dont-query' but a forwarding/auth case" << endl);
3422 }
3423 }
3424 }
3425 return false;
3426 }
3427
3428 bool SyncRes::validationEnabled() const
3429 {
3430 return g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate;
3431 }
3432
3433 uint32_t SyncRes::computeLowestTTD(const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, uint32_t signaturesTTL, const std::vector<std::shared_ptr<DNSRecord>>& authorityRecs) const
3434 {
3435 uint32_t lowestTTD = std::numeric_limits<uint32_t>::max();
3436 for (const auto& record : records) {
3437 lowestTTD = min(lowestTTD, record.d_ttl);
3438 }
3439
3440 /* even if it was not requested for that request (Process, and neither AD nor DO set),
3441 it might be requested at a later time so we need to be careful with the TTL. */
3442 if (validationEnabled() && !signatures.empty()) {
3443 /* if we are validating, we don't want to cache records after their signatures expire. */
3444 /* records TTL are now TTD, let's add 'now' to the signatures lowest TTL */
3445 lowestTTD = min(lowestTTD, static_cast<uint32_t>(signaturesTTL + d_now.tv_sec));
3446
3447 for (const auto& sig : signatures) {
3448 if (isRRSIGNotExpired(d_now.tv_sec, *sig)) {
3449 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3450 lowestTTD = min(lowestTTD, static_cast<uint32_t>(sig->d_sigexpire));
3451 }
3452 }
3453 }
3454
3455 for (const auto& entry : authorityRecs) {
3456 /* be careful, this is still a TTL here */
3457 lowestTTD = min(lowestTTD, static_cast<uint32_t>(entry->d_ttl + d_now.tv_sec));
3458
3459 if (entry->d_type == QType::RRSIG && validationEnabled()) {
3460 auto rrsig = getRR<RRSIGRecordContent>(*entry);
3461 if (rrsig) {
3462 if (isRRSIGNotExpired(d_now.tv_sec, *rrsig)) {
3463 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3464 lowestTTD = min(lowestTTD, static_cast<uint32_t>(rrsig->d_sigexpire));
3465 }
3466 }
3467 }
3468 }
3469
3470 return lowestTTD;
3471 }
3472
/* Merges stateUpdate into state via updateDNSSECValidationState(), logging the state
   before and after the update. qname and prefix are only used for logging. */
3473 void SyncRes::updateValidationState(const DNSName& qname, vState& state, const vState stateUpdate, const string& prefix)
3474 {
// no endl here: the second LOG below continues this log line with the resulting state
3475 LOG(prefix << qname << ": Validation state was " << state << ", state update is " << stateUpdate);
3476 updateDNSSECValidationState(state, stateUpdate);
3477 LOG(", validation state is now " << state << endl);
3478 }
3479
3480 vState SyncRes::getTA(const DNSName& zone, dsmap_t& ds, const string& prefix)
3481 {
3482 auto luaLocal = g_luaconfs.getLocal();
3483
3484 if (luaLocal->dsAnchors.empty()) {
3485 LOG(prefix << zone << ": No trust anchors configured, everything is Insecure" << endl);
3486 /* We have no TA, everything is insecure */
3487 return vState::Insecure;
3488 }
3489
3490 std::string reason;
3491 if (haveNegativeTrustAnchor(luaLocal->negAnchors, zone, reason)) {
3492 LOG(prefix << zone << ": Got NTA" << endl);
3493 return vState::NTA;
3494 }
3495
3496 if (getTrustAnchor(luaLocal->dsAnchors, zone, ds)) {
3497 if (!zone.isRoot()) {
3498 LOG(prefix << zone << ": Got TA" << endl);
3499 }
3500 return vState::TA;
3501 }
3502
3503 if (zone.isRoot()) {
3504 /* No TA for the root */
3505 return vState::Insecure;
3506 }
3507
3508 return vState::Indeterminate;
3509 }
3510
3511 size_t SyncRes::countSupportedDS(const dsmap_t& dsmap, const string& prefix)
3512 {
3513 size_t count = 0;
3514
3515 for (const auto& ds : dsmap) {
3516 if (isSupportedDS(ds, LogObject(prefix))) {
3517 count++;
3518 }
3519 }
3520
3521 return count;
3522 }
3523
3524 void SyncRes::initZoneCutsFromTA(const DNSName& from, const string& prefix)
3525 {
3526 DNSName zone(from);
3527 do {
3528 dsmap_t ds;
3529 vState result = getTA(zone, ds, prefix);
3530 if (result != vState::Indeterminate) {
3531 if (result == vState::TA) {
3532 if (countSupportedDS(ds, prefix) == 0) {
3533 ds.clear();
3534 result = vState::Insecure;
3535 }
3536 else {
3537 result = vState::Secure;
3538 }
3539 }
3540 else if (result == vState::NTA) {
3541 result = vState::Insecure;
3542 }
3543
3544 d_cutStates[zone] = result;
3545 }
3546 } while (zone.chopOff());
3547 }
3548
3549 vState SyncRes::getDSRecords(const DNSName& zone, dsmap_t& ds, bool taOnly, unsigned int depth, const string& prefix, bool bogusOnNXD, bool* foundCut)
3550 {
3551 vState result = getTA(zone, ds, prefix);
3552
3553 if (result != vState::Indeterminate || taOnly) {
3554 if (foundCut) {
3555 *foundCut = (result != vState::Indeterminate);
3556 }
3557
3558 if (result == vState::TA) {
3559 if (countSupportedDS(ds, prefix) == 0) {
3560 ds.clear();
3561 result = vState::Insecure;
3562 }
3563 else {
3564 result = vState::Secure;
3565 }
3566 }
3567 else if (result == vState::NTA) {
3568 result = vState::Insecure;
3569 }
3570
3571 return result;
3572 }
3573
3574 std::set<GetBestNSAnswer> beenthere;
3575 std::vector<DNSRecord> dsrecords;
3576
3577 Context context;
3578
3579 const bool oldCacheOnly = setCacheOnly(false);
3580 const bool oldQM = setQNameMinimization(!getQMFallbackMode());
3581 int rcode = doResolve(zone, QType::DS, dsrecords, depth + 1, beenthere, context);
3582 setCacheOnly(oldCacheOnly);
3583 setQNameMinimization(oldQM);
3584
3585 if (rcode == RCode::ServFail) {
3586 throw ImmediateServFailException("Server Failure while retrieving DS records for " + zone.toLogString());
3587 }
3588
3589 if (rcode == RCode::NoError || (rcode == RCode::NXDomain && !bogusOnNXD)) {
3590 uint8_t bestDigestType = 0;
3591
3592 bool gotCNAME = false;
3593 for (const auto& record : dsrecords) {
3594 if (record.d_type == QType::DS) {
3595 const auto dscontent = getRR<DSRecordContent>(record);
3596 if (dscontent && isSupportedDS(*dscontent, LogObject(prefix))) {
3597 // Make GOST a lower prio than SHA256
3598 if (dscontent->d_digesttype == DNSSECKeeper::DIGEST_GOST && bestDigestType == DNSSECKeeper::DIGEST_SHA256) {
3599 continue;
3600 }
3601 if (dscontent->d_digesttype > bestDigestType || (bestDigestType == DNSSECKeeper::DIGEST_GOST && dscontent->d_digesttype == DNSSECKeeper::DIGEST_SHA256)) {
3602 bestDigestType = dscontent->d_digesttype;
3603 }
3604 ds.insert(*dscontent);
3605 }
3606 }
3607 else if (record.d_type == QType::CNAME && record.d_name == zone) {
3608 gotCNAME = true;
3609 }
3610 }
3611
3612 /* RFC 4509 section 3: "Validator implementations SHOULD ignore DS RRs containing SHA-1
3613 * digests if DS RRs with SHA-256 digests are present in the DS RRset."
3614 * We interpret that as: do not use SHA-1 if SHA-256 or SHA-384 is available
3615 */
3616 for (auto dsrec = ds.begin(); dsrec != ds.end();) {
3617 if (dsrec->d_digesttype == DNSSECKeeper::DIGEST_SHA1 && dsrec->d_digesttype != bestDigestType) {
3618 dsrec = ds.erase(dsrec);
3619 }
3620 else {
3621 ++dsrec;
3622 }
3623 }
3624
3625 if (rcode == RCode::NoError) {
3626 if (ds.empty()) {
3627 /* we have no DS, it's either:
3628 - a delegation to a non-DNSSEC signed zone
3629 - no delegation, we stay in the same zone
3630 */
3631 if (gotCNAME || denialProvesNoDelegation(zone, dsrecords)) {
3632 /* we are still inside the same zone */
3633
3634 if (foundCut) {
3635 *foundCut = false;
3636 }
3637 return context.state;
3638 }
3639
3640 d_cutStates[zone] = context.state == vState::Secure ? vState::Insecure : context.state;
3641 /* delegation with no DS, might be Secure -> Insecure */
3642 if (foundCut) {
3643 *foundCut = true;
3644 }
3645
3646 /* a delegation with no DS is either:
3647 - a signed zone (Secure) to an unsigned one (Insecure)
3648 - an unsigned zone to another unsigned one (Insecure stays Insecure, Bogus stays Bogus)
3649 */
3650 return context.state == vState::Secure ? vState::Insecure : context.state;
3651 }
3652 else {
3653 /* we have a DS */
3654 d_cutStates[zone] = context.state;
3655 if (foundCut) {
3656 *foundCut = true;
3657 }
3658 }
3659 }
3660
3661 return context.state;
3662 }
3663
3664 LOG(prefix << zone << ": Returning Bogus state from " << __func__ << "(" << zone << ")" << endl);
3665 return vState::BogusUnableToGetDSs;
3666 }
3667
3668 vState SyncRes::getValidationStatus(const DNSName& name, bool wouldBeValid, bool typeIsDS, unsigned int depth, const string& prefix)
3669 {
3670 vState result = vState::Indeterminate;
3671
3672 if (!shouldValidate()) {
3673 return result;
3674 }
3675
3676 DNSName subdomain(name);
3677 if (typeIsDS) {
3678 subdomain.chopOff();
3679 }
3680
3681 {
3682 const auto& it = d_cutStates.find(subdomain);
3683 if (it != d_cutStates.cend()) {
3684 LOG(prefix << name << ": Got status " << it->second << " for name " << subdomain << endl);
3685 return it->second;
3686 }
3687 }
3688
3689 /* look for the best match we have */
3690 DNSName best(subdomain);
3691 while (best.chopOff()) {
3692 const auto& it = d_cutStates.find(best);
3693 if (it != d_cutStates.cend()) {
3694 result = it->second;
3695 if (vStateIsBogus(result) || result == vState::Insecure) {
3696 LOG(prefix << name << ": Got status " << result << " for name " << best << endl);
3697 return result;
3698 }
3699 break;
3700 }
3701 }
3702
3703 /* by now we have the best match, it's likely Secure (otherwise we would not be there)
3704 but we don't know if we missed a cut (or several).
3705 We could see if we have DS (or denial of) in cache but let's not worry for now,
3706 we will if we don't have a signature, or if the signer doesn't match what we expect */
3707 if (!wouldBeValid && best != subdomain) {
3708 /* no signatures or Bogus, we likely missed a cut, let's try to find it */
3709 LOG(prefix << name << ": No or invalid signature/proof for " << name << ", we likely missed a cut between " << best << " and " << subdomain << ", looking for it" << endl);
3710 DNSName ds(best);
3711 std::vector<string> labelsToAdd = subdomain.makeRelative(ds).getRawLabels();
3712
3713 while (!labelsToAdd.empty()) {
3714
3715 ds.prependRawLabel(labelsToAdd.back());
3716 labelsToAdd.pop_back();
3717 LOG(prefix << name << ": - Looking for a DS at " << ds << endl);
3718
3719 bool foundCut = false;
3720 dsmap_t results;
3721 vState dsState = getDSRecords(ds, results, false, depth, prefix, false, &foundCut);
3722
3723 if (foundCut) {
3724 LOG(prefix << name << ": - Found cut at " << ds << endl);
3725 LOG(prefix << name << ": New state for " << ds << " is " << dsState << endl);
3726 d_cutStates[ds] = dsState;
3727
3728 if (dsState != vState::Secure) {
3729 return dsState;
3730 }
3731 }
3732 }
3733
3734 /* we did not miss a cut, good luck */
3735 return result;
3736 }
3737
3738 #if 0
3739 /* we don't need this, we actually do the right thing later */
3740 DNSName signer = getSigner(signatures);
3741
3742 if (!signer.empty() && name.isPartOf(signer)) {
3743 if (signer == best) {
3744 return result;
3745 }
3746 /* the zone cut is not the one we expected,
3747 this is fine because we will retrieve the needed DNSKEYs and DSs
3748 later, and even go Insecure if we missed a cut to Insecure (no DS)
3749 and the signatures do not validate (we should not go Bogus in that
3750 case) */
3751 }
3752 /* something is not right, but let's not worry about that for now.. */
3753 #endif
3754
3755 return result;
3756 }
3757
3758 vState SyncRes::validateDNSKeys(const DNSName& zone, const std::vector<DNSRecord>& dnskeys, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, unsigned int depth, const string& prefix)
3759 {
3760 dsmap_t ds;
3761 if (signatures.empty()) {
3762 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but no signature, going Bogus!" << endl);
3763 return vState::BogusNoRRSIG;
3764 }
3765
3766 DNSName signer = getSigner(signatures);
3767
3768 if (!signer.empty() && zone.isPartOf(signer)) {
3769 vState state = getDSRecords(signer, ds, false, depth, prefix);
3770
3771 if (state != vState::Secure) {
3772 return state;
3773 }
3774 }
3775 else {
3776 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but the zone (" << zone << ") is not part of the signer (" << signer << "), check that we did not miss a zone cut" << endl);
3777 /* try again to get the missed cuts, harder this time */
3778 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3779 if (zState == vState::Secure) {
3780 /* too bad */
3781 LOG(prefix << zone << ": After checking the zone cuts again, we still have " << std::to_string(dnskeys.size()) << " DNSKEYs and the zone (" << zone << ") is still not part of the signer (" << signer << "), going Bogus!" << endl);
3782 return vState::BogusNoValidRRSIG;
3783 }
3784 else {
3785 return zState;
3786 }
3787 }
3788
3789 skeyset_t tentativeKeys;
3790 sortedRecords_t toSign;
3791
3792 for (const auto& dnskey : dnskeys) {
3793 if (dnskey.d_type == QType::DNSKEY) {
3794 auto content = getRR<DNSKEYRecordContent>(dnskey);
3795 if (content) {
3796 tentativeKeys.insert(content);
3797 toSign.insert(content);
3798 }
3799 }
3800 }
3801
3802 LOG(prefix << zone << ": Trying to validate " << std::to_string(tentativeKeys.size()) << " DNSKEYs with " << std::to_string(ds.size()) << " DS" << endl);
3803 skeyset_t validatedKeys;
3804 auto state = validateDNSKeysAgainstDS(d_now.tv_sec, zone, ds, tentativeKeys, toSign, signatures, validatedKeys, LogObject(prefix));
3805
3806 LOG(prefix << zone << ": We now have " << std::to_string(validatedKeys.size()) << " DNSKEYs" << endl);
3807
3808 /* if we found at least one valid RRSIG covering the set,
3809 all tentative keys are validated keys. Otherwise it means
3810 we haven't found at least one DNSKEY and a matching RRSIG
3811 covering this set, this looks Bogus. */
3812 if (validatedKeys.size() != tentativeKeys.size()) {
3813 LOG(prefix << zone << ": Let's check whether we missed a zone cut before returning a Bogus state from " << __func__ << "(" << zone << ")" << endl);
3814 /* try again to get the missed cuts, harder this time */
3815 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3816 if (zState == vState::Secure) {
3817 /* too bad */
3818 LOG(prefix << zone << ": After checking the zone cuts we are still in a Secure zone, returning Bogus state from " << __func__ << "(" << zone << ")" << endl);
3819 return state;
3820 }
3821 else {
3822 return zState;
3823 }
3824 }
3825
3826 return state;
3827 }
3828
3829 vState SyncRes::getDNSKeys(const DNSName& signer, skeyset_t& keys, bool& servFailOccurred, unsigned int depth, const string& prefix)
3830 {
3831 std::vector<DNSRecord> records;
3832 std::set<GetBestNSAnswer> beenthere;
3833 LOG(prefix << signer << ": Retrieving DNSKEYs" << endl);
3834
3835 Context context;
3836
3837 const bool oldCacheOnly = setCacheOnly(false);
3838 int rcode = doResolve(signer, QType::DNSKEY, records, depth + 1, beenthere, context);
3839 setCacheOnly(oldCacheOnly);
3840
3841 if (rcode == RCode::ServFail) {
3842 servFailOccurred = true;
3843 return vState::BogusUnableToGetDNSKEYs;
3844 }
3845
3846 if (rcode == RCode::NoError) {
3847 if (context.state == vState::Secure) {
3848 for (const auto& key : records) {
3849 if (key.d_type == QType::DNSKEY) {
3850 auto content = getRR<DNSKEYRecordContent>(key);
3851 if (content) {
3852 keys.insert(content);
3853 }
3854 }
3855 }
3856 }
3857 LOG(prefix << signer << ": Retrieved " << keys.size() << " DNSKeys, state is " << context.state << endl);
3858 return context.state;
3859 }
3860
3861 if (context.state == vState::Insecure) {
3862 return context.state;
3863 }
3864
3865 LOG(prefix << signer << ": Returning Bogus state from " << __func__ << "(" << signer << ")" << endl);
3866 return vState::BogusUnableToGetDNSKEYs;
3867 }
3868
3869 vState SyncRes::validateRecordsWithSigs(unsigned int depth, const string& prefix, const DNSName& qname, const QType qtype, const DNSName& name, const QType type, const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
3870 {
3871 skeyset_t keys;
3872 if (signatures.empty()) {
3873 LOG(prefix << qname << ": Bogus!" << endl);
3874 return vState::BogusNoRRSIG;
3875 }
3876
3877 const DNSName signer = getSigner(signatures);
3878 bool dsFailed = false;
3879 if (!signer.empty() && name.isPartOf(signer)) {
3880 vState state = vState::Secure;
3881
3882 if ((qtype == QType::DNSKEY || qtype == QType::DS) && signer == qname) {
3883 /* we are already retrieving those keys, sorry */
3884 if (type == QType::DS && signer == name && !signer.isRoot()) {
3885 /* Unless we are getting the DS of the root zone, we should never see a
3886 DS (or a denial of a DS) signed by the DS itself, since we should be
3887 requesting it from the parent zone. Something is very wrong */
3888 LOG(prefix << qname << ": The DS for " << qname << " is signed by itself" << endl);
3889 state = vState::BogusSelfSignedDS;
3890 dsFailed = true;
3891 }
3892 else if (qtype == QType::DS && signer == qname && !signer.isRoot()) {
3893 if (type == QType::SOA || type == QType::NSEC || type == QType::NSEC3) {
3894 /* if we are trying to validate the DS or more likely NSEC(3)s proving that it does not exist, we have a problem.
3895 In that case let's go Bogus (we will check later if we missed a cut)
3896 */
3897 state = vState::BogusSelfSignedDS;
3898 dsFailed = true;
3899 }
3900 else if (type == QType::CNAME) {
3901 state = vState::BogusUnableToGetDSs;
3902 dsFailed = true;
3903 }
3904 }
3905 else if (qtype == QType::DNSKEY && signer == qname) {
3906 /* that actually does happen when a server returns NS records in authority
3907 along with the DNSKEY, leading us to trying to validate the RRSIGs for
3908 the NS with the DNSKEY that we are about to process. */
3909 if ((name == signer && type == QType::NSEC) || type == QType::NSEC3) {
3910 /* if we are trying to validate the DNSKEY (should not happen here),
3911 or more likely NSEC(3)s proving that it does not exist, we have a problem.
3912 In that case let's see if the DS does exist, and if it does let's go Bogus
3913 */
3914 dsmap_t results;
3915 vState dsState = getDSRecords(signer, results, false, depth, prefix, true);
3916 if (vStateIsBogus(dsState) || dsState == vState::Insecure) {
3917 state = dsState;
3918 if (vStateIsBogus(dsState)) {
3919 dsFailed = true;
3920 }
3921 }
3922 else {
3923 LOG(prefix << qname << ": Unable to get the DS for " << signer << endl);
3924 state = vState::BogusUnableToGetDNSKEYs;
3925 dsFailed = true;
3926 }
3927 }
3928 else {
3929 /* return immediately since looking at the cuts is not going to change the
3930 fact that we are looking at a signature done with the key we are trying to
3931 obtain */
3932 LOG(prefix << qname << ": We are looking at a signature done with the key we are trying to obtain " << signer << endl);
3933 return vState::Indeterminate;
3934 }
3935 }
3936 }
3937 bool servFailOccurred = false;
3938 if (state == vState::Secure) {
3939 state = getDNSKeys(signer, keys, servFailOccurred, depth, prefix);
3940 }
3941
3942 if (state != vState::Secure) {
3943 if (!vStateIsBogus(state)) {
3944 return state;
3945 }
3946 /* try again to get the missed cuts, harder this time */
3947 LOG(prefix << signer << ": Checking whether we missed a zone cut for " << signer << " before returning a Bogus state for " << name << "|" << type.toString() << endl);
3948 auto zState = getValidationStatus(signer, false, dsFailed, depth, prefix);
3949 if (zState == vState::Secure) {
3950 if (state == vState::BogusUnableToGetDNSKEYs && servFailOccurred) {
3951 throw ImmediateServFailException("Server Failure while retrieving DNSKEY records for " + signer.toLogString());
3952 }
3953 /* too bad */
3954 LOG(prefix << signer << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
3955 return state;
3956 }
3957 else {
3958 return zState;
3959 }
3960 }
3961 }
3962
3963 sortedRecords_t recordcontents;
3964 for (const auto& record : records) {
3965 recordcontents.insert(record.getContent());
3966 }
3967
3968 LOG(prefix << name << ": Going to validate " << recordcontents.size() << " record contents with " << signatures.size() << " sigs and " << keys.size() << " keys for " << name << "|" << type.toString() << endl);
3969 vState state = validateWithKeySet(d_now.tv_sec, name, recordcontents, signatures, keys, LogObject(prefix), false);
3970 if (state == vState::Secure) {
3971 LOG(prefix << name << ": Secure!" << endl);
3972 return vState::Secure;
3973 }
3974
3975 LOG(prefix << vStateToString(state) << "!" << endl);
3976 /* try again to get the missed cuts, harder this time */
3977 auto zState = getValidationStatus(name, false, type == QType::DS, depth, prefix);
3978 LOG(prefix << name << ": Checking whether we missed a zone cut before returning a Bogus state" << endl);
3979 if (zState == vState::Secure) {
3980 /* too bad */
3981 LOG(prefix << name << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
3982 return state;
3983 }
3984 else {
3985 return zState;
3986 }
3987 }
3988
3989 /* This function will check whether the answer should have the AA bit set, and will set if it should be set and isn't.
3990 This is unfortunately needed to deal with very crappy so-called DNS servers */
3991 void SyncRes::fixupAnswer(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
3992 {
3993 const bool wasForwardRecurse = wasForwarded && rdQuery;
3994
3995 if (wasForwardRecurse || lwr.d_aabit) {
3996 /* easy */
3997 return;
3998 }
3999
4000 for (const auto& rec : lwr.d_records) {
4001
4002 if (rec.d_type == QType::OPT) {
4003 continue;
4004 }
4005
4006 if (rec.d_class != QClass::IN) {
4007 continue;
4008 }
4009
4010 if (rec.d_type == QType::ANY) {
4011 continue;
4012 }
4013
4014 if (rec.d_place == DNSResourceRecord::ANSWER && (rec.d_type == qtype || rec.d_type == QType::CNAME || qtype == QType::ANY) && rec.d_name == qname && rec.d_name.isPartOf(auth)) {
4015 /* This is clearly an answer to the question we were asking, from an authoritative server that is allowed to send it.
4016 We are going to assume this server is broken and does not know it should set the AA bit, even though it is DNS 101 */
4017 LOG(prefix << qname << ": Received a record for " << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << " in the answer section from " << auth << ", without the AA bit set. Assuming this server is clueless and setting the AA bit." << endl);
4018 lwr.d_aabit = true;
4019 return;
4020 }
4021
4022 if (rec.d_place != DNSResourceRecord::ANSWER) {
4023 /* we have scanned all the records in the answer section, if any, we are done */
4024 return;
4025 }
4026 }
4027 }
4028
4029 static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec)
4030 {
4031 switch (rec.d_type) {
4032 case QType::MX:
4033 if (auto mxContent = getRR<MXRecordContent>(rec)) {
4034 allowedAdditionals.insert(mxContent->d_mxname);
4035 }
4036 break;
4037 case QType::NS:
4038 if (auto nsContent = getRR<NSRecordContent>(rec)) {
4039 allowedAdditionals.insert(nsContent->getNS());
4040 }
4041 break;
4042 case QType::SRV:
4043 if (auto srvContent = getRR<SRVRecordContent>(rec)) {
4044 allowedAdditionals.insert(srvContent->d_target);
4045 }
4046 break;
4047 case QType::SVCB: /* fall-through */
4048 case QType::HTTPS:
4049 if (auto svcbContent = getRR<SVCBBaseRecordContent>(rec)) {
4050 if (svcbContent->getPriority() > 0) {
4051 DNSName target = svcbContent->getTarget();
4052 if (target.isRoot()) {
4053 target = rec.d_name;
4054 }
4055 allowedAdditionals.insert(target);
4056 }
4057 else {
4058 // FIXME: Alias mode not implemented yet
4059 }
4060 }
4061 break;
4062 case QType::NAPTR:
4063 if (auto naptrContent = getRR<NAPTRRecordContent>(rec)) {
4064 auto flags = naptrContent->getFlags();
4065 toLowerInPlace(flags);
4066 if (flags.find('a') != string::npos || flags.find('s') != string::npos) {
4067 allowedAdditionals.insert(naptrContent->getReplacement());
4068 }
4069 }
4070 break;
4071 default:
4072 break;
4073 }
4074 }
4075
4076 void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
4077 {
4078 const bool wasForwardRecurse = wasForwarded && rdQuery;
4079 /* list of names for which we will allow A and AAAA records in the additional section
4080 to remain */
4081 std::unordered_set<DNSName> allowedAdditionals = {qname};
4082 bool haveAnswers = false;
4083 bool isNXDomain = false;
4084 bool isNXQType = false;
4085
4086 for (auto rec = lwr.d_records.begin(); rec != lwr.d_records.end();) {
4087
4088 if (rec->d_type == QType::OPT) {
4089 ++rec;
4090 continue;
4091 }
4092
4093 if (rec->d_class != QClass::IN) {
4094 LOG(prefix << qname << ": Removing non internet-classed data received from " << auth << endl);
4095 rec = lwr.d_records.erase(rec);
4096 continue;
4097 }
4098
4099 if (rec->d_type == QType::ANY) {
4100 LOG(prefix << qname << ": Removing 'ANY'-typed data received from " << auth << endl);
4101 rec = lwr.d_records.erase(rec);
4102 continue;
4103 }
4104
4105 if (!rec->d_name.isPartOf(auth)) {
4106 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4107 rec = lwr.d_records.erase(rec);
4108 continue;
4109 }
4110
4111 /* dealing with the records in answer */
4112 if (!(lwr.d_aabit || wasForwardRecurse) && rec->d_place == DNSResourceRecord::ANSWER) {
4113 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4114 are sending such responses */
4115 if (!(rec->d_type == QType::CNAME && qname == rec->d_name)) {
4116 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the answer section without the AA bit set received from " << auth << endl);
4117 rec = lwr.d_records.erase(rec);
4118 continue;
4119 }
4120 }
4121
4122 if (rec->d_type == QType::DNAME && (rec->d_place != DNSResourceRecord::ANSWER || !qname.isPartOf(rec->d_name))) {
4123 LOG(prefix << qname << ": Removing invalid DNAME record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4124 rec = lwr.d_records.erase(rec);
4125 continue;
4126 }
4127
4128 if (rec->d_place == DNSResourceRecord::ANSWER && (qtype != QType::ANY && rec->d_type != qtype.getCode() && s_redirectionQTypes.count(rec->d_type) == 0 && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG)) {
4129 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ANSWER section received from " << auth << endl);
4130 rec = lwr.d_records.erase(rec);
4131 continue;
4132 }
4133
4134 if (rec->d_place == DNSResourceRecord::ANSWER && !haveAnswers) {
4135 haveAnswers = true;
4136 }
4137
4138 if (rec->d_place == DNSResourceRecord::ANSWER) {
4139 allowAdditionalEntry(allowedAdditionals, *rec);
4140 }
4141
4142 /* dealing with the records in authority */
4143 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type != QType::NS && rec->d_type != QType::DS && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG && rec->d_type != QType::NSEC && rec->d_type != QType::NSEC3) {
4144 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4145 rec = lwr.d_records.erase(rec);
4146 continue;
4147 }
4148
4149 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::SOA) {
4150 if (!qname.isPartOf(rec->d_name)) {
4151 LOG(prefix << qname << ": Removing irrelevant SOA record '" << rec->d_name << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4152 rec = lwr.d_records.erase(rec);
4153 continue;
4154 }
4155
4156 if (!(lwr.d_aabit || wasForwardRecurse)) {
4157 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4158 rec = lwr.d_records.erase(rec);
4159 continue;
4160 }
4161
4162 if (!haveAnswers) {
4163 if (lwr.d_rcode == RCode::NXDomain) {
4164 isNXDomain = true;
4165 }
4166 else if (lwr.d_rcode == RCode::NoError) {
4167 isNXQType = true;
4168 }
4169 }
4170 }
4171
4172 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && (isNXDomain || isNXQType)) {
4173 /*
4174 * We don't want to pick up NS records in AUTHORITY and their ADDITIONAL sections of NXDomain answers
4175 * because they are somewhat easy to insert into a large, fragmented UDP response
4176 * for an off-path attacker by injecting spoofed UDP fragments. So do not add these to allowedAdditionals.
4177 */
4178 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a " << (isNXDomain ? "NXD" : "NXQTYPE") << " response received from " << auth << endl);
4179 rec = lwr.d_records.erase(rec);
4180 continue;
4181 }
4182
4183 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && !d_updatingRootNS && rec->d_name == g_rootdnsname) {
4184 /*
4185 * We don't want to pick up root NS records in AUTHORITY and their associated ADDITIONAL sections of random queries.
4186 * So don't add them to allowedAdditionals.
4187 */
4188 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a response received from " << auth << endl);
4189 rec = lwr.d_records.erase(rec);
4190 continue;
4191 }
4192
4193 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS) {
4194 allowAdditionalEntry(allowedAdditionals, *rec);
4195 }
4196
4197 /* dealing with the records in additional */
4198 if (rec->d_place == DNSResourceRecord::ADDITIONAL && rec->d_type != QType::A && rec->d_type != QType::AAAA && rec->d_type != QType::RRSIG) {
4199 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4200 rec = lwr.d_records.erase(rec);
4201 continue;
4202 }
4203
4204 if (rec->d_place == DNSResourceRecord::ADDITIONAL && allowedAdditionals.count(rec->d_name) == 0) {
4205 LOG(prefix << qname << ": Removing irrelevant additional record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4206 rec = lwr.d_records.erase(rec);
4207 continue;
4208 }
4209
4210 ++rec;
4211 }
4212 }
4213
4214 void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector<DNSRecord>& newRecords, unsigned int depth, const string& prefix)
4215 {
4216 vector<DNSRecord> existing;
4217 bool wasAuth = false;
4218 auto ttl = g_recCache->get(d_now.tv_sec, domain, QType::NS, MemRecursorCache::None, &existing, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, nullptr, &wasAuth);
4219
4220 if (ttl <= 0 || wasAuth) {
4221 return;
4222 }
4223 {
4224 auto lock = s_savedParentNSSet.lock();
4225 if (lock->find(domain) != lock->end()) {
4226 // no relevant data, or we already stored the parent data
4227 return;
4228 }
4229 }
4230
4231 set<DNSName> authSet;
4232 for (const auto& ns : newRecords) {
4233 auto content = getRR<NSRecordContent>(ns);
4234 authSet.insert(content->getNS());
4235 }
4236 // The glue IPs could also differ, but we're not checking that yet, we're only looking for parent NS records not
4237 // in the child set
4238 bool shouldSave = false;
4239 for (const auto& ns : existing) {
4240 auto content = getRR<NSRecordContent>(ns);
4241 if (authSet.count(content->getNS()) == 0) {
4242 LOG(prefix << domain << ": At least one parent-side NS was not in the child-side NS set, remembering parent NS set and cached IPs" << endl);
4243 shouldSave = true;
4244 break;
4245 }
4246 }
4247
4248 if (shouldSave) {
4249 map<DNSName, vector<ComboAddress>> entries;
4250 for (const auto& ns : existing) {
4251 auto content = getRR<NSRecordContent>(ns);
4252 const DNSName& name = content->getNS();
4253 set<GetBestNSAnswer> beenthereIgnored;
4254 unsigned int nretrieveAddressesForNSIgnored;
4255 auto addresses = getAddrs(name, depth, prefix, beenthereIgnored, true, nretrieveAddressesForNSIgnored);
4256 entries.emplace(name, addresses);
4257 }
4258 s_savedParentNSSet.lock()->emplace(domain, std::move(entries), d_now.tv_sec + ttl);
4259 }
4260 }
4261
4262 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery, const ComboAddress& remoteIP)
4263 {
4264 bool wasForwardRecurse = wasForwarded && rdQuery;
4265 tcache_t tcache;
4266
4267 fixupAnswer(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4268 sanitizeRecords(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4269
4270 std::vector<std::shared_ptr<DNSRecord>> authorityRecs;
4271 const unsigned int labelCount = qname.countLabels();
4272 bool isCNAMEAnswer = false;
4273 bool isDNAMEAnswer = false;
4274 DNSName seenAuth;
4275
4276 for (auto& rec : lwr.d_records) {
4277 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4278 continue;
4279 }
4280
4281 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4282
4283 if (!isCNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::CNAME && (!(qtype == QType::CNAME)) && rec.d_name == qname && !isDNAMEAnswer) {
4284 isCNAMEAnswer = true;
4285 }
4286 if (!isDNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::DNAME && qtype != QType::DNAME && qname.isPartOf(rec.d_name)) {
4287 isDNAMEAnswer = true;
4288 isCNAMEAnswer = false;
4289 }
4290
4291 if (rec.d_type == QType::SOA && rec.d_place == DNSResourceRecord::AUTHORITY && qname.isPartOf(rec.d_name)) {
4292 seenAuth = rec.d_name;
4293 }
4294
4295 if (rec.d_type == QType::RRSIG) {
4296 auto rrsig = getRR<RRSIGRecordContent>(rec);
4297 if (rrsig) {
4298 /* As illustrated in rfc4035's Appendix B.6, the RRSIG label
4299 count can be lower than the name's label count if it was
4300 synthesized from the wildcard. Note that the difference might
4301 be > 1. */
4302 if (rec.d_name == qname && isWildcardExpanded(labelCount, *rrsig)) {
4303 gatherWildcardProof = true;
4304 if (!isWildcardExpandedOntoItself(rec.d_name, labelCount, *rrsig)) {
4305 /* if we have a wildcard expanded onto itself, we don't need to prove
4306 that the exact name doesn't exist because it actually does.
4307 We still want to gather the corresponding NSEC/NSEC3 records
4308 to pass them to our client in case it wants to validate by itself.
4309 */
4310 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard, we need a wildcard proof" << endl);
4311 needWildcardProof = true;
4312 }
4313 else {
4314 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard expanded onto itself, we need to gather wildcard proof" << endl);
4315 }
4316 wildcardLabelsCount = rrsig->d_labels;
4317 }
4318
4319 // cerr<<"Got an RRSIG for "<<DNSRecordContent::NumberToType(rrsig->d_type)<<" with name '"<<rec.d_name<<"' and place "<<rec.d_place<<endl;
4320 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
4321 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL = std::min(tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL, rec.d_ttl);
4322 }
4323 }
4324 }
4325
4326 /* if we have a positive answer synthesized from a wildcard,
4327 we need to store the corresponding NSEC/NSEC3 records proving
4328 that the exact name did not exist in the negative cache */
4329 if (gatherWildcardProof) {
4330 for (const auto& rec : lwr.d_records) {
4331 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4332 continue;
4333 }
4334
4335 if (nsecTypes.count(rec.d_type)) {
4336 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4337 }
4338 else if (rec.d_type == QType::RRSIG) {
4339 auto rrsig = getRR<RRSIGRecordContent>(rec);
4340 if (rrsig && nsecTypes.count(rrsig->d_type)) {
4341 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4342 }
4343 }
4344 }
4345 }
4346
4347 // reap all answers from this packet that are acceptable
4348 for (auto& rec : lwr.d_records) {
4349 if (rec.d_type == QType::OPT) {
4350 LOG(prefix << qname << ": OPT answer '" << rec.d_name << "' from '" << auth << "' nameservers" << endl);
4351 continue;
4352 }
4353
4354 LOG(prefix << qname << ": Accept answer '" << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << "|" << rec.getContent()->getZoneRepresentation() << "' from '" << auth << "' nameservers? ttl=" << rec.d_ttl << ", place=" << (int)rec.d_place << " ");
4355
4356 // We called sanitizeRecords before, so all ANY, non-IN and non-aa/non-forwardrecurse answer records are already removed
4357
4358 if (rec.d_name.isPartOf(auth)) {
4359 if (rec.d_type == QType::RRSIG) {
4360 LOG("RRSIG - separate" << endl);
4361 }
4362 else if (rec.d_type == QType::DS && rec.d_name == auth) {
4363 LOG("NO - DS provided by child zone" << endl);
4364 }
4365 else {
4366 bool haveLogged = false;
4367 if (isDNAMEAnswer && rec.d_type == QType::CNAME) {
4368 LOG("NO - we already have a DNAME answer for this domain" << endl);
4369 continue;
4370 }
4371 if (!t_sstorage.domainmap->empty()) {
4372 // Check if we are authoritative for a zone in this answer
4373 DNSName tmp_qname(rec.d_name);
4374 // We may be auth for domain example.com, but the DS record needs to come from the parent (.com) nameserver
4375 if (rec.d_type == QType::DS) {
4376 tmp_qname.chopOff();
4377 }
4378 auto auth_domain_iter = getBestAuthZone(&tmp_qname);
4379 if (auth_domain_iter != t_sstorage.domainmap->end() && auth.countLabels() <= auth_domain_iter->first.countLabels()) {
4380 if (auth_domain_iter->first != auth) {
4381 LOG("NO! - we are authoritative for the zone " << auth_domain_iter->first << endl);
4382 continue;
4383 }
4384 else {
4385 LOG("YES! - This answer was ");
4386 if (!wasForwarded) {
4387 LOG("retrieved from the local auth store.");
4388 }
4389 else {
4390 LOG("received from a server we forward to.");
4391 }
4392 haveLogged = true;
4393 LOG(endl);
4394 }
4395 }
4396 }
4397 if (!haveLogged) {
4398 LOG("YES!" << endl);
4399 }
4400
4401 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4402
4403 DNSRecord dr(rec);
4404 dr.d_ttl += d_now.tv_sec;
4405 dr.d_place = DNSResourceRecord::ANSWER;
4406 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(dr);
4407 }
4408 }
4409 else
4410 LOG("NO!" << endl);
4411 }
4412
4413 // supplant
4414 for (auto& entry : tcache) {
4415 if ((entry.second.records.size() + entry.second.signatures.size() + authorityRecs.size()) > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
4416 uint32_t lowestTTD = computeLowestTTD(entry.second.records, entry.second.signatures, entry.second.signaturesTTL, authorityRecs);
4417
4418 for (auto& record : entry.second.records) {
4419 record.d_ttl = lowestTTD; // boom
4420 }
4421 }
4422 }
4423
4424 for (tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
4425
4426 if (i->second.records.empty()) // this happens when we did store signatures, but passed on the records themselves
4427 continue;
4428
4429 /* Even if the AA bit is set, additional data cannot be considered
4430 as authoritative. This is especially important during validation
4431 because keeping records in the additional section is allowed even
4432 if the corresponding RRSIGs are not included, without setting the TC
4433 bit, as stated in rfc4035's section 3.1.1. Including RRSIG RRs in a Response:
4434 "When placing a signed RRset in the Additional section, the name
4435 server MUST also place its RRSIG RRs in the Additional section.
4436 If space does not permit inclusion of both the RRset and its
4437 associated RRSIG RRs, the name server MAY retain the RRset while
4438 dropping the RRSIG RRs. If this happens, the name server MUST NOT
4439 set the TC bit solely because these RRSIG RRs didn't fit."
4440 */
4441 bool isAA = lwr.d_aabit && i->first.place != DNSResourceRecord::ADDITIONAL;
4442 /* if we forwarded the query to a recursor, we can expect the answer to be signed,
4443 even if the answer is not AA. Of course that's not only true inside a Secure
4444 zone, but we check that below. */
4445 bool expectSignature = i->first.place == DNSResourceRecord::ANSWER || ((lwr.d_aabit || wasForwardRecurse) && i->first.place != DNSResourceRecord::ADDITIONAL);
4446 /* in a non authoritative answer, we only care about the DS record (or lack of) */
4447 if (!isAA && (i->first.type == QType::DS || i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && i->first.place == DNSResourceRecord::AUTHORITY) {
4448 expectSignature = true;
4449 }
4450
4451 if (isCNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::CNAME || i->first.name != qname)) {
4452 /*
4453 rfc2181 states:
4454 Note that the answer section of an authoritative answer normally
4455 contains only authoritative data. However when the name sought is an
4456 alias (see section 10.1.1) only the record describing that alias is
4457 necessarily authoritative. Clients should assume that other records
4458 may have come from the server's cache. Where authoritative answers
4459 are required, the client should query again, using the canonical name
4460 associated with the alias.
4461 */
4462 isAA = false;
4463 expectSignature = false;
4464 }
4465 else if (isDNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::DNAME || !qname.isPartOf(i->first.name))) {
4466 /* see above */
4467 isAA = false;
4468 expectSignature = false;
4469 }
4470
4471 if ((isCNAMEAnswer || isDNAMEAnswer) && i->first.place == DNSResourceRecord::AUTHORITY && i->first.type == QType::NS && auth == i->first.name) {
4472 /* These NS can't be authoritative since we have a CNAME/DNAME answer for which (see above) only the
4473 record describing that alias is necessarily authoritative.
4474 But if we allow the current auth, which might be serving the child zone, to raise the TTL
4475 of non-authoritative NS in the cache, they might be able to keep a "ghost" zone alive forever,
4476 even after the delegation is gone from the parent.
4477 So let's just do nothing with them, we can fetch them directly if we need them.
4478 */
4479 LOG(prefix << qname << ": Skipping authority NS from '" << auth << "' nameservers in CNAME/DNAME answer " << i->first.name << "|" << DNSRecordContent::NumberToType(i->first.type) << endl);
4480 continue;
4481 }
4482
4483 /*
4484 * RFC 6672 section 5.3.1
4485 * In any response, a signed DNAME RR indicates a non-terminal
4486 * redirection of the query. There might or might not be a server-
4487 * synthesized CNAME in the answer section; if there is, the CNAME will
4488 * never be signed. For a DNSSEC validator, verification of the DNAME
4489 * RR and then that the CNAME was properly synthesized is sufficient
4490 * proof.
4491 *
4492 * We do the synthesis check in processRecords, here we make sure we
4493 * don't validate the CNAME.
4494 */
4495 if (isDNAMEAnswer && i->first.type == QType::CNAME) {
4496 expectSignature = false;
4497 }
4498
4499 vState recordState = vState::Indeterminate;
4500
4501 if (expectSignature && shouldValidate()) {
4502 vState initialState = getValidationStatus(i->first.name, !i->second.signatures.empty(), i->first.type == QType::DS, depth, prefix);
4503 LOG(prefix << qname << ": Got initial zone status " << initialState << " for record " << i->first.name << "|" << DNSRecordContent::NumberToType(i->first.type) << endl);
4504
4505 if (initialState == vState::Secure) {
4506 if (i->first.type == QType::DNSKEY && i->first.place == DNSResourceRecord::ANSWER && i->first.name == getSigner(i->second.signatures)) {
4507 LOG(prefix << qname << ": Validating DNSKEY for " << i->first.name << endl);
4508 recordState = validateDNSKeys(i->first.name, i->second.records, i->second.signatures, depth, prefix);
4509 }
4510 else {
4511 LOG(prefix << qname << ": Validating non-additional " << QType(i->first.type).toString() << " record for " << i->first.name << endl);
4512 recordState = validateRecordsWithSigs(depth, prefix, qname, qtype, i->first.name, QType(i->first.type), i->second.records, i->second.signatures);
4513 }
4514 }
4515 else {
4516 recordState = initialState;
4517 LOG(prefix << qname << ": Skipping validation because the current state is " << recordState << endl);
4518 }
4519
4520 LOG(prefix << qname << ": Validation result is " << recordState << ", current state is " << state << endl);
4521 if (state != recordState) {
4522 updateValidationState(qname, state, recordState, prefix);
4523 }
4524 }
4525
4526 if (vStateIsBogus(recordState)) {
4527 /* this is a TTD by now, be careful */
4528 for (auto& record : i->second.records) {
4529 record.d_ttl = std::min(record.d_ttl, static_cast<uint32_t>(s_maxbogusttl + d_now.tv_sec));
4530 }
4531 }
4532
4533 /* We don't need to store NSEC3 records in the positive cache because:
4534 - we don't allow direct NSEC3 queries
4535 - denial of existence proofs in wildcard expanded positive responses are stored in authorityRecs
4536 - denial of existence proofs for negative responses are stored in the negative cache
4537 We also don't want to cache non-authoritative data except for:
4538 - records coming from non forward-recurse servers (those will never be AA)
4539 - DS (special case)
4540 - NS, A and AAAA (used for infra queries)
4541 */
4542 if (i->first.type != QType::NSEC3 && (i->first.type == QType::DS || i->first.type == QType::NS || i->first.type == QType::A || i->first.type == QType::AAAA || isAA || wasForwardRecurse)) {
4543
4544 bool doCache = true;
4545 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4546 const bool isv4 = ednsmask->isIPv4();
4547 if ((isv4 && s_ecsipv4nevercache) || (!isv4 && s_ecsipv6nevercache)) {
4548 doCache = false;
4549 }
4550 // If ednsmask is relevant, we do not want to cache if the scope prefix length is large and TTL is small
4551 if (doCache && s_ecscachelimitttl > 0) {
4552 bool manyMaskBits = (isv4 && ednsmask->getBits() > s_ecsipv4cachelimit) || (!isv4 && ednsmask->getBits() > s_ecsipv6cachelimit);
4553
4554 if (manyMaskBits) {
4555 uint32_t minttl = UINT32_MAX;
4556 for (const auto& it : i->second.records) {
4557 if (it.d_ttl < minttl)
4558 minttl = it.d_ttl;
4559 }
4560 bool ttlIsSmall = minttl < s_ecscachelimitttl + d_now.tv_sec;
4561 if (ttlIsSmall) {
4562 // Case: many bits and ttlIsSmall
4563 doCache = false;
4564 }
4565 }
4566 }
4567 }
4568
4569 d_fromAuthIP = remoteIP;
4570
4571 if (doCache) {
4572 // Check if we are going to replace a non-auth (parent) NS recordset
4573 if (isAA && i->first.type == QType::NS && s_save_parent_ns_set) {
4574 rememberParentSetIfNeeded(i->first.name, i->second.records, depth, prefix);
4575 }
4576 g_recCache->replace(d_now.tv_sec, i->first.name, i->first.type, i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, auth, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, d_routingTag, recordState, remoteIP, d_refresh);
4577
4578 // Delete potential negcache entry. When a record recovers with serve-stale the negcache entry can cause the wrong entry to
4579 // be served, as negcache entries are checked before record cache entries
4580 if (NegCache::s_maxServedStaleExtensions > 0) {
4581 g_negCache->wipeTyped(i->first.name, i->first.type);
4582 }
4583
4584 if (g_aggressiveNSECCache && needWildcardProof && recordState == vState::Secure && i->first.place == DNSResourceRecord::ANSWER && i->first.name == qname && !i->second.signatures.empty() && !d_routingTag && !ednsmask) {
4585 /* we have an answer synthesized from a wildcard and aggressive NSEC is enabled, we need to store the
4586 wildcard in its non-expanded form in the cache to be able to synthesize wildcard answers later */
4587 const auto& rrsig = i->second.signatures.at(0);
4588
4589 if (isWildcardExpanded(labelCount, *rrsig) && !isWildcardExpandedOntoItself(i->first.name, labelCount, *rrsig)) {
4590 DNSName realOwner = getNSECOwnerName(i->first.name, i->second.signatures);
4591
4592 std::vector<DNSRecord> content;
4593 content.reserve(i->second.records.size());
4594 for (const auto& record : i->second.records) {
4595 DNSRecord nonExpandedRecord(record);
4596 nonExpandedRecord.d_name = realOwner;
4597 content.push_back(std::move(nonExpandedRecord));
4598 }
4599
4600 g_recCache->replace(d_now.tv_sec, realOwner, QType(i->first.type), content, i->second.signatures, /* no additional records in that case */ {}, i->first.type == QType::DS ? true : isAA, auth, boost::none, boost::none, recordState, remoteIP, d_refresh);
4601 }
4602 }
4603 }
4604 }
4605
4606 if (seenAuth.empty() && !i->second.signatures.empty()) {
4607 seenAuth = getSigner(i->second.signatures);
4608 }
4609
4610 if (g_aggressiveNSECCache && (i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && recordState == vState::Secure && !seenAuth.empty()) {
4611 // Good candidate for NSEC{,3} caching
4612 g_aggressiveNSECCache->insertNSEC(seenAuth, i->first.name, i->second.records.at(0), i->second.signatures, i->first.type == QType::NSEC3);
4613 }
4614
4615 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4616 d_wasVariable = true;
4617 }
4618 }
4619
4620 return RCode::NoError;
4621 }
4622
4623 void SyncRes::updateDenialValidationState(const DNSName& qname, vState& neValidationState, const DNSName& neName, vState& state, const dState denialState, const dState expectedState, bool isDS, unsigned int depth, const string& prefix)
4624 {
4625 if (denialState == expectedState) {
4626 neValidationState = vState::Secure;
4627 }
4628 else {
4629 if (denialState == dState::OPTOUT) {
4630 LOG(prefix << qname << ": OPT-out denial found for " << neName << endl);
4631 /* rfc5155 states:
4632 "The AD bit, as defined by [RFC4035], MUST NOT be set when returning a
4633 response containing a closest (provable) encloser proof in which the
4634 NSEC3 RR that covers the "next closer" name has the Opt-Out bit set.
4635
4636 This rule is based on what this closest encloser proof actually
4637 proves: names that would be covered by the Opt-Out NSEC3 RR may or
4638 may not exist as insecure delegations. As such, not all the data in
4639 responses containing such closest encloser proofs will have been
4640 cryptographically verified, so the AD bit cannot be set."
4641
4642 At best the Opt-Out NSEC3 RR proves that there is no signed DS (so no
4643 secure delegation).
4644 */
4645 neValidationState = vState::Insecure;
4646 }
4647 else if (denialState == dState::INSECURE) {
4648 LOG(prefix << qname << ": Insecure denial found for " << neName << ", returning Insecure" << endl);
4649 neValidationState = vState::Insecure;
4650 }
4651 else {
4652 LOG(prefix << qname << ": Invalid denial found for " << neName << ", res=" << denialState << ", expectedState=" << expectedState << ", checking whether we have missed a zone cut before returning a Bogus state" << endl);
4653 /* try again to get the missed cuts, harder this time */
4654 auto zState = getValidationStatus(neName, false, isDS, depth, prefix);
4655 if (zState != vState::Secure) {
4656 neValidationState = zState;
4657 }
4658 else {
4659 LOG(prefix << qname << ": Still in a secure zone with an invalid denial for " << neName << ", returning " << vStateToString(vState::BogusInvalidDenial) << endl);
4660 neValidationState = vState::BogusInvalidDenial;
4661 }
4662 }
4663 }
4664 updateValidationState(qname, state, neValidationState, prefix);
4665 }
4666
4667 dState SyncRes::getDenialValidationState(const NegCache::NegCacheEntry& ne, const dState expectedState, bool referralToUnsigned, const string& prefix)
4668 {
4669 cspmap_t csp = harvestCSPFromNE(ne);
4670 return getDenial(csp, ne.d_name, ne.d_qtype.getCode(), referralToUnsigned, expectedState == dState::NXQTYPE, LogObject(prefix));
4671 }
4672
/* Walk every record of the response held in lwr and act on it depending on its section and type.
   As implemented below:
   - OPT and non-IN records are skipped, as are ANSWER records when neither lwr.d_aabit nor
     sendRDQuery is set (except a CNAME owned by qname);
   - a SOA in AUTHORITY of an NXDomain response builds a negative cache entry covering the whole
     name, gets its validation state computed and sets negindic;
   - a CNAME/DNAME in ANSWER (when qtype is not itself a redirection type) is appended to ret and
     sets newtarget;
   - a record in ANSWER matching qname and qtype is appended to ret, sets rcode to NoError and
     marks the lookup as done;
   - NS records in AUTHORITY fill nsset and may set newauth and realreferral;
   - NSEC/NSEC3 records in AUTHORITY of a real referral may add a DS denial to the negative cache;
   - a SOA in AUTHORITY of a NoError response builds a negative cache entry for qname/qtype and
     sets negindic.
   After the loop, a CNAME is synthesized into ret if a DNAME was seen, and a referral received
   for a DS query without any negative indication is turned into a negative indication.
   Returns true if a record answering qname/qtype was found in the ANSWER section. */
4673 bool SyncRes::processRecords(const std::string& prefix, const DNSName& qname, const QType qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherWildcardProof, const unsigned int wildcardLabelsCount, int& rcode, bool& negIndicHasSignatures, unsigned int depth)
4674 {
4675 bool done = false;
4676 DNSName dnameTarget, dnameOwner;
4677 uint32_t dnameTTL = 0;
4678 bool referralOnDS = false;
4679
4680 for (auto& rec : lwr.d_records) {
4681 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4682 continue;
4683 }
4684
4685 if (rec.d_place == DNSResourceRecord::ANSWER && !(lwr.d_aabit || sendRDQuery)) {
4686 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4687 are sending such responses */
4688 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
4689 continue;
4690 }
4691 }
// an NXDomain response carrying, in AUTHORITY, a SOA that covers qname and lies within auth
4692 const bool negCacheIndication = rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NXDomain && qname.isPartOf(rec.d_name) && rec.d_name.isPartOf(auth);
4693
4694 bool putInNegCache = true;
4695 if (negCacheIndication && qtype == QType::DS && isForwardOrAuth(qname)) {
4696 // #10189, a NXDOMAIN to a DS query for a forwarded or auth domain should not NXDOMAIN the whole domain
4697 putInNegCache = false;
4698 }
4699
4700 if (negCacheIndication) {
4701 LOG(prefix << qname << ": Got negative caching indication for name '" << qname << "' (accept=" << rec.d_name.isPartOf(auth) << "), newtarget='" << newtarget << "'" << endl);
4702
4703 rec.d_ttl = min(rec.d_ttl, s_maxnegttl);
4704 // only add a SOA if we're not going anywhere after this
4705 if (newtarget.empty()) {
4706 ret.push_back(rec);
4707 }
4708
4709 NegCache::NegCacheEntry ne;
4710
4711 uint32_t lowestTTL = rec.d_ttl;
4712 /* if we get an NXDomain answer with a CNAME, the name
4713 does exist but the target does not */
4714 ne.d_name = newtarget.empty() ? qname : newtarget;
4715 ne.d_qtype = QType::ENT; // this encodes 'whole record'
4716 ne.d_auth = rec.d_name;
4717 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4718
4719 if (vStateIsBogus(state)) {
4720 ne.d_validationState = state;
4721 }
4722 else {
4723 /* here we need to get the validation status of the zone telling us that the domain does not
4724 exist, ie the owner of the SOA */
4725 auto recordState = getValidationStatus(rec.d_name, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), false, depth, prefix);
4726 if (recordState == vState::Secure) {
4727 dState denialState = getDenialValidationState(ne, dState::NXDOMAIN, false, prefix);
4728 updateDenialValidationState(qname, ne.d_validationState, ne.d_name, state, denialState, dState::NXDOMAIN, false, depth, prefix);
4729 }
4730 else {
4731 ne.d_validationState = recordState;
4732 updateValidationState(qname, state, ne.d_validationState, prefix);
4733 }
4734 }
4735
4736 if (vStateIsBogus(ne.d_validationState)) {
4737 lowestTTL = min(lowestTTL, s_maxbogusttl);
4738 }
4739
4740 ne.d_ttd = d_now.tv_sec + lowestTTL;
4741 ne.d_orig_ttl = lowestTTL;
4742 /* if we get an NXDomain answer with a CNAME, let's not cache the
4743 target, even the server was authoritative for it,
4744 and do an additional query for the CNAME target.
4745 We have a regression test making sure we do exactly that.
4746 */
4747 if (newtarget.empty() && putInNegCache) {
4748 g_negCache->add(ne);
4749 // doCNAMECacheCheck() checks record cache and does not look into negcache. That means that an old record might be found if
4750 // serve-stale is active. Avoid that by explicitly zapping that CNAME record.
4751 if (qtype == QType::CNAME && MemRecursorCache::s_maxServedStaleExtensions > 0) {
4752 g_recCache->doWipeCache(qname, false, qtype);
4753 }
// with s_rootNXTrust set and an AA denial from the root itself, a second entry is added for the last label of the name only
4754 if (s_rootNXTrust && ne.d_auth.isRoot() && auth.isRoot() && lwr.d_aabit) {
4755 ne.d_name = ne.d_name.getLastLabel();
4756 g_negCache->add(ne);
4757 }
4758 }
4759
4760 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4761 negindic = true;
4762 }
4763 else if (rec.d_place == DNSResourceRecord::ANSWER && s_redirectionQTypes.count(rec.d_type) > 0 && // CNAME or DNAME answer
4764 s_redirectionQTypes.count(qtype.getCode()) == 0) { // But not in response to a CNAME or DNAME query
4765 if (rec.d_type == QType::CNAME && rec.d_name == qname) {
4766 if (!dnameOwner.empty()) { // We synthesize ourselves
4767 continue;
4768 }
4769 ret.push_back(rec);
4770 if (auto content = getRR<CNAMERecordContent>(rec)) {
4771 newtarget = DNSName(content->getTarget());
4772 }
4773 }
4774 else if (rec.d_type == QType::DNAME && qname.isPartOf(rec.d_name)) { // DNAME
4775 ret.push_back(rec);
4776 if (auto content = getRR<DNAMERecordContent>(rec)) {
4777 dnameOwner = rec.d_name;
4778 dnameTarget = content->getTarget();
4779 dnameTTL = rec.d_ttl;
4780 if (!newtarget.empty()) { // We had a CNAME before, remove it from ret so we don't cache it
4781 ret.erase(std::remove_if(
4782 ret.begin(),
4783 ret.end(),
4784 [&qname](DNSRecord& rr) {
4785 return (rr.d_place == DNSResourceRecord::ANSWER && rr.d_type == QType::CNAME && rr.d_name == qname);
4786 }),
4787 ret.end());
4788 }
// the new target is qname with the DNAME owner suffix replaced by the DNAME target
4789 try {
4790 newtarget = qname.makeRelative(dnameOwner) + dnameTarget;
4791 }
4792 catch (const std::exception& e) {
4793 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
4794 // But there is no way to set the RCODE from this function
4795 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + dnameOwner.toLogString() + "', DNAME target: '" + dnameTarget.toLogString() + "', substituted name: '" + qname.makeRelative(dnameOwner).toLogString() + "." + dnameTarget.toLogString() + "' : " + e.what());
4796 }
4797 }
4798 }
4799 }
4800 /* if we have a positive answer synthesized from a wildcard, we need to
4801 return the corresponding NSEC/NSEC3 records from the AUTHORITY section
4802 proving that the exact name did not exist.
4803 Except if this is a NODATA answer because then we will gather the NXNSEC records later */
4804 else if (gatherWildcardProof && !negindic && (rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::AUTHORITY) {
4805 ret.push_back(rec); // enjoy your DNSSEC
4806 }
4807 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
4808 else if (rec.d_place == DNSResourceRecord::ANSWER && rec.d_name == qname && (rec.d_type == qtype.getCode() || ((lwr.d_aabit || sendRDQuery) && qtype == QType::ANY))) {
4809 LOG(prefix << qname << ": Answer is in: resolved to '" << rec.getContent()->getZoneRepresentation() << "|" << DNSRecordContent::NumberToType(rec.d_type) << "'" << endl);
4810
4811 done = true;
4812 rcode = RCode::NoError;
4813
4814 if (needWildcardProof) {
4815 /* positive answer synthesized from a wildcard */
4816 NegCache::NegCacheEntry ne;
4817 ne.d_name = qname;
4818 ne.d_qtype = QType::ENT; // this encodes 'whole record'
4819 uint32_t lowestTTL = rec.d_ttl;
4820 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4821
4822 if (vStateIsBogus(state)) {
4823 ne.d_validationState = state;
4824 }
4825 else {
4826 auto recordState = getValidationStatus(qname, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), false, depth, prefix);
4827
4828 if (recordState == vState::Secure) {
4829 /* We have a positive answer synthesized from a wildcard, we need to check that we have
4830 proof that the exact name doesn't exist so the wildcard can be used,
4831 as described in section 5.3.4 of RFC 4035 and 5.3 of RFC 7129.
4832 */
4833 cspmap_t csp = harvestCSPFromNE(ne);
4834 dState res = getDenial(csp, qname, ne.d_qtype.getCode(), false, false, LogObject(prefix), false, wildcardLabelsCount);
4835 if (res != dState::NXDOMAIN) {
4836 vState st = vState::BogusInvalidDenial;
4837 if (res == dState::INSECURE || res == dState::OPTOUT) {
4838 /* Some part could not be validated, for example a NSEC3 record with a too large number of iterations,
4839 this is not enough to warrant a Bogus, but go Insecure. */
4840 st = vState::Insecure;
4841 LOG(prefix << qname << ": Unable to validate denial in wildcard expanded positive response found for " << qname << ", returning Insecure, res=" << res << endl);
4842 }
4843 else {
4844 LOG(prefix << qname << ": Invalid denial in wildcard expanded positive response found for " << qname << ", returning Bogus, res=" << res << endl);
4845 rec.d_ttl = std::min(rec.d_ttl, s_maxbogusttl);
4846 }
4847
4848 updateValidationState(qname, state, st, prefix);
4849 /* we already stored the record with a different validation status, let's fix it */
4850 updateValidationStatusInCache(qname, qtype, lwr.d_aabit, st);
4851 }
4852 }
4853 }
4854 }
4855
4856 ret.push_back(rec);
4857 }
// DNSSEC records in ANSWER: NSEC/NSEC3 are always kept, RRSIGs only when owned by qname or when covering a DNAME above qname
4858 else if ((rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::ANSWER) {
4859 if (rec.d_type != QType::RRSIG || rec.d_name == qname) {
4860 ret.push_back(rec); // enjoy your DNSSEC
4861 }
4862 else if (rec.d_type == QType::RRSIG && qname.isPartOf(rec.d_name)) {
4863 auto rrsig = getRR<RRSIGRecordContent>(rec);
4864 if (rrsig != nullptr && rrsig->d_type == QType::DNAME) {
4865 ret.push_back(rec);
4866 }
4867 }
4868 }
4869 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::NS && qname.isPartOf(rec.d_name)) {
4870 if (moreSpecificThan(rec.d_name, auth)) {
4871 newauth = rec.d_name;
4872 LOG(prefix << qname << ": Got NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4873
4874 /* check if we have a referral from the parent zone to a child zone for a DS query, which is not right */
4875 if (qtype == QType::DS && (newauth.isPartOf(qname) || qname == newauth)) {
4876 /* just got a referral from the parent zone when asking for a DS, looks like this server did not get the DNSSEC memo.. */
4877 referralOnDS = true;
4878 }
4879 else {
4880 realreferral = true;
4881 if (auto content = getRR<NSRecordContent>(rec)) {
4882 nsset.insert(content->getNS());
4883 }
4884 }
4885 }
4886 else {
4887 LOG(prefix << qname << ": Got upwards/level NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "', had '" << auth << "'" << endl);
4888 if (auto content = getRR<NSRecordContent>(rec)) {
4889 nsset.insert(content->getNS());
4890 }
4891 }
4892 }
// DS records in AUTHORITY are only logged in this function
4893 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::DS && qname.isPartOf(rec.d_name)) {
4894 LOG(prefix << qname << ": Got DS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4895 }
4896 else if (realreferral && rec.d_place == DNSResourceRecord::AUTHORITY && (rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && newauth.isPartOf(auth)) {
4897 /* we might have received a denial of the DS, let's check */
4898 NegCache::NegCacheEntry ne;
4899 uint32_t lowestTTL = rec.d_ttl;
4900 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4901
4902 if (!vStateIsBogus(state)) {
4903 auto recordState = getValidationStatus(newauth, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), true, depth, prefix);
4904
4905 if (recordState == vState::Secure) {
4906 ne.d_auth = auth;
4907 ne.d_name = newauth;
4908 ne.d_qtype = QType::DS;
4909 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
4910
4911 dState denialState = getDenialValidationState(ne, dState::NXQTYPE, true, prefix);
4912
// NOTE(review): only OPTOUT is downgraded to Insecure below, an INSECURE denial is stored as vState::Secure - confirm this is intended
4913 if (denialState == dState::NXQTYPE || denialState == dState::OPTOUT || denialState == dState::INSECURE) {
4914 ne.d_ttd = lowestTTL + d_now.tv_sec;
4915 ne.d_orig_ttl = lowestTTL;
4916 ne.d_validationState = vState::Secure;
4917 if (denialState == dState::OPTOUT) {
4918 ne.d_validationState = vState::Insecure;
4919 }
4920 LOG(prefix << qname << ": Got negative indication of DS record for '" << newauth << "'" << endl);
4921
4922 g_negCache->add(ne);
4923
4924 /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
4925 and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
4926 query. */
4927 if (qtype == QType::DS && qname == newauth && (d_externalDSQuery.empty() || qname != d_externalDSQuery)) {
4928 /* we are actually done! */
4929 negindic = true;
4930 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4931 nsset.clear();
4932 }
4933 }
4934 }
4935 }
4936 }
// NoError response with a SOA covering qname in AUTHORITY and no answer found so far
4937 else if (!done && rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NoError && qname.isPartOf(rec.d_name)) {
4938 LOG(prefix << qname << ": Got negative caching indication for '" << qname << "|" << qtype << "'" << endl);
4939
4940 if (!newtarget.empty()) {
4941 LOG(prefix << qname << ": Hang on! Got a redirect to '" << newtarget << "' already" << endl);
4942 }
4943 else {
4944 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
4945
4946 NegCache::NegCacheEntry ne;
4947 ne.d_auth = rec.d_name;
4948 uint32_t lowestTTL = rec.d_ttl;
4949 ne.d_name = qname;
4950 ne.d_qtype = qtype;
4951 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4952
4953 if (vStateIsBogus(state)) {
4954 ne.d_validationState = state;
4955 }
4956 else {
4957 auto recordState = getValidationStatus(qname, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), qtype == QType::DS, depth, prefix);
4958 if (recordState == vState::Secure) {
4959 dState denialState = getDenialValidationState(ne, dState::NXQTYPE, false, prefix);
4960 updateDenialValidationState(qname, ne.d_validationState, ne.d_name, state, denialState, dState::NXQTYPE, qtype == QType::DS, depth, prefix);
4961 }
4962 else {
4963 ne.d_validationState = recordState;
4964 updateValidationState(qname, state, ne.d_validationState, prefix);
4965 }
4966 }
4967
4968 if (vStateIsBogus(ne.d_validationState)) {
4969 lowestTTL = min(lowestTTL, s_maxbogusttl);
4970 rec.d_ttl = min(rec.d_ttl, s_maxbogusttl);
4971 }
4972 ne.d_ttd = d_now.tv_sec + lowestTTL;
4973 ne.d_orig_ttl = lowestTTL;
4974 if (qtype.getCode()) { // prevents us from NXDOMAIN'ing a whole domain
4975 g_negCache->add(ne);
4976 }
4977
4978 ret.push_back(rec);
4979 negindic = true;
4980 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4981 }
4982 }
4983 }
4984
4985 if (!dnameTarget.empty()) {
4986 // Synthesize a CNAME
4987 auto cnamerec = DNSRecord();
4988 cnamerec.d_name = qname;
4989 cnamerec.d_type = QType::CNAME;
4990 cnamerec.d_ttl = dnameTTL;
4991 cnamerec.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newtarget)));
4992 ret.push_back(std::move(cnamerec));
4993 }
4994
4995 /* If we have seen a proper denial, let's forget that we also had a referral for a DS query.
4996 Otherwise we need to deal with it. */
4997 if (referralOnDS && !negindic) {
4998 LOG(prefix << qname << ": Got a referral to the child zone for a DS query without a negative indication (missing SOA in authority), treating that as a NODATA" << endl);
4999 if (!vStateIsBogus(state)) {
5000 auto recordState = getValidationStatus(qname, false, true, depth, prefix);
5001 if (recordState == vState::Secure) {
5002 /* we are in a secure zone, got a referral to the child zone on a DS query, no denial, that's wrong */
5003 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5004 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5005 }
5006 }
5007 negindic = true;
5008 negIndicHasSignatures = false;
5009 }
5010
5011 return done;
5012 }
5013
5014 static void submitTryDotTask(ComboAddress address, const DNSName& auth, const DNSName nsname, time_t now)
5015 {
5016 if (address.getPort() == 853) {
5017 return;
5018 }
5019 address.setPort(853);
5020 auto lock = s_dotMap.lock();
5021 if (lock->d_numBusy >= SyncRes::s_max_busy_dot_probes) {
5022 return;
5023 }
5024 auto it = lock->d_map.emplace(DoTStatus{address, auth, now + dotFailWait}).first;
5025 if (it->d_status == DoTStatus::Busy) {
5026 return;
5027 }
5028 if (it->d_ttd > now) {
5029 if (it->d_status == DoTStatus::Bad) {
5030 return;
5031 }
5032 if (it->d_status == DoTStatus::Good) {
5033 return;
5034 }
5035 // We only want to probe auths that we have seen before, auth that only come around once are not interesting
5036 if (it->d_status == DoTStatus::Unknown && it->d_count == 0) {
5037 return;
5038 }
5039 }
5040 lock->d_map.modify(it, [=](DoTStatus& st) { st.d_ttd = now + dotFailWait; });
5041 bool pushed = pushTryDoTTask(auth, QType::SOA, address, std::numeric_limits<time_t>::max(), nsname);
5042 if (pushed) {
5043 it->d_status = DoTStatus::Busy;
5044 ++lock->d_numBusy;
5045 }
5046 }
5047
5048 static bool shouldDoDoT(ComboAddress address, time_t now)
5049 {
5050 address.setPort(853);
5051 auto lock = s_dotMap.lock();
5052 auto it = lock->d_map.find(address);
5053 if (it == lock->d_map.end()) {
5054 return false;
5055 }
5056 it->d_count++;
5057 if (it->d_status == DoTStatus::Good && it->d_ttd > now) {
5058 return true;
5059 }
5060 return false;
5061 }
5062
5063 static void updateDoTStatus(ComboAddress address, DoTStatus::Status status, time_t time, bool updateBusy = false)
5064 {
5065 address.setPort(853);
5066 auto lock = s_dotMap.lock();
5067 auto it = lock->d_map.find(address);
5068 if (it != lock->d_map.end()) {
5069 it->d_status = status;
5070 lock->d_map.modify(it, [=](DoTStatus& st) { st.d_ttd = time; });
5071 if (updateBusy) {
5072 --lock->d_numBusy;
5073 }
5074 }
5075 }
5076
5077 bool SyncRes::tryDoT(const DNSName& qname, const QType qtype, const DNSName& nsName, ComboAddress address, time_t now)
5078 {
5079 auto log = g_slog->withName("taskq")->withValues("method", Logging::Loggable("tryDoT"), "name", Logging::Loggable(qname), "qtype", Logging::Loggable(QType(qtype).toString()), "ip", Logging::Loggable(address));
5080
5081 auto logHelper1 = [&log](const string& ename) {
5082 log->info(Logr::Debug, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5083 };
5084 auto logHelper2 = [&log](const string& msg, const string& ename) {
5085 log->error(Logr::Debug, msg, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5086 };
5087 LWResult lwr;
5088 bool truncated;
5089 bool spoofed;
5090 boost::optional<Netmask> nm;
5091 address.setPort(853);
5092 // We use the fact that qname equals auth
5093 bool ok = false;
5094 try {
5095 boost::optional<EDNSExtendedError> extendedError;
5096 ok = doResolveAtThisIP("", qname, qtype, lwr, nm, qname, false, false, nsName, address, true, true, truncated, spoofed, extendedError, true);
5097 ok = ok && lwr.d_rcode == RCode::NoError && lwr.d_records.size() > 0;
5098 }
5099 catch (const PDNSException& e) {
5100 logHelper2(e.reason, "PDNSException");
5101 }
5102 catch (const ImmediateServFailException& e) {
5103 logHelper2(e.reason, "ImmediateServFailException");
5104 }
5105 catch (const PolicyHitException& e) {
5106 logHelper1("PolicyHitException");
5107 }
5108 catch (const std::exception& e) {
5109 logHelper2(e.what(), "std::exception");
5110 }
5111 catch (...) {
5112 logHelper1("other");
5113 }
5114 updateDoTStatus(address, ok ? DoTStatus::Good : DoTStatus::Bad, now + (ok ? dotSuccessWait : dotFailWait), true);
5115 return ok;
5116 }
5117
5118 bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const bool wasForwarded, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool doDoT, bool& truncated, bool& spoofed, boost::optional<EDNSExtendedError>& extendedError, bool dontThrottle)
5119 {
5120 bool chained = false;
5121 LWResult::Result resolveret = LWResult::Result::Success;
5122 t_Counters.at(rec::Counter::outqueries)++;
5123 d_outqueries++;
5124 checkMaxQperQ(qname);
5125
5126 if (s_maxtotusec && d_totUsec > s_maxtotusec) {
5127 if (s_addExtendedResolutionDNSErrors) {
5128 extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "Timeout waiting for answer(s)"};
5129 }
5130 throw ImmediateServFailException("Too much time waiting for " + qname.toLogString() + "|" + qtype.toString() + ", timeouts: " + std::to_string(d_timeouts) + ", throttles: " + std::to_string(d_throttledqueries) + ", queries: " + std::to_string(d_outqueries) + ", " + std::to_string(d_totUsec / 1000) + "msec");
5131 }
5132
5133 if (doTCP) {
5134 if (doDoT) {
5135 LOG(prefix << qname << ": Using DoT with " << remoteIP.toStringWithPort() << endl);
5136 t_Counters.at(rec::Counter::dotoutqueries)++;
5137 d_dotoutqueries++;
5138 }
5139 else {
5140 LOG(prefix << qname << ": Using TCP with " << remoteIP.toStringWithPort() << endl);
5141 t_Counters.at(rec::Counter::tcpoutqueries)++;
5142 d_tcpoutqueries++;
5143 }
5144 }
5145
5146 int preOutQueryRet = RCode::NoError;
5147 if (d_pdl && d_pdl->preoutquery(remoteIP, d_requestor, qname, qtype, doTCP, lwr.d_records, preOutQueryRet, d_eventTrace, timeval{0, 0})) {
5148 LOG(prefix << qname << ": Query handled by Lua" << endl);
5149 }
5150 else {
5151 ednsmask = getEDNSSubnetMask(qname, remoteIP);
5152 if (ednsmask) {
5153 LOG(prefix << qname << ": Adding EDNS Client Subnet Mask " << ednsmask->toString() << " to query" << endl);
5154 s_ecsqueries++;
5155 }
5156 resolveret = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, auth, qtype.getCode(),
5157 doTCP, sendRDQuery, &d_now, ednsmask, &lwr, &chained, nsName); // <- we go out on the wire!
5158 if (ednsmask) {
5159 s_ecsresponses++;
5160 LOG(prefix << qname << ": Received EDNS Client Subnet Mask " << ednsmask->toString() << " on response" << endl);
5161 if (ednsmask->getBits() > 0) {
5162 if (ednsmask->isIPv4()) {
5163 ++SyncRes::s_ecsResponsesBySubnetSize4.at(ednsmask->getBits() - 1);
5164 }
5165 else {
5166 ++SyncRes::s_ecsResponsesBySubnetSize6.at(ednsmask->getBits() - 1);
5167 }
5168 }
5169 }
5170 }
5171
5172 /* preoutquery killed the query by setting dq.rcode to -3 */
5173 if (preOutQueryRet == -3) {
5174 throw ImmediateServFailException("Query killed by policy");
5175 }
5176
5177 d_totUsec += lwr.d_usec;
5178 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
5179 ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
5180
5181 if (!dontThrottle) {
5182 auto dontThrottleNames = g_dontThrottleNames.getLocal();
5183 auto dontThrottleNetmasks = g_dontThrottleNetmasks.getLocal();
5184 dontThrottle = dontThrottleNames->check(nsName) || dontThrottleNetmasks->match(remoteIP);
5185 }
5186
5187 if (resolveret != LWResult::Result::Success) {
5188 /* Error while resolving */
5189 if (resolveret == LWResult::Result::Timeout) {
5190 /* Time out */
5191
5192 LOG(prefix << qname << ": Timeout resolving after " << lwr.d_usec / 1000.0 << "msec " << (doTCP ? "over TCP" : "") << endl);
5193 d_timeouts++;
5194 t_Counters.at(rec::Counter::outgoingtimeouts)++;
5195
5196 if (remoteIP.sin4.sin_family == AF_INET)
5197 t_Counters.at(rec::Counter::outgoing4timeouts)++;
5198 else
5199 t_Counters.at(rec::Counter::outgoing6timeouts)++;
5200
5201 if (t_timeouts)
5202 t_timeouts->push_back(remoteIP);
5203 }
5204 else if (resolveret == LWResult::Result::OSLimitError) {
5205 /* OS resource limit reached */
5206 LOG(prefix << qname << ": Hit a local resource limit resolving" << (doTCP ? " over TCP" : "") << ", probable error: " << stringerror() << endl);
5207 t_Counters.at(rec::Counter::resourceLimits)++;
5208 }
5209 else if (resolveret == LWResult::Result::Spoofed) {
5210 spoofed = true;
5211 }
5212 else {
5213 /* LWResult::Result::PermanentError */
5214 t_Counters.at(rec::Counter::unreachables)++;
5215 d_unreachables++;
5216 // XXX questionable use of errno
5217 LOG(prefix << qname << ": Error resolving from " << remoteIP.toString() << (doTCP ? " over TCP" : "") << ", possible error: " << stringerror() << endl);
5218 }
5219
5220 if (resolveret != LWResult::Result::OSLimitError && !chained && !dontThrottle) {
5221 // don't account for resource limits, they are our own fault
5222 // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
5223 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5224
5225 // code below makes sure we don't filter COM or the root
5226 if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && s_fails.lock()->incr(remoteIP, d_now) >= s_serverdownmaxfails) {
5227 LOG(prefix << qname << ": Max fails reached resolving on " << remoteIP.toString() << ". Going full throttle for " << s_serverdownthrottletime << " seconds" << endl);
5228 // mark server as down
5229 doThrottle(d_now.tv_sec, remoteIP, s_serverdownthrottletime, 10000);
5230 }
5231 else if (resolveret == LWResult::Result::PermanentError) {
5232 // unreachable, 1 minute or 100 queries
5233 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 100);
5234 }
5235 else {
5236 // timeout, 10 seconds or 5 queries
5237 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 5);
5238 }
5239 }
5240
5241 return false;
5242 }
5243
5244 if (lwr.d_validpacket == false) {
5245 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a packet we could not parse over " << (doTCP ? "TCP" : "UDP") << ", trying sibling IP or NS" << endl);
5246 if (!chained && !dontThrottle) {
5247
5248 // let's make sure we prefer a different server for some time, if there is one available
5249 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5250
5251 if (doTCP) {
5252 // we can be more heavy-handed over TCP
5253 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 10);
5254 }
5255 else {
5256 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 2);
5257 }
5258 }
5259 return false;
5260 }
5261 else {
5262 /* we got an answer */
5263 if (lwr.d_rcode != RCode::NoError && lwr.d_rcode != RCode::NXDomain) {
5264 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a " << RCode::to_s(lwr.d_rcode) << ", trying sibling IP or NS" << endl);
5265 if (!chained && !dontThrottle) {
5266 if (wasForwarded && lwr.d_rcode == RCode::ServFail) {
5267 // rather than throttling what could be the only server we have for this destination, let's make sure we try a different one if there is one available
5268 // on the other hand, we might keep hammering a server under attack if there is no other alternative, or the alternative is overwhelmed as well, but
5269 // at the very least we will detect that if our packets stop being answered
5270 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5271 }
5272 else {
5273 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5274 }
5275 }
5276 return false;
5277 }
5278 }
5279
5280 /* this server sent a valid answer, mark it backup up if it was down */
5281 if (s_serverdownmaxfails > 0) {
5282 s_fails.lock()->clear(remoteIP);
5283 }
5284
5285 if (lwr.d_tcbit) {
5286 truncated = true;
5287
5288 if (doTCP) {
5289 LOG(prefix << qname << ": Truncated bit set, over TCP?" << endl);
5290 if (!dontThrottle) {
5291 /* let's treat that as a ServFail answer from this server */
5292 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5293 }
5294 return false;
5295 }
5296 LOG(prefix << qname << ": Truncated bit set, over UDP" << endl);
5297
5298 return true;
5299 }
5300
5301 return true;
5302 }
5303
5304 void SyncRes::handleNewTarget(const std::string& prefix, const DNSName& qname, const DNSName& newtarget, const QType qtype, std::vector<DNSRecord>& ret, int& rcode, unsigned int depth, const std::vector<DNSRecord>& recordsFromAnswer, vState& state)
5305 {
5306 if (newtarget == qname) {
5307 LOG(prefix << qname << ": Status=got a CNAME referral to self, returning SERVFAIL" << endl);
5308 ret.clear();
5309 rcode = RCode::ServFail;
5310 return;
5311 }
5312 if (newtarget.isPartOf(qname)) {
5313 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
5314 LOG(prefix << qname << ": Status=got a CNAME referral to child, disabling QM" << endl);
5315 setQNameMinimization(false);
5316 }
5317
5318 // Was 10 originally, default s_maxdepth is 40, but even if it is zero we want to apply a bound
5319 auto bound = std::max(40U, getAdjustedRecursionBound()) / 4;
5320 if (depth > bound) {
5321 LOG(prefix << qname << ": Status=got a CNAME referral, but recursing too deep, returning SERVFAIL" << endl);
5322 rcode = RCode::ServFail;
5323 return;
5324 }
5325
5326 if (!d_followCNAME) {
5327 rcode = RCode::NoError;
5328 return;
5329 }
5330
5331 // Check to see if we already have seen the new target as a previous target
5332 if (scanForCNAMELoop(newtarget, ret)) {
5333 LOG(prefix << qname << ": Status=got a CNAME referral that causes a loop, returning SERVFAIL" << endl);
5334 ret.clear();
5335 rcode = RCode::ServFail;
5336 return;
5337 }
5338
5339 if (qtype == QType::DS || qtype == QType::DNSKEY) {
5340 LOG(prefix << qname << ": Status=got a CNAME referral, but we are looking for a DS or DNSKEY" << endl);
5341
5342 if (d_doDNSSEC) {
5343 addNXNSECS(ret, recordsFromAnswer);
5344 }
5345
5346 rcode = RCode::NoError;
5347 return;
5348 }
5349
5350 LOG(prefix << qname << ": Status=got a CNAME referral, starting over with " << newtarget << endl);
5351
5352 set<GetBestNSAnswer> beenthere;
5353 Context cnameContext;
5354 rcode = doResolve(newtarget, qtype, ret, depth + 1, beenthere, cnameContext);
5355 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << state << " with the state from the CNAME quest: " << cnameContext.state << endl);
5356 updateValidationState(qname, state, cnameContext.state, prefix);
5357 }
5358
5359 bool SyncRes::processAnswer(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, bool sendRDQuery, NsSet& nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state, const ComboAddress& remoteIP)
5360 {
5361 if (s_minimumTTL) {
5362 for (auto& rec : lwr.d_records) {
5363 rec.d_ttl = max(rec.d_ttl, s_minimumTTL);
5364 }
5365 }
5366
5367 /* if the answer is ECS-specific, a minimum TTL is set for this kind of answers
5368 and it's higher than the global minimum TTL */
5369 if (ednsmask && s_minimumECSTTL > 0 && (s_minimumTTL == 0 || s_minimumECSTTL > s_minimumTTL)) {
5370 for (auto& rec : lwr.d_records) {
5371 if (rec.d_place == DNSResourceRecord::ANSWER) {
5372 rec.d_ttl = max(rec.d_ttl, s_minimumECSTTL);
5373 }
5374 }
5375 }
5376
5377 bool needWildcardProof = false;
5378 bool gatherWildcardProof = false;
5379 unsigned int wildcardLabelsCount = 0;
5380 *rcode = updateCacheFromRecords(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, sendRDQuery, remoteIP);
5381 if (*rcode != RCode::NoError) {
5382 return true;
5383 }
5384
5385 LOG(prefix << qname << ": Determining status after receiving this packet" << endl);
5386
5387 set<DNSName> nsset;
5388 bool realreferral = false;
5389 bool negindic = false;
5390 bool negIndicHasSignatures = false;
5391 DNSName newauth;
5392 DNSName newtarget;
5393
5394 bool done = processRecords(prefix, qname, qtype, auth, lwr, sendRDQuery, ret, nsset, newtarget, newauth, realreferral, negindic, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, *rcode, negIndicHasSignatures, depth);
5395
5396 if (done) {
5397 LOG(prefix << qname << ": Status=got results, this level of recursion done" << endl);
5398 LOG(prefix << qname << ": Validation status is " << state << endl);
5399 return true;
5400 }
5401
5402 if (!newtarget.empty()) {
5403 handleNewTarget(prefix, qname, newtarget, qtype.getCode(), ret, *rcode, depth, lwr.d_records, state);
5404 return true;
5405 }
5406
5407 if (lwr.d_rcode == RCode::NXDomain) {
5408 LOG(prefix << qname << ": Status=NXDOMAIN, we are done " << (negindic ? "(have negative SOA)" : "") << endl);
5409
5410 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5411 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5412 LOG(prefix << qname << ": NXDOMAIN without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5413 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5414 }
5415 else {
5416 /* we might not have validated any record, because we did get a NXDOMAIN without any SOA
5417 from an insecure zone, for example */
5418 updateValidationState(qname, state, tempState, prefix);
5419 }
5420
5421 if (d_doDNSSEC) {
5422 addNXNSECS(ret, lwr.d_records);
5423 }
5424
5425 *rcode = RCode::NXDomain;
5426 return true;
5427 }
5428
5429 if (nsset.empty() && !lwr.d_rcode && (negindic || lwr.d_aabit || sendRDQuery)) {
5430 LOG(prefix << qname << ": Status=noerror, other types may exist, but we are done " << (negindic ? "(have negative SOA) " : "") << (lwr.d_aabit ? "(have aa bit) " : "") << endl);
5431
5432 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5433 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5434 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5435 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5436 }
5437 else {
5438 /* we might not have validated any record, because we did get a NODATA without any SOA
5439 from an insecure zone, for example */
5440 updateValidationState(qname, state, tempState, prefix);
5441 }
5442
5443 if (d_doDNSSEC) {
5444 addNXNSECS(ret, lwr.d_records);
5445 }
5446
5447 *rcode = RCode::NoError;
5448 return true;
5449 }
5450
5451 if (realreferral) {
5452 LOG(prefix << qname << ": Status=did not resolve, got " << (unsigned int)nsset.size() << " NS, ");
5453
5454 nameservers.clear();
5455 for (auto const& nameserver : nsset) {
5456 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
5457 bool match = dfe.getProcessingPolicy(nameserver, d_discardedPolicies, d_appliedPolicy);
5458 if (match) {
5459 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
5460 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
5461 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5462 /* reset to no match */
5463 d_appliedPolicy = DNSFilterEngine::Policy();
5464 }
5465 else {
5466 LOG("however " << nameserver << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
5467 throw PolicyHitException();
5468 }
5469 }
5470 }
5471 }
5472 nameservers.insert({nameserver, {{}, false}});
5473 }
5474 LOG("looping to them" << endl);
5475 *gotNewServers = true;
5476 auth = newauth;
5477
5478 return false;
5479 }
5480
5481 return false;
5482 }
5483
5484 bool SyncRes::doDoTtoAuth(const DNSName& ns) const
5485 {
5486 return g_DoTToAuthNames.getLocal()->check(ns);
5487 }
5488
5489 /** returns:
5490 * -1 in case of no results
5491 * rcode otherwise
5492 */
5493 int SyncRes::doResolveAt(NsSet& nameservers, DNSName auth, bool flawedNSSet, const DNSName& qname, const QType qtype,
5494 vector<DNSRecord>& ret,
5495 unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, Context& context, StopAtDelegation* stopAtDelegation,
5496 map<DNSName, vector<ComboAddress>>* fallBack)
5497 {
5498 auto luaconfsLocal = g_luaconfs.getLocal();
5499
5500 LOG(prefix << qname << ": Cache consultations done, have " << (unsigned int)nameservers.size() << " NS to contact");
5501
5502 if (nameserversBlockedByRPZ(luaconfsLocal->dfe, nameservers)) {
5503 /* RPZ hit */
5504 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5505 /* reset to no match */
5506 d_appliedPolicy = DNSFilterEngine::Policy();
5507 }
5508 else {
5509 throw PolicyHitException();
5510 }
5511 }
5512
5513 LOG(endl);
5514
5515 unsigned int addressQueriesForNS = 0;
5516 for (;;) { // we may get more specific nameservers
5517 auto rnameservers = shuffleInSpeedOrder(qname, nameservers, prefix);
5518
5519 // We allow s_maxnsaddressqperq (default 10) queries with empty responses when resolving NS names.
5520 // If a zone publishes many (more than s_maxnsaddressqperq) NS records, we allow less.
5521 // This is to "punish" zones that publish many non-resolving NS names.
5522 // We always allow 5 NS name resolving attempts with empty results.
5523 unsigned int nsLimit = s_maxnsaddressqperq;
5524 if (rnameservers.size() > nsLimit) {
5525 int newLimit = static_cast<int>(nsLimit) - (rnameservers.size() - nsLimit);
5526 nsLimit = std::max(5, newLimit);
5527 }
5528
5529 for (auto tns = rnameservers.cbegin();; ++tns) {
5530 if (addressQueriesForNS >= nsLimit) {
5531 throw ImmediateServFailException(std::to_string(nsLimit) + " (adjusted max-ns-address-qperq) or more queries with empty results for NS addresses sent resolving " + qname.toLogString());
5532 }
5533 if (tns == rnameservers.cend()) {
5534 LOG(prefix << qname << ": Failed to resolve via any of the " << (unsigned int)rnameservers.size() << " offered NS at level '" << auth << "'" << endl);
5535 if (s_addExtendedResolutionDNSErrors) {
5536 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "delegation " + auth.toLogString()};
5537 }
5538 if (!auth.isRoot() && flawedNSSet) {
5539 LOG(prefix << qname << ": Ageing nameservers for level '" << auth << "', next query might succeed" << endl);
5540 if (g_recCache->doAgeCache(d_now.tv_sec, auth, QType::NS, 10)) {
5541 t_Counters.at(rec::Counter::nsSetInvalidations)++;
5542 }
5543 }
5544 return -1;
5545 }
5546
5547 bool cacheOnly = false;
5548 // this line needs to identify the 'self-resolving' behaviour
5549 if (qname == tns->first && (qtype.getCode() == QType::A || qtype.getCode() == QType::AAAA)) {
5550 /* we might have a glue entry in cache so let's try this NS
5551 but only if we have enough in the cache to know how to reach it */
5552 LOG(prefix << qname << ": Using NS to resolve itself, but only using what we have in cache (" << (1 + tns - rnameservers.cbegin()) << "/" << rnameservers.size() << ")" << endl);
5553 cacheOnly = true;
5554 }
5555
5556 typedef vector<ComboAddress> remoteIPs_t;
5557 remoteIPs_t remoteIPs;
5558 remoteIPs_t::iterator remoteIP;
5559 bool pierceDontQuery = false;
5560 bool sendRDQuery = false;
5561 boost::optional<Netmask> ednsmask;
5562 LWResult lwr;
5563 const bool wasForwarded = tns->first.empty() && (!nameservers[tns->first].first.empty());
5564 int rcode = RCode::NoError;
5565 bool gotNewServers = false;
5566
5567 if (tns->first.empty() && !wasForwarded) {
5568 static ComboAddress const s_oobRemote("255.255.255.255");
5569 LOG(prefix << qname << ": Domain is out-of-band" << endl);
5570 /* setting state to indeterminate since validation is disabled for local auth zone,
5571 and Insecure would be misleading. */
5572 context.state = vState::Indeterminate;
5573 d_wasOutOfBand = doOOBResolve(qname, qtype, lwr.d_records, depth, prefix, lwr.d_rcode);
5574 lwr.d_tcbit = false;
5575 lwr.d_aabit = true;
5576
5577 /* we have received an answer, are we done ? */
5578 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, false, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, s_oobRemote);
5579 if (done) {
5580 return rcode;
5581 }
5582 if (gotNewServers) {
5583 if (stopAtDelegation && *stopAtDelegation == Stop) {
5584 *stopAtDelegation = Stopped;
5585 return rcode;
5586 }
5587 break;
5588 }
5589 }
5590 else {
5591 if (fallBack != nullptr) {
5592 if (auto it = fallBack->find(tns->first); it != fallBack->end()) {
5593 remoteIPs = it->second;
5594 }
5595 }
5596 if (remoteIPs.size() == 0) {
5597 remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS);
5598 }
5599
5600 if (remoteIPs.empty()) {
5601 LOG(prefix << qname << ": Failed to get IP for NS " << tns->first << ", trying next if available" << endl);
5602 flawedNSSet = true;
5603 continue;
5604 }
5605 else {
5606 bool hitPolicy{false};
5607 LOG(prefix << qname << ": Resolved '" << auth << "' NS " << tns->first << " to: ");
5608 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5609 if (remoteIP != remoteIPs.begin()) {
5610 LOG(", ");
5611 }
5612 LOG(remoteIP->toString());
5613 if (nameserverIPBlockedByRPZ(luaconfsLocal->dfe, *remoteIP)) {
5614 hitPolicy = true;
5615 }
5616 }
5617 LOG(endl);
5618 if (hitPolicy) { // implies d_wantsRPZ
5619 /* RPZ hit */
5620 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5621 /* reset to no match */
5622 d_appliedPolicy = DNSFilterEngine::Policy();
5623 }
5624 else {
5625 throw PolicyHitException();
5626 }
5627 }
5628 }
5629
5630 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5631 LOG(prefix << qname << ": Trying IP " << remoteIP->toStringWithPort() << ", asking '" << qname << "|" << qtype << "'" << endl);
5632
5633 if (throttledOrBlocked(prefix, *remoteIP, qname, qtype, pierceDontQuery)) {
5634 // As d_throttledqueries might be increased, check the max-qperq condition
5635 checkMaxQperQ(qname);
5636 continue;
5637 }
5638
5639 bool truncated = false;
5640 bool spoofed = false;
5641 bool gotAnswer = false;
5642 bool doDoT = false;
5643
5644 if (doDoTtoAuth(tns->first)) {
5645 remoteIP->setPort(853);
5646 doDoT = true;
5647 }
5648 if (SyncRes::s_dot_to_port_853 && remoteIP->getPort() == 853) {
5649 doDoT = true;
5650 }
5651 bool forceTCP = doDoT;
5652
5653 if (!doDoT && s_max_busy_dot_probes > 0) {
5654 submitTryDotTask(*remoteIP, auth, tns->first, d_now.tv_sec);
5655 }
5656 if (!forceTCP) {
5657 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5658 tns->first, *remoteIP, false, false, truncated, spoofed, context.extendedError);
5659 }
5660 if (forceTCP || (spoofed || (gotAnswer && truncated))) {
5661 /* retry, over TCP this time */
5662 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5663 tns->first, *remoteIP, true, doDoT, truncated, spoofed, context.extendedError);
5664 }
5665
5666 if (!gotAnswer) {
5667 if (doDoT && s_max_busy_dot_probes > 0) {
5668 // This is quite pessimistic...
5669 updateDoTStatus(*remoteIP, DoTStatus::Bad, d_now.tv_sec + dotFailWait);
5670 }
5671 continue;
5672 }
5673
5674 LOG(prefix << qname << ": Got " << (unsigned int)lwr.d_records.size() << " answers from " << tns->first << " (" << remoteIP->toString() << "), rcode=" << lwr.d_rcode << " (" << RCode::to_s(lwr.d_rcode) << "), aa=" << lwr.d_aabit << ", in " << lwr.d_usec / 1000 << "ms" << endl);
5675
5676 if (doDoT && s_max_busy_dot_probes > 0) {
5677 updateDoTStatus(*remoteIP, DoTStatus::Good, d_now.tv_sec + dotSuccessWait);
5678 }
5679 /* // for you IPv6 fanatics :-)
5680 if(remoteIP->sin4.sin_family==AF_INET6)
5681 lwr.d_usec/=3;
5682 */
5683 // cout<<"msec: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
5684
5685 s_nsSpeeds.lock()->find_or_enter(tns->first.empty() ? DNSName(remoteIP->toStringWithPort()) : tns->first, d_now).submit(*remoteIP, lwr.d_usec, d_now);
5686
5687 /* we have received an answer, are we done ? */
5688 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, *remoteIP);
5689 if (done) {
5690 return rcode;
5691 }
5692 if (gotNewServers) {
5693 if (stopAtDelegation && *stopAtDelegation == Stop) {
5694 *stopAtDelegation = Stopped;
5695 return rcode;
5696 }
5697 break;
5698 }
5699 /* was lame */
5700 doThrottle(d_now.tv_sec, *remoteIP, qname, qtype, 60, 100);
5701 }
5702
5703 if (gotNewServers) {
5704 break;
5705 }
5706
5707 if (remoteIP == remoteIPs.cend()) // we tried all IP addresses, none worked
5708 continue;
5709 }
5710 }
5711 }
5712 return -1;
5713 }
5714
5715 void SyncRes::setQuerySource(const Netmask& netmask)
5716 {
5717 if (!netmask.empty()) {
5718 d_outgoingECSNetwork = netmask;
5719 }
5720 else {
5721 d_outgoingECSNetwork = boost::none;
5722 }
5723 }
5724
5725 void SyncRes::setQuerySource(const ComboAddress& requestor, boost::optional<const EDNSSubnetOpts&> incomingECS)
5726 {
5727 d_requestor = requestor;
5728
5729 if (incomingECS && incomingECS->source.getBits() > 0) {
5730 d_cacheRemote = incomingECS->source.getMaskedNetwork();
5731 uint8_t bits = std::min(incomingECS->source.getBits(), (incomingECS->source.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5732 ComboAddress trunc = incomingECS->source.getNetwork();
5733 trunc.truncate(bits);
5734 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5735 }
5736 else {
5737 d_cacheRemote = d_requestor;
5738 if (!incomingECS && s_ednslocalsubnets.match(d_requestor)) {
5739 ComboAddress trunc = d_requestor;
5740 uint8_t bits = d_requestor.isIPv4() ? 32 : 128;
5741 bits = std::min(bits, (trunc.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5742 trunc.truncate(bits);
5743 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5744 }
5745 else if (s_ecsScopeZero.source.getBits() > 0) {
5746 /* RFC7871 says we MUST NOT send any ECS if the source scope is 0.
5747 But using an empty ECS in that case would mean inserting
5748 a non ECS-specific entry into the cache, preventing any further
5749 ECS-specific query to be sent.
5750 So instead we use the trick described in section 7.1.2:
5751 "The subsequent Recursive Resolver query to the Authoritative Nameserver
5752 will then either not include an ECS option or MAY optionally include
5753 its own address information, which is what the Authoritative
5754 Nameserver will almost certainly use to generate any Tailored
5755 Response in lieu of an option. This allows the answer to be handled
5756 by the same caching mechanism as other queries, with an explicit
5757 indicator of the applicable scope. Subsequent Stub Resolver queries
5758 for /0 can then be answered from this cached response.
5759 */
5760 d_outgoingECSNetwork = boost::optional<Netmask>(s_ecsScopeZero.source.getMaskedNetwork());
5761 d_cacheRemote = s_ecsScopeZero.source.getNetwork();
5762 }
5763 else {
5764 // ECS disabled because no scope-zero address could be derived.
5765 d_outgoingECSNetwork = boost::none;
5766 }
5767 }
5768 }
5769
5770 boost::optional<Netmask> SyncRes::getEDNSSubnetMask(const DNSName& dn, const ComboAddress& rem)
5771 {
5772 if (d_outgoingECSNetwork && (s_ednsdomains.check(dn) || s_ednsremotesubnets.match(rem))) {
5773 return d_outgoingECSNetwork;
5774 }
5775 return boost::none;
5776 }
5777
5778 void SyncRes::parseEDNSSubnetAllowlist(const std::string& alist)
5779 {
5780 vector<string> parts;
5781 stringtok(parts, alist, ",; ");
5782 for (const auto& a : parts) {
5783 try {
5784 s_ednsremotesubnets.addMask(Netmask(a));
5785 }
5786 catch (...) {
5787 s_ednsdomains.add(DNSName(a));
5788 }
5789 }
5790 }
5791
5792 void SyncRes::parseEDNSSubnetAddFor(const std::string& subnetlist)
5793 {
5794 vector<string> parts;
5795 stringtok(parts, subnetlist, ",; ");
5796 for (const auto& a : parts) {
5797 s_ednslocalsubnets.addMask(a);
5798 }
5799 }
5800
5801 // used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_recursor.cc
5802 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, shared_ptr<RecursorLua4> pdl, Logr::log_t log)
5803 {
5804 return directResolve(qname, qtype, qclass, ret, pdl, SyncRes::s_qnameminimization, log);
5805 }
5806
5807 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, shared_ptr<RecursorLua4> pdl, bool qm, Logr::log_t slog)
5808 {
5809 auto log = slog->withValues("qname", Logging::Loggable(qname), "qtype", Logging::Loggable(qtype));
5810
5811 struct timeval now;
5812 gettimeofday(&now, 0);
5813
5814 SyncRes sr(now);
5815 sr.setQNameMinimization(qm);
5816 if (pdl) {
5817 sr.setLuaEngine(pdl);
5818 }
5819
5820 int res = -1;
5821 const std::string msg = "Exception while resolving";
5822 try {
5823 res = sr.beginResolve(qname, qtype, qclass, ret, 0);
5824 }
5825 catch (const PDNSException& e) {
5826 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got pdns exception: " << e.reason << endl,
5827 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5828 ret.clear();
5829 }
5830 catch (const ImmediateServFailException& e) {
5831 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got ImmediateServFailException: " << e.reason << endl,
5832 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5833 ret.clear();
5834 }
5835 catch (const PolicyHitException& e) {
5836 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got a policy hit" << endl,
5837 log->info(Logr::Error, msg, "exception", Logging::Loggable("PolicyHitException")));
5838 ret.clear();
5839 }
5840 catch (const std::exception& e) {
5841 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got STL error: " << e.what() << endl,
5842 log->error(Logr::Error, e.what(), msg, "exception", Logging::Loggable("std::exception")));
5843 ret.clear();
5844 }
5845 catch (...) {
5846 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got an exception" << endl,
5847 log->info(Logr::Error, msg));
5848 ret.clear();
5849 }
5850
5851 return res;
5852 }
5853
5854 int SyncRes::getRootNS(struct timeval now, asyncresolve_t asyncCallback, unsigned int depth, Logr::log_t log)
5855 {
5856 SyncRes sr(now);
5857 sr.d_prefix = "[getRootNS]";
5858 sr.setDoEDNS0(true);
5859 sr.setUpdatingRootNS();
5860 sr.setDoDNSSEC(g_dnssecmode != DNSSECMode::Off);
5861 sr.setDNSSECValidationRequested(g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate);
5862 sr.setAsyncCallback(asyncCallback);
5863 sr.setRefreshAlmostExpired(true);
5864
5865 const string msg = "Failed to update . records";
5866 vector<DNSRecord> ret;
5867 int res = -1;
5868 try {
5869 res = sr.beginResolve(g_rootdnsname, QType::NS, 1, ret, depth + 1);
5870 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
5871 auto state = sr.getValidationState();
5872 if (vStateIsBogus(state)) {
5873 throw PDNSException("Got Bogus validation result for .|NS");
5874 }
5875 }
5876 }
5877 catch (const PDNSException& e) {
5878 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5879 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5880 }
5881 catch (const ImmediateServFailException& e) {
5882 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5883 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5884 }
5885 catch (const PolicyHitException& e) {
5886 SLOG(g_log << Logger::Error << "Failed to update . records, got a policy hit" << endl,
5887 log->info(Logr::Error, msg, "exception", Logging::Loggable("PolicyHitException")));
5888 ret.clear();
5889 }
5890 catch (const std::exception& e) {
5891 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.what() << endl,
5892 log->error(Logr::Error, e.what(), msg, "exception", Logging::Loggable("std::exception")));
5893 }
5894 catch (...) {
5895 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception" << endl,
5896 log->info(Logr::Error, msg));
5897 }
5898
5899 if (res == 0) {
5900 SLOG(g_log << Logger::Debug << "Refreshed . records" << endl,
5901 log->info(Logr::Debug, "Refreshed . records"));
5902 }
5903 else {
5904 SLOG(g_log << Logger::Warning << "Failed to update root NS records, RCODE=" << res << endl,
5905 log->info(Logr::Warning, msg, "rcode", Logging::Loggable(res)));
5906 }
5907 return res;
5908 }