]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/recursordist/syncres.cc
Merge pull request #12698 from omoerbeek/rec-synthesized
[thirdparty/pdns.git] / pdns / recursordist / syncres.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "arguments.hh"
27 #include "aggressive_nsec.hh"
28 #include "cachecleaner.hh"
29 #include "dns_random.hh"
30 #include "dnsparser.hh"
31 #include "dnsrecords.hh"
32 #include "ednssubnet.hh"
33 #include "logger.hh"
34 #include "lua-recursor4.hh"
35 #include "rec-lua-conf.hh"
36 #include "syncres.hh"
37 #include "dnsseckeeper.hh"
38 #include "validate-recursor.hh"
39 #include "rec-taskqueue.hh"
40
// The global counters object, and the per-thread counters object that is constructed from it.
41 rec::GlobalCounters g_Counters;
42 thread_local rec::TCounters t_Counters(g_Counters);
43
44 template <class T>
45 class fails_t : public boost::noncopyable
46 {
47 public:
48 typedef uint64_t counter_t;
49 struct value_t
50 {
51 value_t(const T& a) :
52 key(a) {}
53 T key;
54 mutable counter_t value{0};
55 time_t last{0};
56 };
57
58 typedef multi_index_container<value_t,
59 indexed_by<
60 ordered_unique<tag<T>, member<value_t, T, &value_t::key>>,
61 ordered_non_unique<tag<time_t>, member<value_t, time_t, &value_t::last>>>>
62 cont_t;
63
64 cont_t getMapCopy() const
65 {
66 return d_cont;
67 }
68
69 counter_t value(const T& t) const
70 {
71 auto i = d_cont.find(t);
72
73 if (i == d_cont.end()) {
74 return 0;
75 }
76 return i->value;
77 }
78
79 counter_t incr(const T& key, const struct timeval& now)
80 {
81 auto i = d_cont.insert(key).first;
82
83 if (i->value < std::numeric_limits<counter_t>::max()) {
84 i->value++;
85 }
86 auto& ind = d_cont.template get<T>();
87 time_t tm = now.tv_sec;
88 ind.modify(i, [tm](value_t& val) { val.last = tm; });
89 return i->value;
90 }
91
92 void clear(const T& a)
93 {
94 d_cont.erase(a);
95 }
96
97 void clear()
98 {
99 d_cont.clear();
100 }
101
102 size_t size() const
103 {
104 return d_cont.size();
105 }
106
107 void prune(time_t cutoff)
108 {
109 auto& ind = d_cont.template get<time_t>();
110 ind.erase(ind.begin(), ind.upper_bound(cutoff));
111 }
112
113 private:
114 cont_t d_cont;
115 };
116
117 /** Class that implements a decaying EWMA.
118 This class keeps an exponentially weighted moving average which, additionally, decays over time.
119 The decaying is only done on get.
120 */
121
122 //! This represents a number of decaying Ewmas, used to store performance per nameserver-name.
123 /** Modelled to work mostly like the underlying DecayingEwma */
124 class DecayingEwmaCollection
125 {
126 private:
127 struct DecayingEwma
128 {
129 public:
130 void submit(int arg, const struct timeval& last, const struct timeval& now)
131 {
132 d_last = arg;
133 auto val = static_cast<float>(arg);
134 if (d_val == 0) {
135 d_val = val;
136 }
137 else {
138 auto diff = makeFloat(last - now);
139 auto factor = expf(diff) / 2.0f; // might be '0.5', or 0.0001
140 d_val = (1.0f - factor) * val + factor * d_val;
141 }
142 }
143
144 float get(float factor)
145 {
146 return d_val *= factor;
147 }
148
149 float peek(void) const
150 {
151 return d_val;
152 }
153
154 int last(void) const
155 {
156 return d_last;
157 }
158
159 float d_val{0};
160 int d_last{0};
161 };
162
163 public:
164 DecayingEwmaCollection(const DNSName& name, const struct timeval ts = {0, 0}) :
165 d_name(name), d_lastget(ts)
166 {
167 }
168
169 void submit(const ComboAddress& remote, int usecs, const struct timeval& now) const
170 {
171 d_collection[remote].submit(usecs, d_lastget, now);
172 }
173
174 float getFactor(const struct timeval& now) const
175 {
176 float diff = makeFloat(d_lastget - now);
177 return expf(diff / 60.0f); // is 1.0 or less
178 }
179
180 bool stale(time_t limit) const
181 {
182 return limit > d_lastget.tv_sec;
183 }
184
185 void purge(const std::map<ComboAddress, float>& keep) const
186 {
187 for (auto iter = d_collection.begin(); iter != d_collection.end();) {
188 if (keep.find(iter->first) != keep.end()) {
189 ++iter;
190 }
191 else {
192 iter = d_collection.erase(iter);
193 }
194 }
195 }
196
197 // d_collection is the modifyable part of the record, we index on DNSName and timeval, and DNSName never changes
198 mutable std::map<ComboAddress, DecayingEwma> d_collection;
199 const DNSName d_name;
200 struct timeval d_lastget;
201 };
202
203 class nsspeeds_t : public multi_index_container<DecayingEwmaCollection,
204 indexed_by<
205 hashed_unique<tag<DNSName>, member<DecayingEwmaCollection, const DNSName, &DecayingEwmaCollection::d_name>>,
206 ordered_non_unique<tag<timeval>, member<DecayingEwmaCollection, timeval, &DecayingEwmaCollection::d_lastget>>>>
207 {
208 public:
209 const auto& find_or_enter(const DNSName& name, const struct timeval& now)
210 {
211 const auto it = insert(DecayingEwmaCollection{name, now}).first;
212 return *it;
213 }
214
215 const auto& find_or_enter(const DNSName& name)
216 {
217 const auto it = insert(DecayingEwmaCollection{name}).first;
218 return *it;
219 }
220
221 float fastest(const DNSName& name, const struct timeval& now)
222 {
223 auto& ind = get<DNSName>();
224 auto it = insert(DecayingEwmaCollection{name, now}).first;
225 if (it->d_collection.empty()) {
226 return 0;
227 }
228 // This could happen if find(DNSName) entered an entry; it's used only by test code
229 if (it->d_lastget.tv_sec == 0 && it->d_lastget.tv_usec == 0) {
230 ind.modify(it, [&](DecayingEwmaCollection& d) { d.d_lastget = now; });
231 }
232
233 float ret = std::numeric_limits<float>::max();
234 const float factor = it->getFactor(now);
235 for (auto& entry : it->d_collection) {
236 if (float tmp = entry.second.get(factor); tmp < ret) {
237 ret = tmp;
238 }
239 }
240 ind.modify(it, [&](DecayingEwmaCollection& d) { d.d_lastget = now; });
241 return ret;
242 }
243 };
244
// File-local nsspeeds_t instance, wrapped in LockGuarded.
245 static LockGuarded<nsspeeds_t> s_nsSpeeds;
246
247 template <class Thing>
248 class Throttle : public boost::noncopyable
249 {
250 public:
251 struct entry_t
252 {
253 entry_t(const Thing& thing_, time_t ttd_, unsigned int count_) :
254 thing(thing_), ttd(ttd_), count(count_)
255 {
256 }
257 Thing thing;
258 time_t ttd;
259 mutable unsigned int count;
260 };
261 typedef multi_index_container<entry_t,
262 indexed_by<
263 ordered_unique<tag<Thing>, member<entry_t, Thing, &entry_t::thing>>,
264 ordered_non_unique<tag<time_t>, member<entry_t, time_t, &entry_t::ttd>>>>
265 cont_t;
266
267 bool shouldThrottle(time_t now, const Thing& t)
268 {
269 auto i = d_cont.find(t);
270 if (i == d_cont.end()) {
271 return false;
272 }
273 if (now > i->ttd || i->count == 0) {
274 d_cont.erase(i);
275 return false;
276 }
277 i->count--;
278
279 return true; // still listed, still blocked
280 }
281
282 void throttle(time_t now, const Thing& t, time_t ttl, unsigned int count)
283 {
284 auto i = d_cont.find(t);
285 time_t ttd = now + ttl;
286 if (i == d_cont.end()) {
287 d_cont.emplace(t, ttd, count);
288 }
289 else if (ttd > i->ttd || count > i->count) {
290 ttd = std::max(i->ttd, ttd);
291 count = std::max(i->count, count);
292 auto& ind = d_cont.template get<Thing>();
293 ind.modify(i, [ttd, count](entry_t& e) { e.ttd = ttd; e.count = count; });
294 }
295 }
296
297 size_t size() const
298 {
299 return d_cont.size();
300 }
301
302 cont_t getThrottleMap() const
303 {
304 return d_cont;
305 }
306
307 void clear()
308 {
309 d_cont.clear();
310 }
311
312 void prune(time_t now)
313 {
314 auto& ind = d_cont.template get<time_t>();
315 ind.erase(ind.begin(), ind.upper_bound(now));
316 }
317
318 private:
319 cont_t d_cont;
320 };
321
// File-local Throttle instance keyed on a (ComboAddress, DNSName, QType) tuple, wrapped in LockGuarded.
322 static LockGuarded<Throttle<std::tuple<ComboAddress, DNSName, QType>>> s_throttle;
323
324 struct SavedParentEntry
325 {
326 SavedParentEntry(const DNSName& name, map<DNSName, vector<ComboAddress>>&& nsAddresses, time_t ttd) :
327 d_domain(name), d_nsAddresses(nsAddresses), d_ttd(ttd)
328 {
329 }
330 DNSName d_domain;
331 map<DNSName, vector<ComboAddress>> d_nsAddresses;
332 time_t d_ttd;
333 mutable uint64_t d_count{0};
334 };
335
336 typedef multi_index_container<
337 SavedParentEntry,
338 indexed_by<ordered_unique<tag<DNSName>, member<SavedParentEntry, DNSName, &SavedParentEntry::d_domain>>,
339 ordered_non_unique<tag<time_t>, member<SavedParentEntry, time_t, &SavedParentEntry::d_ttd>>>>
340 SavedParentNSSetBase;
341
342 class SavedParentNSSet : public SavedParentNSSetBase
343 {
344 public:
345 void prune(time_t now)
346 {
347 auto& ind = get<time_t>();
348 ind.erase(ind.begin(), ind.upper_bound(now));
349 }
350 void inc(const DNSName& name)
351 {
352 auto it = find(name);
353 if (it != end()) {
354 ++(*it).d_count;
355 }
356 }
357 SavedParentNSSet getMapCopy() const
358 {
359 return *this;
360 }
361 };
362
// File-local SavedParentNSSet instance, wrapped in LockGuarded.
363 static LockGuarded<SavedParentNSSet> s_savedParentNSSet;
364
// Per-thread state: the SyncRes thread-local storage and a pointer to an addrringbuf_t.
365 thread_local SyncRes::ThreadLocalStorage SyncRes::t_sstorage;
366 thread_local std::unique_ptr<addrringbuf_t> t_timeouts;
367
// Definitions of SyncRes static data members. Only s_dontQuery (nullptr) and
// s_redirectionQTypes (CNAME and DNAME) get an explicit initial value here.
368 std::unique_ptr<NetmaskGroup> SyncRes::s_dontQuery{nullptr};
369 NetmaskGroup SyncRes::s_ednslocalsubnets;
370 NetmaskGroup SyncRes::s_ednsremotesubnets;
371 SuffixMatchNode SyncRes::s_ednsdomains;
372 EDNSSubnetOpts SyncRes::s_ecsScopeZero;
373 string SyncRes::s_serverID;
374 SyncRes::LogMode SyncRes::s_lm;
375 const std::unordered_set<QType> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};
// File-local failure counters (see fails_t), keyed on address and on name respectively.
376 static LockGuarded<fails_t<ComboAddress>> s_fails;
377 static LockGuarded<fails_t<DNSName>> s_nonresolving;
378
379 struct DoTStatus
380 {
381 DoTStatus(const ComboAddress& ip, const DNSName& auth, time_t ttd) :
382 d_address(ip), d_auth(auth), d_ttd(ttd)
383 {
384 }
385 enum Status : uint8_t
386 {
387 Unknown,
388 Busy,
389 Bad,
390 Good
391 };
392 const ComboAddress d_address;
393 const DNSName d_auth;
394 time_t d_ttd;
395 mutable uint64_t d_count{0};
396 mutable Status d_status{Unknown};
397 std::string toString() const
398 {
399 const std::array<std::string, 4> n{"Unknown", "Busy", "Bad", "Good"};
400 unsigned int v = static_cast<unsigned int>(d_status);
401 return v >= n.size() ? "?" : n[v];
402 }
403 };
404
405 struct DoTMap
406 {
407 multi_index_container<DoTStatus,
408 indexed_by<
409 ordered_unique<tag<ComboAddress>, member<DoTStatus, const ComboAddress, &DoTStatus::d_address>>,
410 ordered_non_unique<tag<time_t>, member<DoTStatus, time_t, &DoTStatus::d_ttd>>>>
411 d_map;
412 uint64_t d_numBusy{0};
413
414 void prune(time_t cutoff)
415 {
416 auto& ind = d_map.template get<time_t>();
417 ind.erase(ind.begin(), ind.upper_bound(cutoff));
418 }
419 };
420
// File-local DoTMap instance, wrapped in LockGuarded.
421 static LockGuarded<DoTMap> s_dotMap;
422
// 24 * 3600 and 3 * 24 * 3600, i.e. one day and three days when read as seconds.
// NOTE(review): presumably how long a DoT failure / success is remembered - confirm at the use sites.
423 static const time_t dotFailWait = 24 * 3600;
424 static const time_t dotSuccessWait = 3 * 24 * 3600;
// Forward declaration; the definition is further down in this file.
425 static bool shouldDoDoT(ComboAddress address, time_t now);
426
// Definitions of SyncRes static data members. None of them gets an explicit initial value here.
// NOTE(review): presumably these are set from the configuration elsewhere - confirm.

// Limits, TTLs and throttling parameters
427 unsigned int SyncRes::s_maxnegttl;
428 unsigned int SyncRes::s_maxbogusttl;
429 unsigned int SyncRes::s_maxcachettl;
430 unsigned int SyncRes::s_maxqperq;
431 unsigned int SyncRes::s_maxnsperresolve;
432 unsigned int SyncRes::s_maxnsaddressqperq;
433 unsigned int SyncRes::s_maxtotusec;
434 unsigned int SyncRes::s_maxdepth;
435 unsigned int SyncRes::s_minimumTTL;
436 unsigned int SyncRes::s_minimumECSTTL;
437 unsigned int SyncRes::s_packetcachettl;
438 unsigned int SyncRes::s_packetcacheservfailttl;
439 unsigned int SyncRes::s_serverdownmaxfails;
440 unsigned int SyncRes::s_serverdownthrottletime;
441 unsigned int SyncRes::s_nonresolvingnsmaxfails;
442 unsigned int SyncRes::s_nonresolvingnsthrottletime;
443 unsigned int SyncRes::s_ecscachelimitttl;
// ECS statistics
444 pdns::stat_t SyncRes::s_ecsqueries;
445 pdns::stat_t SyncRes::s_ecsresponses;
446 std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize4;
447 std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize6;
448
// ECS settings
449 uint8_t SyncRes::s_ecsipv4limit;
450 uint8_t SyncRes::s_ecsipv6limit;
451 uint8_t SyncRes::s_ecsipv4cachelimit;
452 uint8_t SyncRes::s_ecsipv6cachelimit;
453 bool SyncRes::s_ecsipv4nevercache;
454 bool SyncRes::s_ecsipv6nevercache;
455
// Miscellaneous settings
456 bool SyncRes::s_doIPv4;
457 bool SyncRes::s_doIPv6;
458 bool SyncRes::s_rootNXTrust;
459 bool SyncRes::s_noEDNS;
460 bool SyncRes::s_qnameminimization;
461 SyncRes::HardenNXD SyncRes::s_hardenNXD;
462 unsigned int SyncRes::s_refresh_ttlperc;
463 unsigned int SyncRes::s_locked_ttlperc;
464 int SyncRes::s_tcp_fast_open;
465 bool SyncRes::s_tcp_fast_open_connect;
466 bool SyncRes::s_dot_to_port_853;
467 int SyncRes::s_event_trace_enabled;
468 bool SyncRes::s_save_parent_ns_set;
469 unsigned int SyncRes::s_max_busy_dot_probes;
470 bool SyncRes::s_addExtendedResolutionDNSErrors;
471
// Logging helper for SyncRes member functions (it reads d_lm, d_fixednow and d_trace).
// With d_lm == Log, x is streamed to g_log at Warning level; with d_lm == Store, a timestamp is
// added through addTraceTS() and x is streamed to d_trace; otherwise x is not evaluated at all.
// Beware: this expands to a bare if / else if statement, so an unbraced `if (c) LOG(x); else ...`
// will not do what it appears to do.
472 #define LOG(x) \
473 if (d_lm == Log) { \
474 g_log << Logger::Warning << x; \
475 } \
476 else if (d_lm == Store) { \
477 addTraceTS(d_fixednow, d_trace); \
478 d_trace << x; \
479 }
480
481 OptLog SyncRes::LogObject(const string& prefix)
482 {
483 OptLog ret;
484 if (d_lm == Log) {
485 ret = {prefix, d_fixednow, &g_log};
486 }
487 else if (d_lm == Store) {
488 ret = {prefix, d_fixednow, &d_trace};
489 }
490 return ret;
491 }
492
// Formats a double with two decimals ("%0.2f"), returning "?" if formatting fails or
// the result does not fit the 20 byte buffer.
// boost::format is deliberately not used: according to tsan it is not thread safe
// while calling into locale handling code.
// The string allocated here is cheap compared to what boost::format does, and may
// even be optimized away.
static inline std::string fmtfloat(double f)
{
  constexpr size_t bufferSize = 20;
  char buf[bufferSize];
  const int written = snprintf(buf, bufferSize, "%0.2f", f);
  // snprintf reports the length it needed, which may exceed what it could store
  const bool fits = written >= 0 && written < static_cast<int>(bufferSize);
  if (!fits) {
    return "?";
  }
  return std::string(buf, written);
}
507
508 static inline void accountAuthLatency(uint64_t usec, int family)
509 {
510 if (family == AF_INET) {
511 t_Counters.at(rec::Histogram::auth4Answers)(usec);
512 t_Counters.at(rec::Histogram::cumulativeAuth4Answers)(usec);
513 }
514 else {
515 t_Counters.at(rec::Histogram::auth6Answers)(usec);
516 t_Counters.at(rec::Histogram::cumulativeAuth6Answers)(usec);
517 }
518 }
519
520 SyncRes::SyncRes(const struct timeval& now) :
521 d_authzonequeries(0), d_outqueries(0), d_tcpoutqueries(0), d_dotoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0), d_totUsec(0), d_fixednow(now), d_now(now), d_cacheonly(false), d_doDNSSEC(false), d_doEDNS0(false), d_qNameMinimization(s_qnameminimization), d_lm(s_lm)
522
523 {
524 }
525
// Forward declaration; used by SyncRes::addAdditionals() to collect into allowedAdditionals the
// names that rec gives rise to. The definition is not in this part of the file.
526 static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec);
527
528 void SyncRes::resolveAdditionals(const DNSName& qname, QType qtype, AdditionalMode mode, std::vector<DNSRecord>& additionals, unsigned int depth, bool& additionalsNotInCache)
529 {
530 vector<DNSRecord> addRecords;
531
532 Context context;
533 switch (mode) {
534 case AdditionalMode::ResolveImmediately: {
535 set<GetBestNSAnswer> beenthere;
536 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
537 if (res != 0) {
538 return;
539 }
540 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
541 // validation is required.
542 if (vStateIsBogus(context.state)) {
543 return;
544 }
545 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
546 return;
547 }
548 for (auto& rec : addRecords) {
549 if (rec.d_place == DNSResourceRecord::ANSWER) {
550 additionals.push_back(std::move(rec));
551 }
552 }
553 break;
554 }
555 case AdditionalMode::CacheOnly:
556 case AdditionalMode::CacheOnlyRequireAuth: {
557 // Peek into cache
558 MemRecursorCache::Flags flags = mode == AdditionalMode::CacheOnlyRequireAuth ? MemRecursorCache::RequireAuth : MemRecursorCache::None;
559 if (g_recCache->get(d_now.tv_sec, qname, qtype, flags, &addRecords, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, &context.state) <= 0) {
560 return;
561 }
562 // See the comment for the ResolveImmediately case
563 if (vStateIsBogus(context.state)) {
564 return;
565 }
566 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
567 return;
568 }
569 for (auto& rec : addRecords) {
570 if (rec.d_place == DNSResourceRecord::ANSWER) {
571 rec.d_ttl -= d_now.tv_sec;
572 additionals.push_back(std::move(rec));
573 }
574 }
575 break;
576 }
577 case AdditionalMode::ResolveDeferred: {
578 const bool oldCacheOnly = setCacheOnly(true);
579 set<GetBestNSAnswer> beenthere;
580 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
581 setCacheOnly(oldCacheOnly);
582 if (res == 0 && addRecords.size() > 0) {
583 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
584 // validation is required.
585 if (vStateIsBogus(context.state)) {
586 return;
587 }
588 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
589 return;
590 }
591 bool found = false;
592 for (auto& rec : addRecords) {
593 if (rec.d_place == DNSResourceRecord::ANSWER) {
594 found = true;
595 additionals.push_back(std::move(rec));
596 }
597 }
598 if (found) {
599 return;
600 }
601 }
602 // Not found in cache, check negcache and push task if also not in negcache
603 NegCache::NegCacheEntry ne;
604 bool inNegCache = g_negCache->get(qname, qtype, d_now, ne, false);
605 if (!inNegCache) {
606 // There are a few cases where an answer is neither stored in the record cache nor in the neg cache.
607 // An example is a SOA-less NODATA response. Rate limiting will kick in if those tasks are pushed too often.
608 // We might want to fix these cases (and always either store positive or negative) some day.
609 pushResolveTask(qname, qtype, d_now.tv_sec, d_now.tv_sec + 60);
610 additionalsNotInCache = true;
611 }
612 break;
613 }
614 case AdditionalMode::Ignore:
615 break;
616 }
617 }
618
619 // The main (recursive) function to add additionals
620 // qtype: the original query type to expand
621 // start: records to start from
622 // This function uses to state sets to avoid infinite recursion and allow depulication
623 // depth is the main recursion depth
624 // additionaldepth is the depth for addAdditionals itself
625 void SyncRes::addAdditionals(QType qtype, const vector<DNSRecord>& start, vector<DNSRecord>& additionals, std::set<std::pair<DNSName, QType>>& uniqueCalls, std::set<std::tuple<DNSName, QType, QType>>& uniqueResults, unsigned int depth, unsigned additionaldepth, bool& additionalsNotInCache)
626 {
627 if (additionaldepth >= 5 || start.empty()) {
628 return;
629 }
630
631 auto luaLocal = g_luaconfs.getLocal();
632 const auto it = luaLocal->allowAdditionalQTypes.find(qtype);
633 if (it == luaLocal->allowAdditionalQTypes.end()) {
634 return;
635 }
636 std::unordered_set<DNSName> addnames;
637 for (const auto& rec : start) {
638 if (rec.d_place == DNSResourceRecord::ANSWER) {
639 // currently, this function only knows about names, we could also take the target types that are dependent on
640 // record contents into account
641 // e.g. for NAPTR records, go only for SRV for flag value "s", or A/AAAA for flag value "a"
642 allowAdditionalEntry(addnames, rec);
643 }
644 }
645
646 // We maintain two sets for deduplication:
647 // - uniqueCalls makes sure we never resolve a qname/qtype twice
648 // - uniqueResults makes sure we never add the same qname/qytype RRSet to the result twice,
649 // but note that that set might contain multiple elements.
650
651 auto mode = it->second.second;
652 for (const auto& targettype : it->second.first) {
653 for (const auto& addname : addnames) {
654 std::vector<DNSRecord> records;
655 bool inserted = uniqueCalls.emplace(addname, targettype).second;
656 if (inserted) {
657 resolveAdditionals(addname, targettype, mode, records, depth, additionalsNotInCache);
658 }
659 if (!records.empty()) {
660 for (auto r = records.begin(); r != records.end();) {
661 QType covered = QType::ENT;
662 if (r->d_type == QType::RRSIG) {
663 if (auto rsig = getRR<RRSIGRecordContent>(*r); rsig != nullptr) {
664 covered = rsig->d_type;
665 }
666 }
667 if (uniqueResults.count(std::tuple(r->d_name, QType(r->d_type), covered)) > 0) {
668 // A bit expensive for vectors, but they are small
669 r = records.erase(r);
670 }
671 else {
672 ++r;
673 }
674 }
675 for (const auto& r : records) {
676 additionals.push_back(r);
677 QType covered = QType::ENT;
678 if (r.d_type == QType::RRSIG) {
679 if (auto rsig = getRR<RRSIGRecordContent>(r); rsig != nullptr) {
680 covered = rsig->d_type;
681 }
682 }
683 uniqueResults.emplace(r.d_name, r.d_type, covered);
684 }
685 addAdditionals(targettype, records, additionals, uniqueCalls, uniqueResults, depth, additionaldepth + 1, additionalsNotInCache);
686 }
687 }
688 }
689 }
690
691 // The entry point for other code
692 bool SyncRes::addAdditionals(QType qtype, vector<DNSRecord>& ret, unsigned int depth)
693 {
694 // The additional records of interest
695 std::vector<DNSRecord> additionals;
696
697 // We only call resolve for a specific name/type combo once
698 std::set<std::pair<DNSName, QType>> uniqueCalls;
699
700 // Collect multiple name/qtype from a single resolve but do not add a new set from new resolve calls
701 // For RRSIGs, the type covered is stored in the second Qtype
702 std::set<std::tuple<DNSName, QType, QType>> uniqueResults;
703
704 bool additionalsNotInCache = false;
705 addAdditionals(qtype, ret, additionals, uniqueCalls, uniqueResults, depth, 0, additionalsNotInCache);
706
707 for (auto& rec : additionals) {
708 rec.d_place = DNSResourceRecord::ADDITIONAL;
709 ret.push_back(std::move(rec));
710 }
711 return additionalsNotInCache;
712 }
713
714 /** everything begins here - this is the entry point just after receiving a packet */
715 int SyncRes::beginResolve(const DNSName& qname, const QType qtype, QClass qclass, vector<DNSRecord>& ret, unsigned int depth)
716 {
717 d_eventTrace.add(RecEventTrace::SyncRes);
718 t_Counters.at(rec::Counter::syncresqueries)++;
719 d_wasVariable = false;
720 d_wasOutOfBand = false;
721 d_cutStates.clear();
722
723 if (doSpecialNamesResolve(qname, qtype, qclass, ret)) {
724 d_queryValidationState = vState::Insecure; // this could fool our stats into thinking a validation took place
725 return 0; // so do check before updating counters (we do now)
726 }
727
728 if (isUnsupported(qtype)) {
729 return -1;
730 }
731
732 if (qclass == QClass::ANY)
733 qclass = QClass::IN;
734 else if (qclass != QClass::IN)
735 return -1;
736
737 if (qtype == QType::DS) {
738 d_externalDSQuery = qname;
739 }
740 else {
741 d_externalDSQuery.clear();
742 }
743
744 set<GetBestNSAnswer> beenthere;
745 Context context;
746 int res = doResolve(qname, qtype, ret, depth, beenthere, context);
747 d_queryValidationState = context.state;
748 d_extendedError = context.extendedError;
749
750 if (shouldValidate()) {
751 if (d_queryValidationState != vState::Indeterminate) {
752 t_Counters.at(rec::Counter::dnssecValidations)++;
753 }
754 auto xdnssec = g_xdnssec.getLocal();
755 if (xdnssec->check(qname)) {
756 increaseXDNSSECStateCounter(d_queryValidationState);
757 }
758 else {
759 increaseDNSSECStateCounter(d_queryValidationState);
760 }
761 }
762
763 // Avoid calling addAdditionals() if we know we won't find anything
764 auto luaLocal = g_luaconfs.getLocal();
765 if (res == 0 && qclass == QClass::IN && luaLocal->allowAdditionalQTypes.find(qtype) != luaLocal->allowAdditionalQTypes.end()) {
766 bool additionalsNotInCache = addAdditionals(qtype, ret, depth);
767 if (additionalsNotInCache) {
768 d_wasVariable = true;
769 }
770 }
771 d_eventTrace.add(RecEventTrace::SyncRes, res, false);
772 return res;
773 }
774
775 /*! Handles all special, built-in names
776 * Fills ret with an answer and returns true if it handled the query.
777 *
778 * Handles the following queries (and their ANY variants):
779 *
780 * - localhost. IN A
781 * - localhost. IN AAAA
782 * - 1.0.0.127.in-addr.arpa. IN PTR
783 * - 1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. IN PTR
784 * - version.bind. CH TXT
785 * - version.pdns. CH TXT
786 * - id.server. CH TXT
787 * - trustanchor.server CH TXT
788 * - negativetrustanchor.server CH TXT
789 */
790 bool SyncRes::doSpecialNamesResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret)
791 {
792 static const DNSName arpa("1.0.0.127.in-addr.arpa."), ip6_arpa("1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa."),
793 localhost("localhost."), versionbind("version.bind."), idserver("id.server."), versionpdns("version.pdns."), trustanchorserver("trustanchor.server."),
794 negativetrustanchorserver("negativetrustanchor.server.");
795
796 bool handled = false;
797 vector<pair<QType::typeenum, string>> answers;
798
799 if ((qname == arpa || qname == ip6_arpa) && qclass == QClass::IN) {
800 handled = true;
801 if (qtype == QType::PTR || qtype == QType::ANY)
802 answers.emplace_back(QType::PTR, "localhost.");
803 }
804
805 if (qname.isPartOf(localhost) && qclass == QClass::IN) {
806 handled = true;
807 if (qtype == QType::A || qtype == QType::ANY)
808 answers.emplace_back(QType::A, "127.0.0.1");
809 if (qtype == QType::AAAA || qtype == QType::ANY)
810 answers.emplace_back(QType::AAAA, "::1");
811 }
812
813 if ((qname == versionbind || qname == idserver || qname == versionpdns) && qclass == QClass::CHAOS) {
814 handled = true;
815 if (qtype == QType::TXT || qtype == QType::ANY) {
816 if (qname == versionbind || qname == versionpdns)
817 answers.emplace_back(QType::TXT, "\"" + ::arg()["version-string"] + "\"");
818 else if (s_serverID != "disabled")
819 answers.emplace_back(QType::TXT, "\"" + s_serverID + "\"");
820 }
821 }
822
823 if (qname == trustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
824 handled = true;
825 if (qtype == QType::TXT || qtype == QType::ANY) {
826 auto luaLocal = g_luaconfs.getLocal();
827 for (auto const& dsAnchor : luaLocal->dsAnchors) {
828 ostringstream ans;
829 ans << "\"";
830 ans << dsAnchor.first.toString(); // Explicit toString to have a trailing dot
831 for (auto const& dsRecord : dsAnchor.second) {
832 ans << " ";
833 ans << dsRecord.d_tag;
834 }
835 ans << "\"";
836 answers.emplace_back(QType::TXT, ans.str());
837 }
838 }
839 }
840
841 if (qname == negativetrustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
842 handled = true;
843 if (qtype == QType::TXT || qtype == QType::ANY) {
844 auto luaLocal = g_luaconfs.getLocal();
845 for (auto const& negAnchor : luaLocal->negAnchors) {
846 ostringstream ans;
847 ans << "\"";
848 ans << negAnchor.first.toString(); // Explicit toString to have a trailing dot
849 if (negAnchor.second.length())
850 ans << " " << negAnchor.second;
851 ans << "\"";
852 answers.emplace_back(QType::TXT, ans.str());
853 }
854 }
855 }
856
857 if (handled && !answers.empty()) {
858 ret.clear();
859 d_wasOutOfBand = true;
860
861 DNSRecord dr;
862 dr.d_name = qname;
863 dr.d_place = DNSResourceRecord::ANSWER;
864 dr.d_class = qclass;
865 dr.d_ttl = 86400;
866 for (const auto& ans : answers) {
867 dr.d_type = ans.first;
868 dr.setContent(DNSRecordContent::mastermake(ans.first, qclass, ans.second));
869 ret.push_back(dr);
870 }
871 }
872
873 return handled;
874 }
875
876 //! This is the 'out of band resolver', in other words, the authoritative server
877 void SyncRes::AuthDomain::addSOA(std::vector<DNSRecord>& records) const
878 {
879 SyncRes::AuthDomain::records_t::const_iterator ziter = d_records.find(std::make_tuple(getName(), QType::SOA));
880 if (ziter != d_records.end()) {
881 DNSRecord dr = *ziter;
882 dr.d_place = DNSResourceRecord::AUTHORITY;
883 records.push_back(dr);
884 }
885 }
886
887 bool SyncRes::AuthDomain::operator==(const AuthDomain& rhs) const
888 {
889 return d_records == rhs.d_records
890 && d_servers == rhs.d_servers
891 && d_name == rhs.d_name
892 && d_rdForward == rhs.d_rdForward;
893 }
894
895 [[nodiscard]] std::string SyncRes::AuthDomain::print(const std::string& indent,
896 const std::string& indentLevel) const
897 {
898 std::stringstream s;
899 s << indent << "DNSName = " << d_name << std::endl;
900 s << indent << "rdForward = " << d_rdForward << std::endl;
901 s << indent << "Records {" << std::endl;
902 auto recordContentIndentation = indent;
903 recordContentIndentation += indentLevel;
904 recordContentIndentation += indentLevel;
905 for (const auto& record : d_records) {
906 s << indent << indentLevel << "Record `" << record.d_name << "` {" << std::endl;
907 s << record.print(recordContentIndentation);
908 s << indent << indentLevel << "}" << std::endl;
909 }
910 s << indent << "}" << std::endl;
911 s << indent << "Servers {" << std::endl;
912 for (const auto& server : d_servers) {
913 s << indent << indentLevel << server.toString() << std::endl;
914 }
915 s << indent << "}" << std::endl;
916 return s.str();
917 }
918
919 int SyncRes::AuthDomain::getRecords(const DNSName& qname, const QType qtype, std::vector<DNSRecord>& records) const
920 {
921 int result = RCode::NoError;
922 records.clear();
923
924 // partial lookup
925 std::pair<records_t::const_iterator, records_t::const_iterator> range = d_records.equal_range(std::tie(qname));
926
927 SyncRes::AuthDomain::records_t::const_iterator ziter;
928 bool somedata = false;
929
930 for (ziter = range.first; ziter != range.second; ++ziter) {
931 somedata = true;
932
933 if (qtype == QType::ANY || ziter->d_type == qtype || ziter->d_type == QType::CNAME) {
934 // let rest of nameserver do the legwork on this one
935 records.push_back(*ziter);
936 }
937 else if (ziter->d_type == QType::NS && ziter->d_name.countLabels() > getName().countLabels()) {
938 // we hit a delegation point!
939 DNSRecord dr = *ziter;
940 dr.d_place = DNSResourceRecord::AUTHORITY;
941 records.push_back(dr);
942 }
943 }
944
945 if (!records.empty()) {
946 /* We have found an exact match, we're done */
947 return result;
948 }
949
950 if (somedata) {
951 /* We have records for that name, but not of the wanted qtype */
952 addSOA(records);
953
954 return result;
955 }
956
957 DNSName wcarddomain(qname);
958 while (wcarddomain != getName() && wcarddomain.chopOff()) {
959 range = d_records.equal_range(std::make_tuple(g_wildcarddnsname + wcarddomain));
960 if (range.first == range.second)
961 continue;
962
963 for (ziter = range.first; ziter != range.second; ++ziter) {
964 DNSRecord dr = *ziter;
965 // if we hit a CNAME, just answer that - rest of recursor will do the needful & follow
966 if (dr.d_type == qtype || qtype == QType::ANY || dr.d_type == QType::CNAME) {
967 dr.d_name = qname;
968 dr.d_place = DNSResourceRecord::ANSWER;
969 records.push_back(dr);
970 }
971 }
972
973 if (records.empty()) {
974 addSOA(records);
975 }
976
977 return result;
978 }
979
980 /* Nothing for this name, no wildcard, let's see if there is some NS */
981 DNSName nsdomain(qname);
982 while (nsdomain.chopOff() && nsdomain != getName()) {
983 range = d_records.equal_range(std::make_tuple(nsdomain, QType::NS));
984 if (range.first == range.second)
985 continue;
986
987 for (ziter = range.first; ziter != range.second; ++ziter) {
988 DNSRecord dr = *ziter;
989 dr.d_place = DNSResourceRecord::AUTHORITY;
990 records.push_back(dr);
991 }
992 }
993
994 if (records.empty()) {
995 addSOA(records);
996 result = RCode::NXDomain;
997 }
998
999 return result;
1000 }
1001
1002 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, int& res)
1003 {
1004 d_authzonequeries++;
1005 t_Counters.at(rec::Counter::authzonequeries)++;
1006
1007 res = domain.getRecords(qname, qtype, ret);
1008 return true;
1009 }
1010
1011 bool SyncRes::doOOBResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res)
1012 {
1013 DNSName authdomain(qname);
1014 domainmap_t::const_iterator iter = getBestAuthZone(&authdomain);
1015 if (iter == t_sstorage.domainmap->end() || !iter->second.isAuth()) {
1016 LOG(prefix << qname << ": Auth storage has no zone for this query!" << endl);
1017 return false;
1018 }
1019
1020 LOG(prefix << qname << ": Auth storage has data, zone='" << authdomain << "'" << endl);
1021 return doOOBResolve(iter->second, qname, qtype, ret, res);
1022 }
1023
1024 bool SyncRes::isRecursiveForwardOrAuth(const DNSName& qname) const
1025 {
1026 DNSName authname(qname);
1027 domainmap_t::const_iterator iter = getBestAuthZone(&authname);
1028 return iter != t_sstorage.domainmap->end() && (iter->second.isAuth() || iter->second.shouldRecurse());
1029 }
1030
1031 bool SyncRes::isForwardOrAuth(const DNSName& qname) const
1032 {
1033 DNSName authname(qname);
1034 domainmap_t::const_iterator iter = getBestAuthZone(&authname);
1035 return iter != t_sstorage.domainmap->end();
1036 }
1037
/* Format tv as a local-time ISO-8601-like timestamp with milliseconds
 * ("YYYY-MM-DDTHH:MM:SS.mmm") into buf.
 *
 * \param tv  the time to format
 * \param buf destination buffer
 * \param sz  size of buf in bytes
 * \return buf
 *
 * If the broken-down time cannot be produced or does not fit, the raw number
 * of seconds is written instead; if even that does not fit, buf is set to the
 * empty string (when sz > 0). The ".mmm" suffix is only appended when there
 * is room for it.
 */
const char* isoDateTimeMillis(const struct timeval& tv, char* buf, size_t sz)
{
  const std::string s_timestampFormat = "%Y-%m-%dT%T";
  struct tm tm;
  size_t len = 0;

  // localtime_r() returns a null pointer when the time cannot be represented
  // (e.g. the year overflows); handing that to strftime() is undefined
  // behaviour, so only format when the conversion succeeded.
  if (localtime_r(&tv.tv_sec, &tm) != nullptr) {
    len = strftime(buf, sz, s_timestampFormat.c_str(), &tm);
  }

  if (len == 0) {
    // Fall back to the plain number of seconds
    int ret = snprintf(buf, sz, "%lld", static_cast<long long>(tv.tv_sec));
    if (ret < 0 || static_cast<size_t>(ret) >= sz) {
      if (sz > 0) {
        buf[0] = '\0';
      }
      return buf;
    }
    len = ret;
  }

  // ".mmm" plus the terminating NUL needs five more bytes
  if (sz > len + 4) {
    snprintf(buf + len, sz - len, ".%03ld", static_cast<long>(tv.tv_usec) / 1000);
  }
  return buf;
}
1059
/* Format t as a local-time "YYYY-MM-DDTHH:MM:SS" timestamp into buf.
 *
 * \param t   the time to format
 * \param buf destination buffer
 * \param sz  size of buf in bytes
 * \return buf
 *
 * If the broken-down time cannot be produced or does not fit, the raw number
 * of seconds is written instead; if even that does not fit, buf is set to the
 * empty string (when sz > 0).
 */
static const char* timestamp(time_t t, char* buf, size_t sz)
{
  const std::string s_timestampFormat = "%Y-%m-%dT%T";
  struct tm tm;
  size_t len = 0;

  // localtime_r() returns a null pointer when the time cannot be represented;
  // passing that on to strftime() is undefined behaviour.
  if (localtime_r(&t, &tm) != nullptr) {
    len = strftime(buf, sz, s_timestampFormat.c_str(), &tm);
  }

  if (len == 0) {
    // Fall back to the plain number of seconds
    int ret = snprintf(buf, sz, "%lld", static_cast<long long>(t));
    if (ret < 0 || static_cast<size_t>(ret) >= sz) {
      if (sz > 0) {
        buf[0] = '\0';
      }
    }
  }
  return buf;
}
1075
1076 struct ednsstatus_t : public multi_index_container<SyncRes::EDNSStatus,
1077 indexed_by<
1078 ordered_unique<tag<ComboAddress>, member<SyncRes::EDNSStatus, ComboAddress, &SyncRes::EDNSStatus::address>>,
1079 ordered_non_unique<tag<time_t>, member<SyncRes::EDNSStatus, time_t, &SyncRes::EDNSStatus::ttd>>>>
1080 {
1081 // Get a copy
1082 ednsstatus_t getMap() const
1083 {
1084 return *this;
1085 }
1086
1087 void setMode(index<ComboAddress>::type& ind, iterator it, SyncRes::EDNSStatus::EDNSMode mode, time_t ts)
1088 {
1089 if (it->mode != mode || it->ttd == 0) {
1090 ind.modify(it, [=](SyncRes::EDNSStatus& s) { s.mode = mode; s.ttd = ts + Expire; });
1091 }
1092 }
1093
1094 void prune(time_t now)
1095 {
1096 auto& ind = get<time_t>();
1097 ind.erase(ind.begin(), ind.upper_bound(now));
1098 }
1099
1100 static const time_t Expire = 7200;
1101 };
1102
1103 static LockGuarded<ednsstatus_t> s_ednsstatus;
1104
1105 SyncRes::EDNSStatus::EDNSMode SyncRes::getEDNSStatus(const ComboAddress& server)
1106 {
1107 auto lock = s_ednsstatus.lock();
1108 const auto& it = lock->find(server);
1109 if (it == lock->end()) {
1110 return EDNSStatus::EDNSOK;
1111 }
1112 return it->mode;
1113 }
1114
1115 uint64_t SyncRes::getEDNSStatusesSize()
1116 {
1117 return s_ednsstatus.lock()->size();
1118 }
1119
1120 void SyncRes::clearEDNSStatuses()
1121 {
1122 s_ednsstatus.lock()->clear();
1123 }
1124
1125 void SyncRes::pruneEDNSStatuses(time_t cutoff)
1126 {
1127 s_ednsstatus.lock()->prune(cutoff);
1128 }
1129
1130 uint64_t SyncRes::doEDNSDump(int fd)
1131 {
1132 int newfd = dup(fd);
1133 if (newfd == -1) {
1134 return 0;
1135 }
1136 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1137 if (!fp) {
1138 close(newfd);
1139 return 0;
1140 }
1141 uint64_t count = 0;
1142
1143 fprintf(fp.get(), "; edns dump follows\n; ip\tstatus\tttd\n");
1144 const auto copy = s_ednsstatus.lock()->getMap();
1145 for (const auto& eds : copy) {
1146 count++;
1147 char tmp[26];
1148 fprintf(fp.get(), "%s\t%s\t%s\n", eds.address.toString().c_str(), eds.toString().c_str(), timestamp(eds.ttd, tmp, sizeof(tmp)));
1149 }
1150 return count;
1151 }
1152
1153 void SyncRes::pruneNSSpeeds(time_t limit)
1154 {
1155 auto lock = s_nsSpeeds.lock();
1156 auto& ind = lock->get<timeval>();
1157 ind.erase(ind.begin(), ind.upper_bound(timeval{limit, 0}));
1158 }
1159
1160 uint64_t SyncRes::getNSSpeedsSize()
1161 {
1162 return s_nsSpeeds.lock()->size();
1163 }
1164
1165 void SyncRes::submitNSSpeed(const DNSName& server, const ComboAddress& ca, uint32_t usec, const struct timeval& now)
1166 {
1167 auto lock = s_nsSpeeds.lock();
1168 lock->find_or_enter(server, now).submit(ca, usec, now);
1169 }
1170
1171 void SyncRes::clearNSSpeeds()
1172 {
1173 s_nsSpeeds.lock()->clear();
1174 }
1175
1176 float SyncRes::getNSSpeed(const DNSName& server, const ComboAddress& ca)
1177 {
1178 auto lock = s_nsSpeeds.lock();
1179 return lock->find_or_enter(server).d_collection[ca].peek();
1180 }
1181
1182 uint64_t SyncRes::doDumpNSSpeeds(int fd)
1183 {
1184 int newfd = dup(fd);
1185 if (newfd == -1) {
1186 return 0;
1187 }
1188 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1189 if (!fp) {
1190 close(newfd);
1191 return 0;
1192 }
1193
1194 fprintf(fp.get(), "; nsspeed dump follows\n; nsname\ttimestamp\t[ip/decaying-ms/last-ms...]\n");
1195 uint64_t count = 0;
1196
1197 // Create a copy to avoid holding the lock while doing I/O
1198 for (const auto& i : *s_nsSpeeds.lock()) {
1199 count++;
1200
1201 // an <empty> can appear hear in case of authoritative (hosted) zones
1202 char tmp[26];
1203 fprintf(fp.get(), "%s\t%s\t", i.d_name.toLogString().c_str(), isoDateTimeMillis(i.d_lastget, tmp, sizeof(tmp)));
1204 bool first = true;
1205 for (const auto& j : i.d_collection) {
1206 fprintf(fp.get(), "%s%s/%.3f/%.3f", first ? "" : "\t", j.first.toStringWithPortExcept(53).c_str(), j.second.peek() / 1000.0f, j.second.last() / 1000.0f);
1207 first = false;
1208 }
1209 fprintf(fp.get(), "\n");
1210 }
1211 return count;
1212 }
1213
1214 uint64_t SyncRes::getThrottledServersSize()
1215 {
1216 return s_throttle.lock()->size();
1217 }
1218
1219 void SyncRes::pruneThrottledServers(time_t now)
1220 {
1221 s_throttle.lock()->prune(now);
1222 }
1223
1224 void SyncRes::clearThrottle()
1225 {
1226 s_throttle.lock()->clear();
1227 }
1228
1229 bool SyncRes::isThrottled(time_t now, const ComboAddress& server, const DNSName& target, QType qtype)
1230 {
1231 return s_throttle.lock()->shouldThrottle(now, std::make_tuple(server, target, qtype));
1232 }
1233
1234 bool SyncRes::isThrottled(time_t now, const ComboAddress& server)
1235 {
1236 return s_throttle.lock()->shouldThrottle(now, std::make_tuple(server, g_rootdnsname, 0));
1237 }
1238
1239 void SyncRes::doThrottle(time_t now, const ComboAddress& server, time_t duration, unsigned int tries)
1240 {
1241 s_throttle.lock()->throttle(now, std::make_tuple(server, g_rootdnsname, 0), duration, tries);
1242 }
1243
1244 void SyncRes::doThrottle(time_t now, const ComboAddress& server, const DNSName& name, QType qtype, time_t duration, unsigned int tries)
1245 {
1246 s_throttle.lock()->throttle(now, std::make_tuple(server, name, qtype), duration, tries);
1247 }
1248
1249 uint64_t SyncRes::doDumpThrottleMap(int fd)
1250 {
1251 int newfd = dup(fd);
1252 if (newfd == -1) {
1253 return 0;
1254 }
1255 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1256 if (!fp) {
1257 close(newfd);
1258 return 0;
1259 }
1260 fprintf(fp.get(), "; throttle map dump follows\n");
1261 fprintf(fp.get(), "; remote IP\tqname\tqtype\tcount\tttd\n");
1262 uint64_t count = 0;
1263
1264 // Get a copy to avoid holding the lock while doing I/O
1265 const auto throttleMap = s_throttle.lock()->getThrottleMap();
1266 for (const auto& i : throttleMap) {
1267 count++;
1268 char tmp[26];
1269 // remote IP, dns name, qtype, count, ttd
1270 fprintf(fp.get(), "%s\t%s\t%s\t%u\t%s\n", std::get<0>(i.thing).toString().c_str(), std::get<1>(i.thing).toLogString().c_str(), std::get<2>(i.thing).toString().c_str(), i.count, timestamp(i.ttd, tmp, sizeof(tmp)));
1271 }
1272
1273 return count;
1274 }
1275
1276 uint64_t SyncRes::getFailedServersSize()
1277 {
1278 return s_fails.lock()->size();
1279 }
1280
1281 void SyncRes::clearFailedServers()
1282 {
1283 s_fails.lock()->clear();
1284 }
1285
1286 void SyncRes::pruneFailedServers(time_t cutoff)
1287 {
1288 s_fails.lock()->prune(cutoff);
1289 }
1290
1291 unsigned long SyncRes::getServerFailsCount(const ComboAddress& server)
1292 {
1293 return s_fails.lock()->value(server);
1294 }
1295
1296 uint64_t SyncRes::doDumpFailedServers(int fd)
1297 {
1298 int newfd = dup(fd);
1299 if (newfd == -1) {
1300 return 0;
1301 }
1302 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1303 if (!fp) {
1304 close(newfd);
1305 return 0;
1306 }
1307 fprintf(fp.get(), "; failed servers dump follows\n");
1308 fprintf(fp.get(), "; remote IP\tcount\ttimestamp\n");
1309 uint64_t count = 0;
1310
1311 // We get a copy, so the I/O does not need to happen while holding the lock
1312 for (const auto& i : s_fails.lock()->getMapCopy()) {
1313 count++;
1314 char tmp[26];
1315 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.key.toString().c_str(), i.value, timestamp(i.last, tmp, sizeof(tmp)));
1316 }
1317
1318 return count;
1319 }
1320
1321 uint64_t SyncRes::getNonResolvingNSSize()
1322 {
1323 return s_nonresolving.lock()->size();
1324 }
1325
1326 void SyncRes::clearNonResolvingNS()
1327 {
1328 s_nonresolving.lock()->clear();
1329 }
1330
1331 void SyncRes::pruneNonResolving(time_t cutoff)
1332 {
1333 s_nonresolving.lock()->prune(cutoff);
1334 }
1335
1336 uint64_t SyncRes::doDumpNonResolvingNS(int fd)
1337 {
1338 int newfd = dup(fd);
1339 if (newfd == -1) {
1340 return 0;
1341 }
1342 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1343 if (!fp) {
1344 close(newfd);
1345 return 0;
1346 }
1347 fprintf(fp.get(), "; non-resolving nameserver dump follows\n");
1348 fprintf(fp.get(), "; name\tcount\ttimestamp\n");
1349 uint64_t count = 0;
1350
1351 // We get a copy, so the I/O does not need to happen while holding the lock
1352 for (const auto& i : s_nonresolving.lock()->getMapCopy()) {
1353 count++;
1354 char tmp[26];
1355 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.key.toString().c_str(), i.value, timestamp(i.last, tmp, sizeof(tmp)));
1356 }
1357
1358 return count;
1359 }
1360
1361 void SyncRes::clearSaveParentsNSSets()
1362 {
1363 s_savedParentNSSet.lock()->clear();
1364 }
1365
1366 size_t SyncRes::getSaveParentsNSSetsSize()
1367 {
1368 return s_savedParentNSSet.lock()->size();
1369 }
1370
1371 void SyncRes::pruneSaveParentsNSSets(time_t now)
1372 {
1373 s_savedParentNSSet.lock()->prune(now);
1374 }
1375
1376 uint64_t SyncRes::doDumpSavedParentNSSets(int fd)
1377 {
1378 int newfd = dup(fd);
1379 if (newfd == -1) {
1380 return 0;
1381 }
1382 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1383 if (!fp) {
1384 close(newfd);
1385 return 0;
1386 }
1387 fprintf(fp.get(), "; dump of saved parent nameserver sets succesfully used follows\n");
1388 fprintf(fp.get(), "; total entries: %zu\n", s_savedParentNSSet.lock()->size());
1389 fprintf(fp.get(), "; domain\tsuccess\tttd\n");
1390 uint64_t count = 0;
1391
1392 // We get a copy, so the I/O does not need to happen while holding the lock
1393 for (const auto& i : s_savedParentNSSet.lock()->getMapCopy()) {
1394 if (i.d_count == 0) {
1395 continue;
1396 }
1397 count++;
1398 char tmp[26];
1399 fprintf(fp.get(), "%s\t%" PRIu64 "\t%s\n", i.d_domain.toString().c_str(), i.d_count, timestamp(i.d_ttd, tmp, sizeof(tmp)));
1400 }
1401 return count;
1402 }
1403
1404 void SyncRes::pruneDoTProbeMap(time_t cutoff)
1405 {
1406 auto lock = s_dotMap.lock();
1407 auto& ind = lock->d_map.get<time_t>();
1408
1409 for (auto i = ind.begin(); i != ind.end();) {
1410 if (i->d_ttd >= cutoff) {
1411 // We're done as we loop ordered by d_ttd
1412 break;
1413 }
1414 if (i->d_status == DoTStatus::Status::Busy) {
1415 lock->d_numBusy--;
1416 }
1417 i = ind.erase(i);
1418 }
1419 }
1420
1421 uint64_t SyncRes::doDumpDoTProbeMap(int fd)
1422 {
1423 int newfd = dup(fd);
1424 if (newfd == -1) {
1425 return 0;
1426 }
1427 auto fp = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1428 if (!fp) {
1429 close(newfd);
1430 return 0;
1431 }
1432 fprintf(fp.get(), "; DoT probing map follows\n");
1433 fprintf(fp.get(), "; ip\tdomain\tcount\tstatus\tttd\n");
1434 uint64_t count = 0;
1435
1436 // We get a copy, so the I/O does not need to happen while holding the lock
1437 DoTMap copy;
1438 {
1439 copy = *s_dotMap.lock();
1440 }
1441 fprintf(fp.get(), "; %" PRIu64 " Busy entries\n", copy.d_numBusy);
1442 for (const auto& i : copy.d_map) {
1443 count++;
1444 char tmp[26];
1445 fprintf(fp.get(), "%s\t%s\t%" PRIu64 "\t%s\t%s\n", i.d_address.toString().c_str(), i.d_auth.toString().c_str(), i.d_count, i.toString().c_str(), timestamp(i.d_ttd, tmp, sizeof(tmp)));
1446 }
1447 return count;
1448 }
1449
1450 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
1451 to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
1452 so that if there are RRSIGs for a name, we'll have them.
1453
1454 However, some hosts simply can't answer questions which ask for DNSSEC. This can manifest itself as:
1455 * No answer
1456 * FormErr
1457 * Nonsense answer
1458
1459 The cause of "No answer" may be fragmentation, and it is tempting to probe if smaller answers would get through.
1460 Another cause of "No answer" may simply be a network condition.
1461 Nonsense answers are a clearer indication this host won't be able to do DNSSEC evah.
1462
1463 Previous implementations have suffered from turning off DNSSEC questions for an authoritative server based on timeouts.
1464 A clever idea is to only turn off DNSSEC if we know a domain isn't signed anyhow. The problem with that really
1465 clever idea however is that at this point in PowerDNS, we may simply not know that yet. All the DNSSEC thinking happens
1466 elsewhere. It may not have happened yet.
1467
1468 For now this means we can't be clever, but will turn off DNSSEC if you reply with FormError or gibberish.
1469 */
1470
/* Send one query to a remote server, choosing whether to include EDNS based
 * on what the shared EDNS status table has recorded for that server, and
 * update the table from the answer.
 *
 * The query is sent through d_asyncResolve when that hook is set, otherwise
 * through asyncresolve(). At most two attempts are made: the second one only
 * happens when an EDNS query got a valid FormErr answer without EDNS, in which
 * case the server is marked NOEDNS and the query is repeated without EDNS.
 *
 * \return the result of the last attempt made
 */
LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsMANDATORY, const DNSName& domain, const DNSName& auth, int type, bool doTCP, bool sendRDQuery, struct timeval* now, boost::optional<Netmask>& srcmask, LWResult* res, bool* chained, const DNSName& nsName) const
{
  /* what is your QUEST?
     the goal is to get as many remotes as possible on the best level of EDNS support
     The levels are:

     1) EDNSOK: Honors EDNS0, absent from table
     2) EDNSIGNORANT: Ignores EDNS0, gives replies without EDNS0
     3) NOEDNS: Generates FORMERR on EDNS queries

     Everybody starts out assumed to be EDNSOK.
     If EDNSOK, send out EDNS0
        If you FORMERR us, go to NOEDNS,
        If no EDNS in response, go to EDNSIGNORANT
     If EDNSIGNORANT, keep on including EDNS0, see what happens
        Same behaviour as EDNSOK
     If NOEDNS, send bare queries
  */

  // Read current status, defaulting to OK
  SyncRes::EDNSStatus::EDNSMode mode = EDNSStatus::EDNSOK;
  {
    // The lock is only held for this lookup, not while the query is in flight
    auto lock = s_ednsstatus.lock();
    auto ednsstatus = lock->find(ip); // does this include port? YES
    if (ednsstatus != lock->end()) {
      // An entry with a non-zero ttd in the past has expired: drop it and use the default
      if (ednsstatus->ttd && ednsstatus->ttd < d_now.tv_sec) {
        lock->erase(ednsstatus);
      }
      else {
        mode = ednsstatus->mode;
      }
    }
  }

  int EDNSLevel = 0;
  auto luaconfsLocal = g_luaconfs.getLocal();
  ResolveContext ctx;
  ctx.d_initialRequestId = d_initialRequestId;
  ctx.d_nsName = nsName;
#ifdef HAVE_FSTRM
  ctx.d_auth = auth;
#endif

  LWResult::Result ret;

  for (int tries = 0; tries < 2; ++tries) {

    if (mode == EDNSStatus::NOEDNS) {
      t_Counters.at(rec::Counter::noEdnsOutQueries)++;
      EDNSLevel = 0; // level != mode
    }
    // NOTE(review): the NOEDNS case is handled by the branch above, so
    // mode != NOEDNS always holds here and ednsMANDATORY does not influence
    // the outcome.
    else if (ednsMANDATORY || mode != EDNSStatus::NOEDNS) {
      EDNSLevel = 1;
    }

    DNSName sendQname(domain);
    if (g_lowercaseOutgoing) {
      sendQname.makeUsLowerCase();
    }

    if (d_asyncResolve) {
      ret = d_asyncResolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, res, chained);
    }
    else {
      ret = asyncresolve(ip, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
    }

    if (ret == LWResult::Result::PermanentError || ret == LWResult::Result::OSLimitError || ret == LWResult::Result::Spoofed) {
      break; // transport error, nothing to learn here
    }

    if (ret == LWResult::Result::Timeout) { // timeout, not doing anything with it now
      break;
    }

    if (EDNSLevel == 1) {
      // We sent out with EDNS
      // ret is LWResult::Result::Success
      // ednsstatus in table might be pruned or changed by another request/thread, so do a new lookup/insert if needed
      auto lock = s_ednsstatus.lock(); // all three branches below need a lock

      // Determine new mode
      if (res->d_validpacket && !res->d_haveEDNS && res->d_rcode == RCode::FormErr) {
        mode = EDNSStatus::NOEDNS;
        auto ednsstatus = lock->insert(ip).first;
        auto& ind = lock->get<ComboAddress>();
        lock->setMode(ind, ednsstatus, mode, d_now.tv_sec);
        // This is the only path that re-iterates the loop
        continue;
      }
      else if (!res->d_haveEDNS) {
        auto ednsstatus = lock->insert(ip).first;
        auto& ind = lock->get<ComboAddress>();
        lock->setMode(ind, ednsstatus, EDNSStatus::EDNSIGNORANT, d_now.tv_sec);
      }
      else {
        // New status is EDNSOK
        lock->erase(ip);
      }
    }

    break;
  }
  return ret;
}
1576
/* The parameters from rfc9156. */
/* maximum number of QNAME minimisation iterations */
static const unsigned int s_max_minimise_count = 10;
/* number of queries that should only have one label appended */
static const unsigned int s_minimise_one_lab = 4;

/* Compute the number of labels the next minimised query name should have.
 *
 * \param labels   number of labels of the current (ancestor) name
 * \param qnamelen number of labels of the full query name
 * \param i        zero-based QNAME minimisation iteration
 * \return the target label count, never more than qnamelen
 *
 * The first s_minimise_one_lab iterations add a single label. Until
 * s_max_minimise_count is reached, the remaining labels are spread evenly over
 * the iterations left (adding at least one label). After that all remaining
 * labels are added at once.
 */
static unsigned int qmStepLen(unsigned int labels, unsigned int qnamelen, unsigned int i)
{
  // Nothing left to add; also avoids relying on unsigned wrap-around below
  if (labels >= qnamelen) {
    return qnamelen;
  }

  const unsigned int remaining = qnamelen - labels;
  unsigned int step = remaining;

  if (i < s_minimise_one_lab) {
    step = 1;
  }
  else if (i < s_max_minimise_count) {
    step = std::max(1U, remaining / (s_max_minimise_count - i));
  }

  return std::min(labels + step, qnamelen);
}
1599
/* Resolve qname/qtype, applying QNAME minimisation where appropriate.
 *
 * Query-name filtering policies are applied first (when RPZ is wanted and no
 * policy has hit yet). When QNAME minimisation is disabled, or the name is in
 * an auth or recursive-forward zone, the work is handed straight to
 * doResolveNoQNameMinimization(). Otherwise the cache is consulted and, if
 * that does not produce an answer, the name is resolved label by label as
 * sketched in the comment inside the function.
 *
 * \param qname     the name to resolve
 * \param qtype     the type to resolve
 * \param ret       receives the resulting records
 * \param depth     the recursion depth we are in
 * \param beenthere passed on to the non-minimising resolver; also used to skip
 *                  child names that have already been visited
 * \param context   resolution context, carries (among others) the extended error
 * \return the resulting rcode
 */
int SyncRes::doResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context)
{
  auto prefix = getPrefix(depth);
  auto luaconfsLocal = g_luaconfs.getLocal();

  /* Apply qname (including CNAME chain) filtering policies */
  if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
    if (luaconfsLocal->dfe.getQueryPolicy(qname, d_discardedPolicies, d_appliedPolicy)) {
      mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
      bool done = false;
      int rcode = RCode::NoError;
      handlePolicyHit(prefix, qname, qtype, ret, done, rcode, depth);
      if (done) {
        return rcode;
      }
    }
  }

  initZoneCutsFromTA(qname, prefix);

  // In the auth or recursive forward case, it does not make sense to do qname-minimization
  if (!getQNameMinimization() || isRecursiveForwardOrAuth(qname)) {
    return doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
  }

  // The qname minimization algorithm is a simplified version of the one in RFC 9156.
  // It could be simplified because the cache maintenance (both positive and negative)
  // is already done by doResolveNoQNameMinimization().
  //
  // Sketch of algorithm:
  // Check cache
  //  If result found: done
  //  Otherwise determine closest ancestor from cache data
  //    Repeat querying A, adding more labels of the original qname
  //    If we get a delegation continue at ancestor determination
  //    Until we have the full name.
  //
  // The number of labels added per iteration is computed by qmStepLen(); labels
  // starting with an underscore are additionally appended right away.

  DNSName child;
  prefix.append(string("QM "));

  LOG(prefix << qname << ": doResolve" << endl);

  // Look in cache only
  vector<DNSRecord> retq;
  bool old = setCacheOnly(true);
  bool fromCache = false;
  // For cache peeking, we tell doResolveNoQNameMinimization not to consider the (non-recursive) forward case.
  // Otherwise all queries in a forward domain will be forwarded, while we want to consult the cache.
  int res = doResolveNoQNameMinimization(qname, qtype, retq, depth, beenthere, context, &fromCache, nullptr);
  setCacheOnly(old);
  if (fromCache) {
    LOG(prefix << qname << ": Step0 Found in cache" << endl);
    // A policy producing NXDOMAIN or NODATA replaces whatever was collected so far
    if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None && (d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NXDOMAIN || d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NODATA)) {
      ret.clear();
    }
    ret.insert(ret.end(), retq.begin(), retq.end());

    return res;
  }
  LOG(prefix << qname << ": Step0 Not cached" << endl);

  const unsigned int qnamelen = qname.countLabels();

  // Any zone found in the domain map at this point counts as "forwarded":
  // auth and recursive-forward zones have already been handled above
  DNSName fwdomain(qname);
  const bool forwarded = getBestAuthZone(&fwdomain) != t_sstorage.domainmap->end();
  if (forwarded) {
    LOG(prefix << qname << ": Step0 qname is in a forwarded domain " << fwdomain << endl);
  }

  // i counts the minimisation iterations; it is only advanced by the inner loop
  for (unsigned int i = 0; i <= qnamelen;) {

    // Step 1
    vector<DNSRecord> bestns;
    DNSName nsdomain(qname);
    if (qtype == QType::DS) {
      nsdomain.chopOff();
    }
    // the two retries allow getBestNSFromCache&co to reprime the root
    // hints, in case they ever go missing
    for (int tries = 0; tries < 2 && bestns.empty(); ++tries) {
      bool flawedNSSet = false;
      set<GetBestNSAnswer> beenthereIgnored;
      getBestNSFromCache(nsdomain, qtype, bestns, &flawedNSSet, depth, prefix, beenthereIgnored, boost::make_optional(forwarded, fwdomain));
      if (forwarded) {
        break;
      }
    }

    if (bestns.size() == 0) {
      if (!forwarded) {
        // Something terrible is wrong
        LOG(prefix << qname << ": Step1 No ancestor found return ServFail" << endl);
        return RCode::ServFail;
      }
      child = fwdomain;
    }
    else {
      LOG(prefix << qname << ": Step1 Ancestor from cache is " << bestns[0].d_name << endl);
      if (forwarded) {
        // Never start above the forwarded domain
        child = bestns[0].d_name.isPartOf(fwdomain) ? bestns[0].d_name : fwdomain;
        LOG(prefix << qname << ": Step1 Final Ancestor (using forwarding info) is " << child << endl);
      }
      else {
        child = bestns[0].d_name;
      }
    }
    for (; i <= qnamelen; i++) {
      // Step 2
      unsigned int labels = child.countLabels();
      unsigned int targetlen = qmStepLen(labels, qnamelen, i);

      // Extend child with labels of qname until it has targetlen labels
      while (labels < targetlen) {
        child.prependRawLabel(qname.getRawLabel(qnamelen - labels - 1));
        labels++;
      }
      // rfc9156 section-2.3, append labels if they start with an underscore
      while (labels < qnamelen) {
        auto prependLabel = qname.getRawLabel(qnamelen - labels - 1);
        if (prependLabel.at(0) != '_') {
          break;
        }
        child.prependRawLabel(prependLabel);
        labels++;
      }

      LOG(prefix << qname << ": Step2 New child " << child << endl);

      // Step 3 resolve
      if (child == qname) {
        LOG(prefix << qname << ": Step3 Going to do final resolve" << endl);
        res = doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
        LOG(prefix << qname << ": Step3 Final resolve: " << RCode::to_s(res) << "/" << ret.size() << endl);
        return res;
      }

      // If we have seen this child during resolution already; just skip it. We tried to QM it already or otherwise broken.
      bool skipStep4 = false;
      for (const auto& visitedNS : beenthere) {
        if (visitedNS.qname == child) {
          skipStep4 = true;
          break;
        }
      }
      if (skipStep4) {
        // NOTE(review): unlike the other log lines, this one does not include qname
        LOG(prefix << ": Step4 Being skipped as visited this child name already" << endl);
        continue;
      }

      // Step 4
      LOG(prefix << qname << ": Step4 Resolve A for child " << child << endl);
      // CNAME following is switched off for the intermediate query and restored right after
      bool oldFollowCNAME = d_followCNAME;
      d_followCNAME = false;
      retq.resize(0);
      StopAtDelegation stopAtDelegation = Stop;
      res = doResolveNoQNameMinimization(child, QType::A, retq, depth, beenthere, context, nullptr, &stopAtDelegation);
      d_followCNAME = oldFollowCNAME;
      LOG(prefix << qname << ": Step4 Resolve " << child << "|A result is " << RCode::to_s(res) << "/" << retq.size() << "/" << stopAtDelegation << endl);
      if (stopAtDelegation == Stopped) {
        // Leaves the inner loop without advancing i; the outer loop redoes step 1
        LOG(prefix << qname << ": Delegation seen, continue at step 1" << endl);
        break;
      }

      if (res != RCode::NoError) {
        // Case 5: unexpected answer
        LOG(prefix << qname << ": Step5: other rcode, last effort final resolve" << endl);
        setQNameMinimization(false);
        setQMFallbackMode(true);

        auto oldEDE = context.extendedError;
        res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, context);

        if (res == RCode::NoError) {
          t_Counters.at(rec::Counter::qnameminfallbacksuccess)++;
        }
        else {
          // as doResolveNoQNameMinimization clears the EDE, we put it back here, it is relevant but might not be set by the last effort attempt
          if (!context.extendedError) {
            context.extendedError = oldEDE;
          }
        }

        LOG(prefix << qname << ": Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size() << endl);
        return res;
      }
    }
  }

  // Should not be reached
  LOG(prefix << qname << ": Max iterations reached, return ServFail" << endl);
  return RCode::ServFail;
}
1794
1795 unsigned int SyncRes::getAdjustedRecursionBound() const
1796 {
1797 auto bound = s_maxdepth; // 40 is default value of s_maxdepth
1798 if (getQMFallbackMode()) {
1799 // We might have hit a depth level check, but we still want to allow some recursion levels in the fallback
1800 // no-qname-minimization case. This has the effect that a qname minimization fallback case might reach 150% of
1801 // maxdepth, taking care to not repeatedly increase the bound.
1802 bound += s_maxdepth / 2;
1803 }
1804 return bound;
1805 }
1806
1807 /*! This function will check the cache and go out to the internet if the answer is not in cache
1808 *
1809 * \param qname The name we need an answer for
1810 * \param qtype
1811 * \param ret The vector of DNSRecords we need to fill with the answers
1812 * \param depth The recursion depth we are in
1813 * \param beenthere
1814 * \param fromCache tells the caller the result came from the cache, may be nullptr
1815 * \param stopAtDelegation if non-nullptr and pointed-to value is Stop requests the callee to stop at a delegation, if so pointed-to value is set to Stopped
1816 * \return DNS RCODE or -1 (Error)
1817 */
1818 int SyncRes::doResolveNoQNameMinimization(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context, bool* fromCache, StopAtDelegation* stopAtDelegation)
1819 {
1820 context.extendedError.reset();
1821 auto prefix = getPrefix(depth);
1822
1823 LOG(prefix << qname << ": Wants " << (d_doDNSSEC ? "" : "NO ") << "DNSSEC processing, " << (d_requireAuthData ? "" : "NO ") << "auth data required by query for " << qtype << endl);
1824
1825 if (s_maxdepth > 0) {
1826 auto bound = getAdjustedRecursionBound();
1827 if (depth > bound) {
1828 string msg = "More than " + std::to_string(bound) + " (adjusted max-recursion-depth) levels of recursion needed while resolving " + qname.toLogString();
1829 LOG(prefix << qname << ": " << msg << endl);
1830 throw ImmediateServFailException(msg);
1831 }
1832 }
1833
1834 int res = 0;
1835
1836 const int iterations = !d_refresh && MemRecursorCache::s_maxServedStaleExtensions > 0 ? 2 : 1;
1837 for (int loop = 0; loop < iterations; loop++) {
1838
1839 d_serveStale = loop == 1;
1840
1841 // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS.
1842 if (!(d_updatingRootNS && qtype.getCode() == QType::NS && qname.isRoot())) {
1843 DNSName authname(qname);
1844 const auto iter = getBestAuthZone(&authname);
1845
1846 if (d_cacheonly) {
1847 if (iter != t_sstorage.domainmap->end()) {
1848 if (iter->second.isAuth()) {
1849 LOG(prefix << qname << ": Cache only lookup for '" << qname << "|" << qtype << "', in auth zone" << endl);
1850 ret.clear();
1851 d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, prefix, res);
1852 if (fromCache != nullptr) {
1853 *fromCache = d_wasOutOfBand;
1854 }
1855 return res;
1856 }
1857 }
1858 }
1859
1860 bool wasForwardedOrAuthZone = false;
1861 bool wasAuthZone = false;
1862 bool wasForwardRecurse = false;
1863
1864 if (iter != t_sstorage.domainmap->end()) {
1865 wasForwardedOrAuthZone = true;
1866
1867 if (iter->second.isAuth()) {
1868 wasAuthZone = true;
1869 }
1870 else if (iter->second.shouldRecurse()) {
1871 wasForwardRecurse = true;
1872 }
1873 }
1874
1875 /* When we are looking for a DS, we want to the non-CNAME cache check first
1876 because we can actually have a DS (from the parent zone) AND a CNAME (from
1877 the child zone), and what we really want is the DS */
1878 if (qtype != QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
1879 d_wasOutOfBand = wasAuthZone;
1880 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1881 // are in QM Step0) we might have a CNAME but not the corresponding target.
1882 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1883 // we will get the records from the cache, resulting in a small overhead.
1884 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1885 // RPZ rules will not be evaluated anymore (we already matched).
1886 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1887
1888 if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) {
1889 *fromCache = true;
1890 }
1891 /* Apply Post filtering policies */
1892
1893 if (d_wantsRPZ && !stoppedByPolicyHit) {
1894 auto luaLocal = g_luaconfs.getLocal();
1895 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1896 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1897 bool done = false;
1898 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1899 if (done && fromCache) {
1900 *fromCache = true;
1901 }
1902 }
1903 }
1904 return res;
1905 }
1906
1907 if (doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, prefix, res, context)) {
1908 // we done
1909 d_wasOutOfBand = wasAuthZone;
1910 if (fromCache) {
1911 *fromCache = true;
1912 }
1913
1914 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
1915 auto luaLocal = g_luaconfs.getLocal();
1916 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1917 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1918 bool done = false;
1919 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1920 }
1921 }
1922
1923 return res;
1924 }
1925
1926 /* if we have not found a cached DS (or denial of), now is the time to look for a CNAME */
1927 if (qtype == QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed
1928 d_wasOutOfBand = wasAuthZone;
1929 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1930 // are in QM Step0) we might have a CNAME but not the corresponding target.
1931 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1932 // we will get the records from the cache, resulting in a small overhead.
1933 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1934 // RPZ rules will not be evaluated anymore (we already matched).
1935 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1936
1937 if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) {
1938 *fromCache = true;
1939 }
1940 /* Apply Post filtering policies */
1941
1942 if (d_wantsRPZ && !stoppedByPolicyHit) {
1943 auto luaLocal = g_luaconfs.getLocal();
1944 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1945 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1946 bool done = false;
1947 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1948 if (done && fromCache) {
1949 *fromCache = true;
1950 }
1951 }
1952 }
1953
1954 return res;
1955 }
1956 }
1957
1958 if (d_cacheonly) {
1959 return 0;
1960 }
1961
1962 // When trying to serve-stale, we also only look at the cache. Don't look at d_serveStale, it
1963 // might be changed by recursive calls (this should be fixed in a better way!).
1964 if (loop == 1) {
1965 return res;
1966 }
1967
1968 LOG(prefix << qname << ": No cache hit for '" << qname << "|" << qtype << "', trying to find an appropriate NS record" << endl);
1969
1970 DNSName subdomain(qname);
1971 if (qtype == QType::DS)
1972 subdomain.chopOff();
1973
1974 NsSet nsset;
1975 bool flawedNSSet = false;
1976
1977 // the two retries allow getBestNSNamesFromCache&co to reprime the root
1978 // hints, in case they ever go missing
1979 for (int tries = 0; tries < 2 && nsset.empty(); ++tries) {
1980 subdomain = getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, prefix, beenthere); // pass beenthere to both occasions
1981 }
1982
1983 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, nullptr);
1984
1985 if (res == -1 && s_save_parent_ns_set) {
1986 // It did not work out, lets check if we have a saved parent NS set
1987 map<DNSName, vector<ComboAddress>> fallBack;
1988 {
1989 auto lock = s_savedParentNSSet.lock();
1990 auto domainData = lock->find(subdomain);
1991 if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) {
1992 nsset.clear();
1993 // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt
1994 // Take a copy to be able to release the lock, NsSet is actually a map, go figure
1995 for (const auto& ns : domainData->d_nsAddresses) {
1996 nsset.emplace(ns.first, pair(std::vector<ComboAddress>(), false));
1997 fallBack.emplace(ns.first, ns.second);
1998 }
1999 }
2000 }
2001 if (fallBack.size() > 0) {
2002 LOG(prefix << qname << ": Failure, but we have a saved parent NS set, trying that one" << endl);
2003 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, &fallBack);
2004 if (res == 0) {
2005 // It did work out
2006 s_savedParentNSSet.lock()->inc(subdomain);
2007 }
2008 }
2009 }
2010 /* Apply Post filtering policies */
2011 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
2012 auto luaLocal = g_luaconfs.getLocal();
2013 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
2014 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
2015 bool done = false;
2016 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
2017 }
2018 }
2019
2020 if (!res) {
2021 return 0;
2022 }
2023
2024 LOG(prefix << qname << ": Failed (res=" << res << ")" << endl);
2025 if (res >= 0) {
2026 break;
2027 }
2028 }
2029 return res < 0 ? RCode::ServFail : res;
2030 }
2031
#if 0
// For testing purposes only (compiled out): comparator that orders IPv6 addresses
// before IPv4 ones.
//
// The previous expression compared a.sin4.sin_family with itself, so it always
// returned true and never looked at b. It now compares a against b with a strict
// '<', which also makes it a valid strict weak ordering for std::sort & friends.
// NOTE(review): relies on AF_INET6 being numerically greater than AF_INET, as the
// original expression did -- confirm for the target platforms.
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return b.sin4.sin_family < a.sin4.sin_family;
}
#endif
2039
2040 struct speedOrderCA
2041 {
2042 speedOrderCA(std::map<ComboAddress, float>& speeds) :
2043 d_speeds(speeds) {}
2044 bool operator()(const ComboAddress& a, const ComboAddress& b) const
2045 {
2046 return d_speeds[a] < d_speeds[b];
2047 }
2048 std::map<ComboAddress, float>& d_speeds;
2049 };
2050
2051 /** This function explicitly goes out for A or AAAA addresses
 *
 * Collects the addresses of the nameserver called qname. The record cache is
 * consulted first (A when s_doIPv4, AAAA when s_doIPv6); only if that yields
 * nothing is doResolve() called, first for A and, if still empty, for AAAA.
 *
 * For the duration of the lookup d_requireAuthData and d_DNSSECValidationRequested
 * are forced to false, d_followCNAME to true, and the cache-only flag to cacheOnly.
 * The previous values are put back after the try/catch.
 * NOTE(review): only PolicyHitException is caught here, so any other exception
 * leaves this function without restoring those members.
 *
 * \param qname               name of the nameserver to get addresses for
 * \param depth               current recursion depth; doResolve() is called with depth + 1
 * \param prefix              prefix for log lines
 * \param beenthere           passed through to doResolve()
 * \param cacheOnly           value handed to setCacheOnly() while looking up
 * \param addressQueriesForNS incremented when outgoing queries were sent but no address was found
 * \return the addresses found (port 53, or 853 where shouldDoDoT() says so), shuffled and
 *         then stable-sorted by the speed recorded in s_nsSpeeds
2052 */
2053 vector<ComboAddress> SyncRes::getAddrs(const DNSName& qname, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, bool cacheOnly, unsigned int& addressQueriesForNS)
2054 {
2055 typedef vector<DNSRecord> res_t;
2056 typedef vector<ComboAddress> ret_t;
2057 ret_t ret;
2058
// Remember the resolver state we are about to override; it is restored after the try/catch below.
2059 bool oldCacheOnly = setCacheOnly(cacheOnly);
2060 bool oldRequireAuthData = d_requireAuthData;
2061 bool oldValidationRequested = d_DNSSECValidationRequested;
2062 bool oldFollowCNAME = d_followCNAME;
2063 bool seenV6 = false;
// Snapshot of the outgoing query counter, used at the end to tell whether this call went out to the network.
2064 const unsigned int startqueries = d_outqueries;
2065 d_requireAuthData = false;
2066 d_DNSSECValidationRequested = false;
2067 d_followCNAME = true;
2068
2069 MemRecursorCache::Flags flags = MemRecursorCache::None;
2070 if (d_serveStale) {
2071 flags |= MemRecursorCache::ServeStale;
2072 }
2073 try {
2074 // First look for both A and AAAA in the cache
2075 res_t cset;
2076 if (s_doIPv4 && g_recCache->get(d_now.tv_sec, qname, QType::A, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2077 for (const auto& i : cset) {
2078 if (auto rec = getRR<ARecordContent>(i)) {
2079 ret.push_back(rec->getCA(53));
2080 }
2081 }
2082 }
2083 if (s_doIPv6 && g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2084 for (const auto& i : cset) {
2085 if (auto rec = getRR<AAAARecordContent>(i)) {
2086 seenV6 = true;
2087 ret.push_back(rec->getCA(53));
2088 }
2089 }
2090 }
2091 if (ret.empty()) {
2092 // Neither A nor AAAA in the cache...
2093 Context newContext1;
2094 cset.clear();
2095 // Go out to get A's
2096 if (s_doIPv4 && doResolve(qname, QType::A, cset, depth + 1, beenthere, newContext1) == 0) { // this consults cache, OR goes out
2097 for (auto const& i : cset) {
// The d_type check skips any non-A records doResolve() put into cset.
2098 if (i.d_type == QType::A) {
2099 if (auto rec = getRR<ARecordContent>(i)) {
2100 ret.push_back(rec->getCA(53));
2101 }
2102 }
2103 }
2104 }
2105 if (s_doIPv6) { // s_doIPv6 **IMPLIES** pdns::isQueryLocalAddressFamilyEnabled(AF_INET6) returned true
2106 if (ret.empty()) {
2107 // We only go out immediately to find IPv6 records if we did not find any IPv4 ones.
2108 Context newContext2;
2109 if (doResolve(qname, QType::AAAA, cset, depth + 1, beenthere, newContext2) == 0) { // this consults cache, OR goes out
2110 for (const auto& i : cset) {
2111 if (i.d_type == QType::AAAA) {
2112 if (auto rec = getRR<AAAARecordContent>(i)) {
2113 seenV6 = true;
2114 ret.push_back(rec->getCA(53));
2115 }
2116 }
2117 }
2118 }
2119 }
2120 else {
2121 // We have some IPv4 records, consult the cache, we might have encountered some IPv6 glue
2122 cset.clear();
2123 if (g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2124 for (const auto& i : cset) {
2125 if (auto rec = getRR<AAAARecordContent>(i)) {
2126 seenV6 = true;
2127 ret.push_back(rec->getCA(53));
2128 }
2129 }
2130 }
2131 }
2132 }
2133 }
2134 if (s_doIPv6 && !seenV6 && !cacheOnly) {
2135 // No IPv6 records in cache, check negcache and submit async task if negache does not have the data
2136 // so that the next time the cache or the negcache will have data
2137 NegCache::NegCacheEntry ne;
2138 bool inNegCache = g_negCache->get(qname, QType::AAAA, d_now, ne, false);
2139 if (!inNegCache) {
2140 pushResolveTask(qname, QType::AAAA, d_now.tv_sec, d_now.tv_sec + 60);
2141 }
2142 }
2143 }
2144 catch (const PolicyHitException&) {
2145 // We ignore a policy hit while trying to retrieve the addresses
2146 // of a NS and keep processing the current query
2147 }
2148
2149 if (ret.empty() && d_outqueries > startqueries) {
2150 // We did 1 or more outgoing queries to resolve this NS name but returned empty handed
2151 addressQueriesForNS++;
2152 }
// Put back the resolver state saved at the top of the function.
2153 d_requireAuthData = oldRequireAuthData;
2154 d_DNSSECValidationRequested = oldValidationRequested;
2155 setCacheOnly(oldCacheOnly);
2156 d_followCNAME = oldFollowCNAME;
2157
// Switch individual addresses to port 853 when shouldDoDoT() says so.
2158 if (s_max_busy_dot_probes > 0 && s_dot_to_port_853) {
2159 for (auto& add : ret) {
2160 if (shouldDoDoT(add, d_now.tv_sec)) {
2161 add.setPort(853);
2162 }
2163 }
2164 }
2165 /* we need to remove from the nsSpeeds collection the existing IPs
2166 for this nameserver that are no longer in the set, even if there
2167 is only one or none at all in the current set.
2168 */
2169 map<ComboAddress, float> speeds;
2170 {
// The lock on s_nsSpeeds is held only for this inner scope.
2171 auto lock = s_nsSpeeds.lock();
2172 auto& collection = lock->find_or_enter(qname, d_now);
2173 float factor = collection.getFactor(d_now);
2174 for (const auto& val : ret) {
2175 speeds[val] = collection.d_collection[val].get(factor);
2176 }
2177 collection.purge(speeds);
2178 }
2179
2180 if (ret.size() > 1) {
// Shuffle first, then stable_sort: addresses with equal speed keep their shuffled (random) relative order.
2181 shuffle(ret.begin(), ret.end(), pdns::dns_random_engine());
2182 speedOrderCA so(speeds);
2183 stable_sort(ret.begin(), ret.end(), so);
2184 }
2185
2186 if (doLog()) {
2187 LOG(prefix << qname << ": Nameserver " << qname << " IPs: ");
2188 bool first = true;
2189 for (const auto& addr : ret) {
2190 if (first) {
2191 first = false;
2192 }
2193 else {
2194 LOG(", ");
2195 }
2196 LOG((addr.toString()) << "(" << fmtfloat(speeds[addr] / 1000.0) << "ms)");
2197 }
2198 LOG(endl);
2199 }
2200
2201 return ret;
2202 }
2203
2204 void SyncRes::getBestNSFromCache(const DNSName& qname, const QType qtype, vector<DNSRecord>& bestns, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, const boost::optional<DNSName>& cutOffDomain)
2205 {
2206 DNSName subdomain(qname);
2207 bestns.clear();
2208 bool brokeloop;
2209 MemRecursorCache::Flags flags = MemRecursorCache::None;
2210 if (d_serveStale) {
2211 flags |= MemRecursorCache::ServeStale;
2212 }
2213 do {
2214 if (cutOffDomain && (subdomain == *cutOffDomain || !subdomain.isPartOf(*cutOffDomain))) {
2215 break;
2216 }
2217 brokeloop = false;
2218 LOG(prefix << qname << ": Checking if we have NS in cache for '" << subdomain << "'" << endl);
2219 vector<DNSRecord> ns;
2220 *flawedNSSet = false;
2221
2222 if (g_recCache->get(d_now.tv_sec, subdomain, QType::NS, flags, &ns, d_cacheRemote, d_routingTag) > 0) {
2223 if (s_maxnsperresolve > 0 && ns.size() > s_maxnsperresolve) {
2224 vector<DNSRecord> selected;
2225 selected.reserve(s_maxnsperresolve);
2226 std::sample(ns.cbegin(), ns.cend(), std::back_inserter(selected), s_maxnsperresolve, pdns::dns_random_engine());
2227 ns = selected;
2228 }
2229 bestns.reserve(ns.size());
2230
2231 for (auto k = ns.cbegin(); k != ns.cend(); ++k) {
2232 if (k->d_ttl > (unsigned int)d_now.tv_sec) {
2233 vector<DNSRecord> aset;
2234 QType nsqt{QType::ADDR};
2235 if (s_doIPv4 && !s_doIPv6) {
2236 nsqt = QType::A;
2237 }
2238 else if (!s_doIPv4 && s_doIPv6) {
2239 nsqt = QType::AAAA;
2240 }
2241
2242 const DNSRecord& dr = *k;
2243 auto nrr = getRR<NSRecordContent>(dr);
2244 if (nrr && (!nrr->getNS().isPartOf(subdomain) || g_recCache->get(d_now.tv_sec, nrr->getNS(), nsqt, flags, doLog() ? &aset : 0, d_cacheRemote, d_routingTag) > 0)) {
2245 bestns.push_back(dr);
2246 LOG(prefix << qname << ": NS (with ip, or non-glue) in cache for '" << subdomain << "' -> '" << nrr->getNS() << "'");
2247 LOG(", within bailiwick: " << nrr->getNS().isPartOf(subdomain));
2248 if (!aset.empty()) {
2249 LOG(", in cache, ttl=" << (unsigned int)(((time_t)aset.begin()->d_ttl - d_now.tv_sec)) << endl);
2250 }
2251 else {
2252 LOG(", not in cache / did not look at cache" << endl);
2253 }
2254 }
2255 else {
2256 *flawedNSSet = true;
2257 LOG(prefix << qname << ": NS in cache for '" << subdomain << "', but needs glue (" << nrr->getNS() << ") which we miss or is expired" << endl);
2258 }
2259 }
2260 }
2261
2262 if (!bestns.empty()) {
2263 GetBestNSAnswer answer;
2264 answer.qname = qname;
2265 answer.qtype = qtype.getCode();
2266 for (const auto& dr : bestns) {
2267 if (auto nsContent = getRR<NSRecordContent>(dr)) {
2268 answer.bestns.emplace(dr.d_name, nsContent->getNS());
2269 }
2270 }
2271
2272 auto insertionPair = beenthere.insert(std::move(answer));
2273 if (!insertionPair.second) {
2274 brokeloop = true;
2275 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' but part of LOOP (already seen " << answer.qname << ")! Trying less specific NS" << endl);
2276 ;
2277 if (doLog())
2278 for (set<GetBestNSAnswer>::const_iterator j = beenthere.begin(); j != beenthere.end(); ++j) {
2279 bool neo = (j == insertionPair.first);
2280 LOG(prefix << qname << ": Beenthere" << (neo ? "*" : "") << ": " << j->qname << "|" << DNSRecordContent::NumberToType(j->qtype) << " (" << (unsigned int)j->bestns.size() << ")" << endl);
2281 }
2282 bestns.clear();
2283 }
2284 else {
2285 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' (flawedNSSet=" << *flawedNSSet << ")" << endl);
2286 return;
2287 }
2288 }
2289 }
2290 LOG(prefix << qname << ": No valid/useful NS in cache for '" << subdomain << "'" << endl);
2291
2292 if (subdomain.isRoot() && !brokeloop) {
2293 // We lost the root NS records
2294 primeHints();
2295 LOG(prefix << qname << ": Reprimed the root" << endl);
2296 /* let's prevent an infinite loop */
2297 if (!d_updatingRootNS) {
2298 auto log = g_slog->withName("housekeeping");
2299 getRootNS(d_now, d_asyncResolve, depth, log);
2300 }
2301 }
2302 } while (subdomain.chopOff());
2303 }
2304
2305 SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(DNSName* qname) const
2306 {
2307 if (t_sstorage.domainmap->empty()) {
2308 return t_sstorage.domainmap->end();
2309 }
2310
2311 SyncRes::domainmap_t::const_iterator ret;
2312 do {
2313 ret = t_sstorage.domainmap->find(*qname);
2314 if (ret != t_sstorage.domainmap->end())
2315 break;
2316 } while (qname->chopOff());
2317 return ret;
2318 }
2319
2320 /** doesn't actually do the work, leaves that to getBestNSFromCache */
2321 DNSName SyncRes::getBestNSNamesFromCache(const DNSName& qname, const QType qtype, NsSet& nsset, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere)
2322 {
2323 DNSName authOrForwDomain(qname);
2324
2325 domainmap_t::const_iterator iter = getBestAuthZone(&authOrForwDomain);
2326 // We have an auth, forwarder of forwarder-recurse
2327 if (iter != t_sstorage.domainmap->end()) {
2328 if (iter->second.isAuth()) {
2329 // this gets picked up in doResolveAt, the empty DNSName, combined with the
2330 // empty vector means 'we are auth for this zone'
2331 nsset.insert({DNSName(), {{}, false}});
2332 return authOrForwDomain;
2333 }
2334 else {
2335 if (iter->second.shouldRecurse()) {
2336 // Again, picked up in doResolveAt. An empty DNSName, combined with a
2337 // non-empty vector of ComboAddresses means 'this is a forwarded domain'
2338 // This is actually picked up in retrieveAddressesForNS called from doResolveAt.
2339 nsset.insert({DNSName(), {iter->second.d_servers, true}});
2340 return authOrForwDomain;
2341 }
2342 }
2343 }
2344
2345 // We might have a (non-recursive) forwarder, but maybe the cache already contains
2346 // a better NS
2347 vector<DNSRecord> bestns;
2348 DNSName nsFromCacheDomain(g_rootdnsname);
2349 getBestNSFromCache(qname, qtype, bestns, flawedNSSet, depth, prefix, beenthere);
2350
2351 // Pick up the auth domain
2352 for (const auto& k : bestns) {
2353 const auto nsContent = getRR<NSRecordContent>(k);
2354 if (nsContent) {
2355 nsFromCacheDomain = k.d_name;
2356 break;
2357 }
2358 }
2359
2360 if (iter != t_sstorage.domainmap->end()) {
2361 if (doLog()) {
2362 LOG(prefix << qname << " authOrForwDomain: " << authOrForwDomain << " nsFromCacheDomain: " << nsFromCacheDomain << " isPartof: " << authOrForwDomain.isPartOf(nsFromCacheDomain) << endl);
2363 }
2364
2365 // If the forwarder is better or equal to what's found in the cache, use forwarder. Note that name.isPartOf(name).
2366 // So queries that get NS for authOrForwDomain itself go to the forwarder
2367 if (authOrForwDomain.isPartOf(nsFromCacheDomain)) {
2368 if (doLog()) {
2369 LOG(prefix << qname << ": Using forwarder as NS" << endl);
2370 }
2371 nsset.insert({DNSName(), {iter->second.d_servers, false}});
2372 return authOrForwDomain;
2373 }
2374 else {
2375 if (doLog()) {
2376 LOG(prefix << qname << ": Using NS from cache" << endl);
2377 }
2378 }
2379 }
2380 for (auto k = bestns.cbegin(); k != bestns.cend(); ++k) {
2381 // The actual resolver code will not even look at the ComboAddress or bool
2382 const auto nsContent = getRR<NSRecordContent>(*k);
2383 if (nsContent) {
2384 nsset.insert({nsContent->getNS(), {{}, false}});
2385 }
2386 }
2387 return nsFromCacheDomain;
2388 }
2389
2390 void SyncRes::updateValidationStatusInCache(const DNSName& qname, const QType qt, bool aa, vState newState) const
2391 {
2392 if (qt == QType::ANY || qt == QType::ADDR) {
2393 // not doing that
2394 return;
2395 }
2396
2397 if (vStateIsBogus(newState)) {
2398 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, d_routingTag, aa, newState, s_maxbogusttl + d_now.tv_sec);
2399 }
2400 else {
2401 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qt, d_cacheRemote, d_routingTag, aa, newState, boost::none);
2402 }
2403 }
2404
2405 static bool scanForCNAMELoop(const DNSName& name, const vector<DNSRecord>& records)
2406 {
2407 for (const auto& record : records) {
2408 if (record.d_type == QType::CNAME && record.d_place == DNSResourceRecord::ANSWER) {
2409 if (name == record.d_name) {
2410 return true;
2411 }
2412 }
2413 }
2414 return false;
2415 }
2416
/** Check the record cache for a CNAME at qname or, failing that, a DNAME at qname or one of
 *  its ancestors, and act on it.
 *
 *  On a hit the cached record (plus signatures and authority records when d_doDNSSEC is set,
 *  plus a synthesized CNAME for a DNAME hit) is appended to ret. Unless the caller asked for
 *  the CNAME/DNAME itself, for a DS or DNSKEY, or d_followCNAME is false, the target is then
 *  resolved through doResolve() at depth + 1 and the validation state is merged into context.
 *
 *  \param qname             name being looked up
 *  \param qtype             type being looked up
 *  \param ret               records are appended here on a hit
 *  \param depth             current recursion depth
 *  \param prefix            prefix for log lines
 *  \param res               output: rcode, only meaningful when true is returned
 *  \param context           validation state (and more) of the answer
 *  \param wasAuthZone       passed in by the caller; when true the re-validation step is skipped
 *  \param wasForwardRecurse passed in by the caller; when true RequireAuth is not demanded from the cache
 *  \return false when there is no usable CNAME/DNAME in the cache (ret is untouched in that case),
 *          true when the query has been handled here (including the "too deep" ServFail)
 *  \throws ImmediateServFailException on a CNAME to self, a CNAME loop, a failed DNAME
 *          substitution, missing record content, or when no cached record was usable
 */
2417 bool SyncRes::doCNAMECacheCheck(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context, bool wasAuthZone, bool wasForwardRecurse)
2418 {
2419 // Even if s_maxdepth is zero, we want to have this check
2420 auto bound = std::max(40U, getAdjustedRecursionBound());
2421 // Bounds were > 9 and > 15 originally, now they are derived from s_maxdepth (default 40)
2422 // Apply more strict bound if we see throttling
2423 if ((depth >= bound / 4 && d_outqueries > 10 && d_throttledqueries > 5) || depth > bound * 3 / 8) {
2424 LOG(prefix << qname << ": Recursing (CNAME or other indirection) too deep, depth=" << depth << endl);
2425 res = RCode::ServFail;
2426 return true;
2427 }
2428
2429 vector<DNSRecord> cset;
2430 vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
2431 vector<std::shared_ptr<DNSRecord>> authorityRecs;
// wasAuth and authZone are handed to the cache lookups below as output arguments.
2432 bool wasAuth;
// Upper limit applied to the TTL of the returned records; lowered to s_maxbogusttl below when validation yields Bogus.
2433 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
// Name and type of the cache entry that matched; foundName stays empty when nothing matched.
2434 DNSName foundName;
2435 DNSName authZone;
2436 QType foundQT = QType::ENT;
2437
2438 /* we don't require auth data for forward-recurse lookups */
2439 MemRecursorCache::Flags flags = MemRecursorCache::None;
2440 if (!wasForwardRecurse && d_requireAuthData) {
2441 flags |= MemRecursorCache::RequireAuth;
2442 }
2443 if (d_refresh) {
2444 flags |= MemRecursorCache::Refresh;
2445 }
2446 if (d_serveStale) {
2447 flags |= MemRecursorCache::ServeStale;
2448 }
2449 if (g_recCache->get(d_now.tv_sec, qname, QType::CNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2450 foundName = qname;
2451 foundQT = QType::CNAME;
2452 }
2453
2454 if (foundName.empty() && qname != g_rootdnsname) {
2455 // look for a DNAME cache hit
2456 auto labels = qname.getRawLabels();
2457 DNSName dnameName(g_rootdnsname);
2458
// Build candidate DNAME owners starting from the root, prepending the last remaining
// label of qname on every iteration, until a DNAME is found or qname itself is reached.
2459 do {
2460 dnameName.prependRawLabel(labels.back());
2461 labels.pop_back();
2462 if (dnameName == qname && qtype != QType::DNAME) { // The client does not want a DNAME, but we've reached the QNAME already. So there is no match
2463 break;
2464 }
2465 if (g_recCache->get(d_now.tv_sec, dnameName, QType::DNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2466 foundName = dnameName;
2467 foundQT = QType::DNAME;
2468 break;
2469 }
2470 } while (!labels.empty());
2471 }
2472
2473 if (foundName.empty()) {
2474 return false;
2475 }
2476
2477 if (qtype == QType::DS && authZone == qname) {
2478 /* CNAME at APEX of the child zone, we can't use that to prove that
2479 there is no DS */
2480 LOG(prefix << qname << ": Found a " << foundQT.toString() << " cache hit of '" << qname << "' from " << authZone << ", but such a record at the apex of the child zone does not prove that there is no DS in the parent zone" << endl);
2481 return false;
2482 }
2483
// Only the first record of class IN that has not expired is processed: every path through
// the body below ends in a return or a throw.
2484 for (auto const& record : cset) {
2485 if (record.d_class != QClass::IN) {
2486 continue;
2487 }
2488
2489 if (record.d_ttl > (unsigned int)d_now.tv_sec) {
2490
2491 if (!wasAuthZone && shouldValidate() && (wasAuth || wasForwardRecurse) && context.state == vState::Indeterminate && d_requireAuthData) {
2492 /* This means we couldn't figure out the state when this entry was cached */
2493
2494 vState recordState = getValidationStatus(foundName, !signatures.empty(), qtype == QType::DS, depth, prefix);
2495 if (recordState == vState::Secure) {
2496 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, validating.." << endl);
2497 context.state = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, foundName, foundQT, cset, signatures);
2498 if (context.state != vState::Indeterminate) {
2499 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, new validation result is " << context.state << endl);
2500 if (vStateIsBogus(context.state)) {
2501 capTTL = s_maxbogusttl;
2502 }
// Store the freshly computed state so the cache entry does not need to be validated again.
2503 updateValidationStatusInCache(foundName, foundQT, wasAuth, context.state);
2504 }
2505 }
2506 }
2507
2508 LOG(prefix << qname << ": Found cache " << foundQT.toString() << " hit for '" << foundName << "|" << foundQT.toString() << "' to '" << record.getContent()->getZoneRepresentation() << "', validation state is " << context.state << endl);
2509
// Subtracting now turns the cached d_ttl into a remaining TTL, which is then capped by capTTL.
2510 DNSRecord dr = record;
2511 dr.d_ttl -= d_now.tv_sec;
2512 dr.d_ttl = std::min(dr.d_ttl, capTTL);
2513 const uint32_t ttl = dr.d_ttl;
2514 ret.reserve(ret.size() + 2 + signatures.size() + authorityRecs.size());
2515 ret.push_back(dr);
2516
2517 for (const auto& signature : signatures) {
2518 DNSRecord sigdr;
2519 sigdr.d_type = QType::RRSIG;
2520 sigdr.d_name = foundName;
2521 sigdr.d_ttl = ttl;
2522 sigdr.setContent(signature);
2523 sigdr.d_place = DNSResourceRecord::ANSWER;
2524 sigdr.d_class = QClass::IN;
2525 ret.push_back(sigdr);
2526 }
2527
2528 for (const auto& rec : authorityRecs) {
2529 DNSRecord authDR(*rec);
2530 authDR.d_ttl = ttl;
2531 ret.push_back(authDR);
2532 }
2533
2534 DNSName newTarget;
2535 if (foundQT == QType::DNAME) {
2536 if (qtype == QType::DNAME && qname == foundName) { // client wanted the DNAME, no need to synthesize a CNAME
2537 res = RCode::NoError;
2538 return true;
2539 }
2540 // Synthesize a CNAME
2541 auto dnameRR = getRR<DNAMERecordContent>(record);
2542 if (dnameRR == nullptr) {
2543 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|DNAME cache entry");
2544 }
2545 const auto& dnameSuffix = dnameRR->getTarget();
// targetPrefix is the part of qname below the DNAME owner; it is re-attached to the DNAME target below.
2546 DNSName targetPrefix = qname.makeRelative(foundName);
2547 try {
2548 dr.d_type = QType::CNAME;
2549 dr.d_name = targetPrefix + foundName;
2550 newTarget = targetPrefix + dnameSuffix;
2551 dr.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newTarget)));
2552 ret.push_back(dr);
2553 }
2554 catch (const std::exception& e) {
2555 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
2556 // But this is consistent with processRecords
2557 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + foundName.toLogString() + "', DNAME target: '" + dnameSuffix.toLogString() + "', substituted name: '" + targetPrefix.toLogString() + "." + dnameSuffix.toLogString() + "' : " + e.what());
2558 }
2559
2560 LOG(prefix << qname << ": Synthesized " << dr.d_name << "|CNAME " << newTarget << endl);
2561 }
2562
2563 if (qtype == QType::CNAME) { // perhaps they really wanted a CNAME!
2564 res = RCode::NoError;
2565 return true;
2566 }
2567
// For DS and DNSKEY queries the CNAME/DNAME is returned as-is and the target is not chased.
2568 if (qtype == QType::DS || qtype == QType::DNSKEY) {
2569 res = RCode::NoError;
2570 return true;
2571 }
2572
2573 // We have a DNAME _or_ CNAME cache hit and the client wants something else than those two.
2574 // Let's find the answer!
2575 if (foundQT == QType::CNAME) {
2576 const auto cnameContent = getRR<CNAMERecordContent>(record);
2577 if (cnameContent == nullptr) {
2578 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|CNAME cache entry");
2579 }
2580 newTarget = cnameContent->getTarget();
2581 }
2582
2583 if (qname == newTarget) {
2584 string msg = "Got a CNAME referral (from cache) to self";
2585 LOG(prefix << qname << ": " << msg << endl);
2586 throw ImmediateServFailException(msg);
2587 }
2588
2589 if (newTarget.isPartOf(qname)) {
2590 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
2591 string msg = "Got a CNAME referral (from cache) to child, disabling QM";
2592 LOG(prefix << qname << ": " << msg << endl);
2593 setQNameMinimization(false);
2594 }
2595
2596 if (!d_followCNAME) {
2597 res = RCode::NoError;
2598 return true;
2599 }
2600
2601 // Check to see if we already have seen the new target as a previous target
2602 if (scanForCNAMELoop(newTarget, ret)) {
2603 string msg = "got a CNAME referral (from cache) that causes a loop";
2604 LOG(prefix << qname << ": Status=" << msg << endl);
2605 throw ImmediateServFailException(msg);
2606 }
2607
// The target is resolved with a fresh loop-detection set and its own Context; only the
// validation state is merged back into the caller's context afterwards.
2608 set<GetBestNSAnswer> beenthere;
2609 Context cnameContext;
2610 // Be aware that going out on the network might be disabled (cache-only), for example because we are in QM Step0,
2611 // so you can't trust that a real lookup will have been made.
2612 res = doResolve(newTarget, qtype, ret, depth + 1, beenthere, cnameContext);
2613 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << context.state << " with the state from the DNAME/CNAME quest: " << cnameContext.state << endl);
2614 updateValidationState(qname, context.state, cnameContext.state, prefix);
2615
2616 return true;
2617 }
2618 }
// Reached when the cache reported a hit but no record of class IN with a TTL in the future was in the set.
2619 throw ImmediateServFailException("Could not determine whether or not there was a CNAME or DNAME in cache for '" + qname.toLogString() + "'");
2620 }
2621
2622 namespace
2623 {
2624 struct CacheEntry
2625 {
2626 vector<DNSRecord> records;
2627 vector<shared_ptr<const RRSIGRecordContent>> signatures;
2628 uint32_t signaturesTTL{std::numeric_limits<uint32_t>::max()};
2629 };
2630 struct CacheKey
2631 {
2632 DNSName name;
2633 QType type;
2634 DNSResourceRecord::Place place;
2635 bool operator<(const CacheKey& rhs) const
2636 {
2637 return std::tie(type, place, name) < std::tie(rhs.type, rhs.place, rhs.name);
2638 }
2639 };
2640 using tcache_t = map<CacheKey, CacheEntry>;
2641 }
2642
2643 static void reapRecordsFromNegCacheEntryForValidation(tcache_t& tcache, const vector<DNSRecord>& records)
2644 {
2645 for (const auto& rec : records) {
2646 if (rec.d_type == QType::RRSIG) {
2647 auto rrsig = getRR<RRSIGRecordContent>(rec);
2648 if (rrsig) {
2649 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
2650 }
2651 }
2652 else {
2653 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(rec);
2654 }
2655 }
2656 }
2657
2658 static bool negativeCacheEntryHasSOA(const NegCache::NegCacheEntry& ne)
2659 {
2660 return !ne.authoritySOA.records.empty();
2661 }
2662
2663 static void reapRecordsForValidation(std::map<QType, CacheEntry>& entries, const vector<DNSRecord>& records)
2664 {
2665 for (const auto& rec : records) {
2666 entries[rec.d_type].records.push_back(rec);
2667 }
2668 }
2669
2670 static void reapSignaturesForValidation(std::map<QType, CacheEntry>& entries, const vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
2671 {
2672 for (const auto& sig : signatures) {
2673 entries[sig->d_type].signatures.push_back(sig);
2674 }
2675 }
2676
2677 /*!
2678 * Convenience function to push the records from records into ret with a new TTL
2679 *
2680 * \param records DNSRecords that need to go into ret
2681 * \param ttl The new TTL for these records
2682 * \param ret The vector of DNSRecords that should contain the records with the modified TTL
2683 */
2684 static void addTTLModifiedRecords(vector<DNSRecord>& records, const uint32_t ttl, vector<DNSRecord>& ret)
2685 {
2686 for (auto& rec : records) {
2687 rec.d_ttl = ttl;
2688 ret.push_back(std::move(rec));
2689 }
2690 }
2691
2692 void SyncRes::computeNegCacheValidationStatus(const NegCache::NegCacheEntry& ne, const DNSName& qname, const QType qtype, const int res, vState& state, unsigned int depth, const string& prefix)
2693 {
2694 tcache_t tcache;
2695 reapRecordsFromNegCacheEntryForValidation(tcache, ne.authoritySOA.records);
2696 reapRecordsFromNegCacheEntryForValidation(tcache, ne.authoritySOA.signatures);
2697 reapRecordsFromNegCacheEntryForValidation(tcache, ne.DNSSECRecords.records);
2698 reapRecordsFromNegCacheEntryForValidation(tcache, ne.DNSSECRecords.signatures);
2699
2700 for (const auto& entry : tcache) {
2701 // this happens when we did store signatures, but passed on the records themselves
2702 if (entry.second.records.empty()) {
2703 continue;
2704 }
2705
2706 const DNSName& owner = entry.first.name;
2707
2708 vState recordState = getValidationStatus(owner, !entry.second.signatures.empty(), qtype == QType::DS, depth, prefix);
2709 if (state == vState::Indeterminate) {
2710 state = recordState;
2711 }
2712
2713 if (recordState == vState::Secure) {
2714 recordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, owner, QType(entry.first.type), entry.second.records, entry.second.signatures);
2715 }
2716
2717 if (recordState != vState::Indeterminate && recordState != state) {
2718 updateValidationState(qname, state, recordState, prefix);
2719 if (state != vState::Secure) {
2720 break;
2721 }
2722 }
2723 }
2724
2725 if (state == vState::Secure) {
2726 vState neValidationState = ne.d_validationState;
2727 dState expectedState = res == RCode::NXDomain ? dState::NXDOMAIN : dState::NXQTYPE;
2728 dState denialState = getDenialValidationState(ne, expectedState, false, prefix);
2729 updateDenialValidationState(qname, neValidationState, ne.d_name, state, denialState, expectedState, qtype == QType::DS, depth, prefix);
2730 }
2731 if (state != vState::Indeterminate) {
2732 /* validation succeeded, let's update the cache entry so we don't have to validate again */
2733 boost::optional<time_t> capTTD = boost::none;
2734 if (vStateIsBogus(state)) {
2735 capTTD = d_now.tv_sec + s_maxbogusttl;
2736 }
2737 g_negCache->updateValidationStatus(ne.d_name, ne.d_qtype, state, capTTD);
2738 }
2739 }
2740
2741 bool SyncRes::doCacheCheck(const DNSName& qname, const DNSName& authname, bool wasForwardedOrAuthZone, bool wasAuthZone, bool wasForwardRecurse, QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context)
2742 {
2743 bool giveNegative = false;
2744
2745 // sqname and sqtype are used contain 'higher' names if we have them (e.g. powerdns.com|SOA when we find a negative entry for doesnotexist.powerdns.com|A)
2746 DNSName sqname(qname);
2747 QType sqt(qtype);
2748 uint32_t sttl = 0;
2749 // cout<<"Lookup for '"<<qname<<"|"<<qtype.toString()<<"' -> "<<getLastLabel(qname)<<endl;
2750 vState cachedState;
2751 NegCache::NegCacheEntry ne;
2752
2753 if (s_rootNXTrust && g_negCache->getRootNXTrust(qname, d_now, ne, d_serveStale, d_refresh) && ne.d_auth.isRoot() && !(wasForwardedOrAuthZone && !authname.isRoot())) { // when forwarding, the root may only neg-cache if it was forwarded to.
2754 sttl = ne.d_ttd - d_now.tv_sec;
2755 LOG(prefix << qname << ": Entire name '" << qname << "', is negatively cached via '" << ne.d_auth << "' & '" << ne.d_name << "' for another " << sttl << " seconds" << endl);
2756 res = RCode::NXDomain;
2757 giveNegative = true;
2758 cachedState = ne.d_validationState;
2759 if (s_addExtendedResolutionDNSErrors) {
2760 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized by root-nx-trust"};
2761 }
2762 }
2763 else if (g_negCache->get(qname, qtype, d_now, ne, false, d_serveStale, d_refresh)) {
2764 /* If we are looking for a DS, discard NXD if auth == qname
2765 and ask for a specific denial instead */
2766 if (qtype != QType::DS || ne.d_qtype.getCode() || ne.d_auth != qname || g_negCache->get(qname, qtype, d_now, ne, true, d_serveStale, d_refresh)) {
2767 /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
2768 and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
2769 query. */
2770 if (qtype == QType::DS && ne.d_qtype.getCode() && !d_externalDSQuery.empty() && qname == d_externalDSQuery && !negativeCacheEntryHasSOA(ne)) {
2771 giveNegative = false;
2772 }
2773 else {
2774 res = RCode::NXDomain;
2775 sttl = ne.d_ttd - d_now.tv_sec;
2776 giveNegative = true;
2777 cachedState = ne.d_validationState;
2778 if (ne.d_qtype.getCode()) {
2779 LOG(prefix << qname << "|" << qtype << ": Is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2780 res = RCode::NoError;
2781 if (s_addExtendedResolutionDNSErrors) {
2782 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result from negative cache"};
2783 }
2784 }
2785 else {
2786 LOG(prefix << qname << ": Entire name '" << qname << "' is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2787 if (s_addExtendedResolutionDNSErrors) {
2788 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result from negative cache for entire name"};
2789 }
2790 }
2791 }
2792 }
2793 }
2794 else if (s_hardenNXD != HardenNXD::No && !qname.isRoot() && !wasForwardedOrAuthZone) {
2795 auto labels = qname.getRawLabels();
2796 DNSName negCacheName(g_rootdnsname);
2797 negCacheName.prependRawLabel(labels.back());
2798 labels.pop_back();
2799 while (!labels.empty()) {
2800 if (g_negCache->get(negCacheName, QType::ENT, d_now, ne, true, d_serveStale, d_refresh)) {
2801 if (ne.d_validationState == vState::Indeterminate && validationEnabled()) {
2802 // LOG(prefix << negCacheName << " negatively cached and vState::Indeterminate, trying to validate NXDOMAIN" << endl);
2803 // ...
2804 // And get the updated ne struct
2805 // t_sstorage.negcache.get(negCacheName, QType(0), d_now, ne, true);
2806 }
2807 if ((s_hardenNXD == HardenNXD::Yes && !vStateIsBogus(ne.d_validationState)) || ne.d_validationState == vState::Secure) {
2808 res = RCode::NXDomain;
2809 sttl = ne.d_ttd - d_now.tv_sec;
2810 giveNegative = true;
2811 cachedState = ne.d_validationState;
2812 LOG(prefix << qname << ": Name '" << negCacheName << "' and below, is negatively cached via '" << ne.d_auth << "' for another " << sttl << " seconds" << endl);
2813 if (s_addExtendedResolutionDNSErrors) {
2814 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized by nothing-below-nxdomain (RFC8020)"};
2815 }
2816 break;
2817 }
2818 }
2819 negCacheName.prependRawLabel(labels.back());
2820 labels.pop_back();
2821 }
2822 }
2823
2824 if (giveNegative) {
2825
2826 context.state = cachedState;
2827
2828 if (!wasAuthZone && shouldValidate() && context.state == vState::Indeterminate) {
2829 LOG(prefix << qname << ": Got vState::Indeterminate state for records retrieved from the negative cache, validating.." << endl);
2830 computeNegCacheValidationStatus(ne, qname, qtype, res, context.state, depth, prefix);
2831
2832 if (context.state != cachedState && vStateIsBogus(context.state)) {
2833 sttl = std::min(sttl, s_maxbogusttl);
2834 }
2835 }
2836
2837 // Transplant SOA to the returned packet
2838 addTTLModifiedRecords(ne.authoritySOA.records, sttl, ret);
2839 if (d_doDNSSEC) {
2840 addTTLModifiedRecords(ne.authoritySOA.signatures, sttl, ret);
2841 addTTLModifiedRecords(ne.DNSSECRecords.records, sttl, ret);
2842 addTTLModifiedRecords(ne.DNSSECRecords.signatures, sttl, ret);
2843 }
2844
2845 LOG(prefix << qname << ": Updating validation state with negative cache content for " << qname << " to " << context.state << endl);
2846 return true;
2847 }
2848
2849 vector<DNSRecord> cset;
2850 bool found = false, expired = false;
2851 vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
2852 vector<std::shared_ptr<DNSRecord>> authorityRecs;
2853 uint32_t ttl = 0;
2854 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
2855 bool wasCachedAuth;
2856 MemRecursorCache::Flags flags = MemRecursorCache::None;
2857 if (!wasForwardRecurse && d_requireAuthData) {
2858 flags |= MemRecursorCache::RequireAuth;
2859 }
2860 if (d_serveStale) {
2861 flags |= MemRecursorCache::ServeStale;
2862 }
2863 if (d_refresh) {
2864 flags |= MemRecursorCache::Refresh;
2865 }
2866 if (g_recCache->get(d_now.tv_sec, sqname, sqt, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &cachedState, &wasCachedAuth, nullptr, &d_fromAuthIP) > 0) {
2867
2868 LOG(prefix << sqname << ": Found cache hit for " << sqt.toString() << ": ");
2869
2870 if (!wasAuthZone && shouldValidate() && (wasCachedAuth || wasForwardRecurse) && cachedState == vState::Indeterminate && d_requireAuthData) {
2871
2872 /* This means we couldn't figure out the state when this entry was cached */
2873 vState recordState = getValidationStatus(qname, !signatures.empty(), qtype == QType::DS, depth, prefix);
2874
2875 if (recordState == vState::Secure) {
2876 LOG(prefix << sqname << ": Got vState::Indeterminate state from the cache, validating.." << endl);
2877 if (sqt == QType::DNSKEY && sqname == getSigner(signatures)) {
2878 cachedState = validateDNSKeys(sqname, cset, signatures, depth, prefix);
2879 }
2880 else {
2881 if (sqt == QType::ANY) {
2882 std::map<QType, CacheEntry> types;
2883 reapRecordsForValidation(types, cset);
2884 reapSignaturesForValidation(types, signatures);
2885
2886 for (const auto& type : types) {
2887 vState cachedRecordState;
2888 if (type.first == QType::DNSKEY && sqname == getSigner(type.second.signatures)) {
2889 cachedRecordState = validateDNSKeys(sqname, type.second.records, type.second.signatures, depth, prefix);
2890 }
2891 else {
2892 cachedRecordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, type.first, type.second.records, type.second.signatures);
2893 }
2894 updateDNSSECValidationState(cachedState, cachedRecordState);
2895 }
2896 }
2897 else {
2898 cachedState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, sqt, cset, signatures);
2899 }
2900 }
2901 }
2902 else {
2903 cachedState = recordState;
2904 }
2905
2906 if (cachedState != vState::Indeterminate) {
2907 LOG(prefix << qname << ": Got vState::Indeterminate state from the cache, validation result is " << cachedState << endl);
2908 if (vStateIsBogus(cachedState)) {
2909 capTTL = s_maxbogusttl;
2910 }
2911 if (sqt != QType::ANY && sqt != QType::ADDR) {
2912 updateValidationStatusInCache(sqname, sqt, wasCachedAuth, cachedState);
2913 }
2914 }
2915 }
2916
2917 for (auto j = cset.cbegin(); j != cset.cend(); ++j) {
2918
2919 LOG(j->getContent()->getZoneRepresentation());
2920
2921 if (j->d_class != QClass::IN) {
2922 continue;
2923 }
2924
2925 if (j->d_ttl > (unsigned int)d_now.tv_sec) {
2926 DNSRecord dr = *j;
2927 dr.d_ttl -= d_now.tv_sec;
2928 dr.d_ttl = std::min(dr.d_ttl, capTTL);
2929 ttl = dr.d_ttl;
2930 ret.push_back(dr);
2931 LOG("[ttl=" << dr.d_ttl << "] ");
2932 found = true;
2933 }
2934 else {
2935 LOG("[expired] ");
2936 expired = true;
2937 }
2938 }
2939
2940 ret.reserve(ret.size() + signatures.size() + authorityRecs.size());
2941
2942 for (const auto& signature : signatures) {
2943 DNSRecord dr;
2944 dr.d_type = QType::RRSIG;
2945 dr.d_name = sqname;
2946 dr.d_ttl = ttl;
2947 dr.setContent(signature);
2948 dr.d_place = DNSResourceRecord::ANSWER;
2949 dr.d_class = QClass::IN;
2950 ret.push_back(dr);
2951 }
2952
2953 for (const auto& rec : authorityRecs) {
2954 DNSRecord dr(*rec);
2955 dr.d_ttl = ttl;
2956 ret.push_back(dr);
2957 }
2958
2959 LOG(endl);
2960 if (found && !expired) {
2961 if (!giveNegative)
2962 res = 0;
2963 LOG(prefix << qname << ": Updating validation state with cache content for " << qname << " to " << cachedState << endl);
2964 context.state = cachedState;
2965 return true;
2966 }
2967 else
2968 LOG(prefix << qname << ": Cache had only stale entries" << endl);
2969 }
2970
2971 /* let's check if we have a NSEC covering that record */
2972 if (g_aggressiveNSECCache && !wasForwardedOrAuthZone) {
2973 if (g_aggressiveNSECCache->getDenial(d_now.tv_sec, qname, qtype, ret, res, d_cacheRemote, d_routingTag, d_doDNSSEC, LogObject(prefix))) {
2974 context.state = vState::Secure;
2975 if (s_addExtendedResolutionDNSErrors) {
2976 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized from aggressive NSEC cache (RFC8198)"};
2977 }
2978 return true;
2979 }
2980 }
2981
2982 return false;
2983 }
2984
2985 bool SyncRes::moreSpecificThan(const DNSName& a, const DNSName& b) const
2986 {
2987 return (a.isPartOf(b) && a.countLabels() > b.countLabels());
2988 }
2989
2990 struct speedOrder
2991 {
2992 bool operator()(const std::pair<DNSName, float>& a, const std::pair<DNSName, float>& b) const
2993 {
2994 return a.second < b.second;
2995 }
2996 };
2997
2998 std::vector<std::pair<DNSName, float>> SyncRes::shuffleInSpeedOrder(const DNSName& qname, NsSet& tnameservers, const string& prefix)
2999 {
3000 std::vector<std::pair<DNSName, float>> rnameservers;
3001 rnameservers.reserve(tnameservers.size());
3002 for (const auto& tns : tnameservers) {
3003 float speed = s_nsSpeeds.lock()->fastest(tns.first, d_now);
3004 rnameservers.emplace_back(tns.first, speed);
3005 if (tns.first.empty()) // this was an authoritative OOB zone, don't pollute the nsSpeeds with that
3006 return rnameservers;
3007 }
3008
3009 shuffle(rnameservers.begin(), rnameservers.end(), pdns::dns_random_engine());
3010 speedOrder so;
3011 stable_sort(rnameservers.begin(), rnameservers.end(), so);
3012
3013 if (doLog()) {
3014 LOG(prefix << qname << ": Nameservers: ");
3015 for (auto i = rnameservers.begin(); i != rnameservers.end(); ++i) {
3016 if (i != rnameservers.begin()) {
3017 LOG(", ");
3018 if (!((i - rnameservers.begin()) % 3)) {
3019 LOG(endl
3020 << prefix << " ");
3021 }
3022 }
3023 LOG(i->first.toLogString() << "(" << fmtfloat(i->second / 1000.0) << "ms)");
3024 }
3025 LOG(endl);
3026 }
3027 return rnameservers;
3028 }
3029
3030 vector<ComboAddress> SyncRes::shuffleForwardSpeed(const DNSName& qname, const vector<ComboAddress>& rnameservers, const string& prefix, const bool wasRd)
3031 {
3032 vector<ComboAddress> nameservers = rnameservers;
3033 map<ComboAddress, float> speeds;
3034
3035 for (const auto& val : nameservers) {
3036 DNSName nsName = DNSName(val.toStringWithPort());
3037 float speed = s_nsSpeeds.lock()->fastest(nsName, d_now);
3038 speeds[val] = speed;
3039 }
3040 shuffle(nameservers.begin(), nameservers.end(), pdns::dns_random_engine());
3041 speedOrderCA so(speeds);
3042 stable_sort(nameservers.begin(), nameservers.end(), so);
3043
3044 if (doLog()) {
3045 LOG(prefix << qname << ": Nameservers: ");
3046 for (vector<ComboAddress>::const_iterator i = nameservers.cbegin(); i != nameservers.cend(); ++i) {
3047 if (i != nameservers.cbegin()) {
3048 LOG(", ");
3049 if (!((i - nameservers.cbegin()) % 3)) {
3050 LOG(endl
3051 << prefix << " ");
3052 }
3053 }
3054 LOG((wasRd ? string("+") : string("-")) << i->toStringWithPort() << "(" << fmtfloat(speeds[*i] / 1000.0) << "ms)");
3055 }
3056 LOG(endl);
3057 }
3058 return nameservers;
3059 }
3060
3061 static uint32_t getRRSIGTTL(const time_t now, const std::shared_ptr<const RRSIGRecordContent>& rrsig)
3062 {
3063 uint32_t res = 0;
3064 if (now < rrsig->d_sigexpire) {
3065 res = static_cast<uint32_t>(rrsig->d_sigexpire) - now;
3066 }
3067 return res;
3068 }
3069
3070 static const set<QType> nsecTypes = {QType::NSEC, QType::NSEC3};
3071
3072 /* Fills the authoritySOA and DNSSECRecords fields from ne with those found in the records
3073 *
3074 * \param records The records to parse for the authority SOA and NSEC(3) records
3075 * \param ne The NegCacheEntry to be filled out (will not be cleared, only appended to
3076 */
3077 static void harvestNXRecords(const vector<DNSRecord>& records, NegCache::NegCacheEntry& ne, const time_t now, uint32_t* lowestTTL)
3078 {
3079 for (const auto& rec : records) {
3080 if (rec.d_place != DNSResourceRecord::AUTHORITY) {
3081 // RFC 4035 section 3.1.3. indicates that NSEC records MUST be placed in
3082 // the AUTHORITY section. Section 3.1.1 indicates that that RRSIGs for
3083 // records MUST be in the same section as the records they cover.
3084 // Hence, we ignore all records outside of the AUTHORITY section.
3085 continue;
3086 }
3087
3088 if (rec.d_type == QType::RRSIG) {
3089 auto rrsig = getRR<RRSIGRecordContent>(rec);
3090 if (rrsig) {
3091 if (rrsig->d_type == QType::SOA) {
3092 ne.authoritySOA.signatures.push_back(rec);
3093 if (lowestTTL && isRRSIGNotExpired(now, *rrsig)) {
3094 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3095 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3096 }
3097 }
3098 if (nsecTypes.count(rrsig->d_type)) {
3099 ne.DNSSECRecords.signatures.push_back(rec);
3100 if (lowestTTL && isRRSIGNotExpired(now, *rrsig)) {
3101 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3102 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3103 }
3104 }
3105 }
3106 continue;
3107 }
3108 if (rec.d_type == QType::SOA) {
3109 ne.authoritySOA.records.push_back(rec);
3110 if (lowestTTL) {
3111 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3112 }
3113 continue;
3114 }
3115 if (nsecTypes.count(rec.d_type)) {
3116 ne.DNSSECRecords.records.push_back(rec);
3117 if (lowestTTL) {
3118 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3119 }
3120 continue;
3121 }
3122 }
3123 }
3124
3125 static cspmap_t harvestCSPFromNE(const NegCache::NegCacheEntry& ne)
3126 {
3127 cspmap_t cspmap;
3128 for (const auto& rec : ne.DNSSECRecords.signatures) {
3129 if (rec.d_type == QType::RRSIG) {
3130 auto rrc = getRR<RRSIGRecordContent>(rec);
3131 if (rrc) {
3132 cspmap[{rec.d_name, rrc->d_type}].signatures.push_back(rrc);
3133 }
3134 }
3135 }
3136 for (const auto& rec : ne.DNSSECRecords.records) {
3137 cspmap[{rec.d_name, rec.d_type}].records.insert(rec.getContent());
3138 }
3139 return cspmap;
3140 }
3141
3142 // TODO remove after processRecords is fixed!
3143 // Adds the RRSIG for the SOA and the NSEC(3) + RRSIGs to ret
3144 static void addNXNSECS(vector<DNSRecord>& ret, const vector<DNSRecord>& records)
3145 {
3146 NegCache::NegCacheEntry ne;
3147 harvestNXRecords(records, ne, 0, nullptr);
3148 ret.insert(ret.end(), ne.authoritySOA.signatures.begin(), ne.authoritySOA.signatures.end());
3149 ret.insert(ret.end(), ne.DNSSECRecords.records.begin(), ne.DNSSECRecords.records.end());
3150 ret.insert(ret.end(), ne.DNSSECRecords.signatures.begin(), ne.DNSSECRecords.signatures.end());
3151 }
3152
3153 static bool rpzHitShouldReplaceContent(const DNSName& qname, const QType qtype, const std::vector<DNSRecord>& records)
3154 {
3155 if (qtype == QType::CNAME) {
3156 return true;
3157 }
3158
3159 for (const auto& record : records) {
3160 if (record.d_type == QType::CNAME) {
3161 if (auto content = getRR<CNAMERecordContent>(record)) {
3162 if (qname == content->getTarget()) {
3163 /* we have a CNAME whose target matches the entry we are about to
3164 generate, so it will complete the current records, not replace
3165 them
3166 */
3167 return false;
3168 }
3169 }
3170 }
3171 }
3172
3173 return true;
3174 }
3175
3176 static void removeConflictingRecord(std::vector<DNSRecord>& records, const DNSName& name, const QType dtype)
3177 {
3178 for (auto it = records.begin(); it != records.end();) {
3179 bool remove = false;
3180
3181 if (it->d_class == QClass::IN && (it->d_type == QType::CNAME || dtype == QType::CNAME || it->d_type == dtype) && it->d_name == name) {
3182 remove = true;
3183 }
3184 else if (it->d_class == QClass::IN && it->d_type == QType::RRSIG && it->d_name == name) {
3185 if (auto rrc = getRR<RRSIGRecordContent>(*it)) {
3186 if (rrc->d_type == QType::CNAME || rrc->d_type == dtype) {
3187 /* also remove any RRSIG that could conflict */
3188 remove = true;
3189 }
3190 }
3191 }
3192
3193 if (remove) {
3194 it = records.erase(it);
3195 }
3196 else {
3197 ++it;
3198 }
3199 }
3200 }
3201
3202 void SyncRes::handlePolicyHit(const std::string& prefix, const DNSName& qname, const QType qtype, std::vector<DNSRecord>& ret, bool& done, int& rcode, unsigned int depth)
3203 {
3204 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
3205 /* reset to no match */
3206 d_appliedPolicy = DNSFilterEngine::Policy();
3207 return;
3208 }
3209
3210 /* don't account truncate actions for TCP queries, since they are not applied */
3211 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !d_queryReceivedOverTCP) {
3212 ++t_Counters.at(rec::PolicyHistogram::policy).at(d_appliedPolicy.d_kind);
3213 ++t_Counters.at(rec::PolicyNameHits::policyName).counts[d_appliedPolicy.getName()];
3214 }
3215
3216 if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
3217 LOG(prefix << qname << "|" << qtype << ':' << d_appliedPolicy.getLogString() << endl);
3218 }
3219
3220 switch (d_appliedPolicy.d_kind) {
3221
3222 case DNSFilterEngine::PolicyKind::NoAction:
3223 return;
3224
3225 case DNSFilterEngine::PolicyKind::Drop:
3226 ++t_Counters.at(rec::Counter::policyDrops);
3227 throw ImmediateQueryDropException();
3228
3229 case DNSFilterEngine::PolicyKind::NXDOMAIN:
3230 ret.clear();
3231 rcode = RCode::NXDomain;
3232 done = true;
3233 return;
3234
3235 case DNSFilterEngine::PolicyKind::NODATA:
3236 ret.clear();
3237 rcode = RCode::NoError;
3238 done = true;
3239 return;
3240
3241 case DNSFilterEngine::PolicyKind::Truncate:
3242 if (!d_queryReceivedOverTCP) {
3243 ret.clear();
3244 rcode = RCode::NoError;
3245 throw SendTruncatedAnswerException();
3246 }
3247 return;
3248
3249 case DNSFilterEngine::PolicyKind::Custom: {
3250 if (rpzHitShouldReplaceContent(qname, qtype, ret)) {
3251 ret.clear();
3252 }
3253
3254 rcode = RCode::NoError;
3255 done = true;
3256 auto spoofed = d_appliedPolicy.getCustomRecords(qname, qtype.getCode());
3257 for (auto& dr : spoofed) {
3258 removeConflictingRecord(ret, dr.d_name, dr.d_type);
3259 }
3260
3261 for (auto& dr : spoofed) {
3262 ret.push_back(dr);
3263
3264 if (dr.d_name == qname && dr.d_type == QType::CNAME && qtype != QType::CNAME) {
3265 if (auto content = getRR<CNAMERecordContent>(dr)) {
3266 vState newTargetState = vState::Indeterminate;
3267 handleNewTarget(prefix, qname, content->getTarget(), qtype.getCode(), ret, rcode, depth, {}, newTargetState);
3268 }
3269 }
3270 }
3271 }
3272 }
3273 }
3274
3275 bool SyncRes::nameserversBlockedByRPZ(const DNSFilterEngine& dfe, const NsSet& nameservers)
3276 {
3277 /* we skip RPZ processing if:
3278 - it was disabled (d_wantsRPZ is false) ;
3279 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3280 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3281 process any further RPZ rules. Except that we need to process rules of higher priority..
3282 */
3283 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3284 for (auto const& ns : nameservers) {
3285 bool match = dfe.getProcessingPolicy(ns.first, d_discardedPolicies, d_appliedPolicy);
3286 if (match) {
3287 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3288 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3289 LOG(", however nameserver " << ns.first << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3290 return true;
3291 }
3292 }
3293
3294 // Traverse all IP addresses for this NS to see if they have an RPN NSIP policy
3295 for (auto const& address : ns.second.first) {
3296 match = dfe.getProcessingPolicy(address, d_discardedPolicies, d_appliedPolicy);
3297 if (match) {
3298 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3299 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3300 LOG(", however nameserver " << ns.first << " IP address " << address.toString() << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3301 return true;
3302 }
3303 }
3304 }
3305 }
3306 }
3307 return false;
3308 }
3309
3310 bool SyncRes::nameserverIPBlockedByRPZ(const DNSFilterEngine& dfe, const ComboAddress& remoteIP)
3311 {
3312 /* we skip RPZ processing if:
3313 - it was disabled (d_wantsRPZ is false) ;
3314 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3315 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3316 process any further RPZ rules. Except that we need to process rules of higher priority..
3317 */
3318 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3319 bool match = dfe.getProcessingPolicy(remoteIP, d_discardedPolicies, d_appliedPolicy);
3320 if (match) {
3321 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3322 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
3323 LOG(" (blocked by RPZ policy '" + d_appliedPolicy.getName() + "')");
3324 return true;
3325 }
3326 }
3327 }
3328 return false;
3329 }
3330
3331 vector<ComboAddress> SyncRes::retrieveAddressesForNS(const std::string& prefix, const DNSName& qname, std::vector<std::pair<DNSName, float>>::const_iterator& tns, const unsigned int depth, set<GetBestNSAnswer>& beenthere, const vector<std::pair<DNSName, float>>& rnameservers, NsSet& nameservers, bool& sendRDQuery, bool& pierceDontQuery, bool& flawedNSSet, bool cacheOnly, unsigned int& nretrieveAddressesForNS)
3332 {
3333 vector<ComboAddress> result;
3334
3335 size_t nonresolvingfails = 0;
3336 if (!tns->first.empty()) {
3337 if (s_nonresolvingnsmaxfails > 0) {
3338 nonresolvingfails = s_nonresolving.lock()->value(tns->first);
3339 if (nonresolvingfails >= s_nonresolvingnsmaxfails) {
3340 LOG(prefix << qname << ": NS " << tns->first << " in non-resolving map, skipping" << endl);
3341 return result;
3342 }
3343 }
3344
3345 LOG(prefix << qname << ": Trying to resolve NS '" << tns->first << "' (" << 1 + tns - rnameservers.begin() << "/" << (unsigned int)rnameservers.size() << ")" << endl);
3346 const unsigned int oldOutQueries = d_outqueries;
3347 try {
3348 result = getAddrs(tns->first, depth, prefix, beenthere, cacheOnly, nretrieveAddressesForNS);
3349 }
3350 // Other exceptions should likely not throttle...
3351 catch (const ImmediateServFailException& ex) {
3352 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3353 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3354 if (!dontThrottleNames->check(tns->first)) {
3355 s_nonresolving.lock()->incr(tns->first, d_now);
3356 }
3357 }
3358 throw ex;
3359 }
3360 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3361 if (result.empty()) {
3362 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3363 if (!dontThrottleNames->check(tns->first)) {
3364 s_nonresolving.lock()->incr(tns->first, d_now);
3365 }
3366 }
3367 else if (nonresolvingfails > 0) {
3368 // Succeeding resolve, clear memory of recent failures
3369 s_nonresolving.lock()->clear(tns->first);
3370 }
3371 }
3372 pierceDontQuery = false;
3373 }
3374 else {
3375 LOG(prefix << qname << ": Domain has hardcoded nameserver");
3376
3377 if (nameservers[tns->first].first.size() > 1) {
3378 LOG("s");
3379 }
3380 LOG(endl);
3381
3382 sendRDQuery = nameservers[tns->first].second;
3383 result = shuffleForwardSpeed(qname, nameservers[tns->first].first, prefix, sendRDQuery);
3384 pierceDontQuery = true;
3385 }
3386 return result;
3387 }
3388
3389 void SyncRes::checkMaxQperQ(const DNSName& qname) const
3390 {
3391 if (d_outqueries + d_throttledqueries > s_maxqperq) {
3392 throw ImmediateServFailException("more than " + std::to_string(s_maxqperq) + " (max-qperq) queries sent or throttled while resolving " + qname.toLogString());
3393 }
3394 }
3395
3396 bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress& remoteIP, const DNSName& qname, const QType qtype, bool pierceDontQuery)
3397 {
3398 if (isThrottled(d_now.tv_sec, remoteIP)) {
3399 LOG(prefix << qname << ": Server throttled " << endl);
3400 t_Counters.at(rec::Counter::throttledqueries)++;
3401 d_throttledqueries++;
3402 return true;
3403 }
3404 else if (isThrottled(d_now.tv_sec, remoteIP, qname, qtype)) {
3405 LOG(prefix << qname << ": Query throttled " << remoteIP.toString() << ", " << qname << "; " << qtype << endl);
3406 t_Counters.at(rec::Counter::throttledqueries)++;
3407 d_throttledqueries++;
3408 return true;
3409 }
3410 else if (!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
3411 // We could have retrieved an NS from the cache in a forwarding domain
3412 // Even in the case of !pierceDontQuery we still want to allow that NS
3413 DNSName forwardCandidate(qname);
3414 auto it = getBestAuthZone(&forwardCandidate);
3415 if (it == t_sstorage.domainmap->end()) {
3416 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3417 t_Counters.at(rec::Counter::dontqueries)++;
3418 return true;
3419 }
3420 else {
3421 // The name (from the cache) is forwarded, but is it forwarded to an IP in known forwarders?
3422 const auto& ips = it->second.d_servers;
3423 if (std::find(ips.cbegin(), ips.cend(), remoteIP) == ips.cend()) {
3424 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3425 t_Counters.at(rec::Counter::dontqueries)++;
3426 return true;
3427 }
3428 else {
3429 LOG(prefix << qname << ": Sending query to " << remoteIP.toString() << ", blocked by 'dont-query' but a forwarding/auth case" << endl);
3430 }
3431 }
3432 }
3433 return false;
3434 }
3435
3436 bool SyncRes::validationEnabled() const
3437 {
3438 return g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate;
3439 }
3440
3441 uint32_t SyncRes::computeLowestTTD(const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, uint32_t signaturesTTL, const std::vector<std::shared_ptr<DNSRecord>>& authorityRecs) const
3442 {
3443 uint32_t lowestTTD = std::numeric_limits<uint32_t>::max();
3444 for (const auto& record : records) {
3445 lowestTTD = min(lowestTTD, record.d_ttl);
3446 }
3447
3448 /* even if it was not requested for that request (Process, and neither AD nor DO set),
3449 it might be requested at a later time so we need to be careful with the TTL. */
3450 if (validationEnabled() && !signatures.empty()) {
3451 /* if we are validating, we don't want to cache records after their signatures expire. */
3452 /* records TTL are now TTD, let's add 'now' to the signatures lowest TTL */
3453 lowestTTD = min(lowestTTD, static_cast<uint32_t>(signaturesTTL + d_now.tv_sec));
3454
3455 for (const auto& sig : signatures) {
3456 if (isRRSIGNotExpired(d_now.tv_sec, *sig)) {
3457 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3458 lowestTTD = min(lowestTTD, static_cast<uint32_t>(sig->d_sigexpire));
3459 }
3460 }
3461 }
3462
3463 for (const auto& entry : authorityRecs) {
3464 /* be careful, this is still a TTL here */
3465 lowestTTD = min(lowestTTD, static_cast<uint32_t>(entry->d_ttl + d_now.tv_sec));
3466
3467 if (entry->d_type == QType::RRSIG && validationEnabled()) {
3468 auto rrsig = getRR<RRSIGRecordContent>(*entry);
3469 if (rrsig) {
3470 if (isRRSIGNotExpired(d_now.tv_sec, *rrsig)) {
3471 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3472 lowestTTD = min(lowestTTD, static_cast<uint32_t>(rrsig->d_sigexpire));
3473 }
3474 }
3475 }
3476 }
3477
3478 return lowestTTD;
3479 }
3480
3481 void SyncRes::updateValidationState(const DNSName& qname, vState& state, const vState stateUpdate, const string& prefix)
3482 {
3483 LOG(prefix << qname << ": Validation state was " << state << ", state update is " << stateUpdate);
3484 updateDNSSECValidationState(state, stateUpdate);
3485 LOG(", validation state is now " << state << endl);
3486 }
3487
3488 vState SyncRes::getTA(const DNSName& zone, dsmap_t& ds, const string& prefix)
3489 {
3490 auto luaLocal = g_luaconfs.getLocal();
3491
3492 if (luaLocal->dsAnchors.empty()) {
3493 LOG(prefix << zone << ": No trust anchors configured, everything is Insecure" << endl);
3494 /* We have no TA, everything is insecure */
3495 return vState::Insecure;
3496 }
3497
3498 std::string reason;
3499 if (haveNegativeTrustAnchor(luaLocal->negAnchors, zone, reason)) {
3500 LOG(prefix << zone << ": Got NTA" << endl);
3501 return vState::NTA;
3502 }
3503
3504 if (getTrustAnchor(luaLocal->dsAnchors, zone, ds)) {
3505 if (!zone.isRoot()) {
3506 LOG(prefix << zone << ": Got TA" << endl);
3507 }
3508 return vState::TA;
3509 }
3510
3511 if (zone.isRoot()) {
3512 /* No TA for the root */
3513 return vState::Insecure;
3514 }
3515
3516 return vState::Indeterminate;
3517 }
3518
3519 size_t SyncRes::countSupportedDS(const dsmap_t& dsmap, const string& prefix)
3520 {
3521 size_t count = 0;
3522
3523 for (const auto& ds : dsmap) {
3524 if (isSupportedDS(ds, LogObject(prefix))) {
3525 count++;
3526 }
3527 }
3528
3529 return count;
3530 }
3531
3532 void SyncRes::initZoneCutsFromTA(const DNSName& from, const string& prefix)
3533 {
3534 DNSName zone(from);
3535 do {
3536 dsmap_t ds;
3537 vState result = getTA(zone, ds, prefix);
3538 if (result != vState::Indeterminate) {
3539 if (result == vState::TA) {
3540 if (countSupportedDS(ds, prefix) == 0) {
3541 ds.clear();
3542 result = vState::Insecure;
3543 }
3544 else {
3545 result = vState::Secure;
3546 }
3547 }
3548 else if (result == vState::NTA) {
3549 result = vState::Insecure;
3550 }
3551
3552 d_cutStates[zone] = result;
3553 }
3554 } while (zone.chopOff());
3555 }
3556
/* Work out the DNSSEC state at `zone` and collect the DS records usable for it into `ds`.
 *
 * Trust anchors are checked first through getTA(). If one applies, or if `taOnly` is set,
 * the function returns without sending any query: a trust anchor with at least one
 * supported DS gives Secure; a trust anchor with no supported DS (in which case `ds` is
 * cleared) or a negative trust anchor gives Insecure.
 * Otherwise the DS RRset of `zone` is resolved with doResolve() and the supported DS
 * records found in the answer are inserted into `ds`.
 *
 * bogusOnNXD: when true, an NXDomain result for the DS query ends in BogusUnableToGetDSs
 *             instead of being processed like a NoError answer.
 * foundCut:   optional output; it is only written on the trust-anchor path and on the
 *             NoError path, the other exits leave it untouched.
 *
 * Throws ImmediateServFailException when the DS query ends in ServFail.
 */
3557 vState SyncRes::getDSRecords(const DNSName& zone, dsmap_t& ds, bool taOnly, unsigned int depth, const string& prefix, bool bogusOnNXD, bool* foundCut)
3558 {
3559 vState result = getTA(zone, ds, prefix);
3560
/* a definite answer from the trust anchors, or the caller does not want us to go further */
3561 if (result != vState::Indeterminate || taOnly) {
3562 if (foundCut) {
3563 *foundCut = (result != vState::Indeterminate);
3564 }
3565
3566 if (result == vState::TA) {
3567 if (countSupportedDS(ds, prefix) == 0) {
3568 ds.clear();
3569 result = vState::Insecure;
3570 }
3571 else {
3572 result = vState::Secure;
3573 }
3574 }
3575 else if (result == vState::NTA) {
3576 result = vState::Insecure;
3577 }
3578
3579 return result;
3580 }
3581
3582 std::set<GetBestNSAnswer> beenthere;
3583 std::vector<DNSRecord> dsrecords;
3584
3585 Context context;
3586
/* The DS lookup runs with cache-only mode switched off and with QName minimization
   derived from getQMFallbackMode(); both previous settings are put back right after
   doResolve() returns. NOTE(review): they are not put back if doResolve() throws. */
3587 const bool oldCacheOnly = setCacheOnly(false);
3588 const bool oldQM = setQNameMinimization(!getQMFallbackMode());
3589 int rcode = doResolve(zone, QType::DS, dsrecords, depth + 1, beenthere, context);
3590 setCacheOnly(oldCacheOnly);
3591 setQNameMinimization(oldQM);
3592
3593 if (rcode == RCode::ServFail) {
3594 throw ImmediateServFailException("Server Failure while retrieving DS records for " + zone.toLogString());
3595 }
3596
3597 if (rcode == RCode::NoError || (rcode == RCode::NXDomain && !bogusOnNXD)) {
/* preferred digest type among the supported DS seen so far: the numerically higher
   type wins, except that SHA256 is preferred over GOST */
3598 uint8_t bestDigestType = 0;
3599
3600 bool gotCNAME = false;
3601 for (const auto& record : dsrecords) {
3602 if (record.d_type == QType::DS) {
3603 const auto dscontent = getRR<DSRecordContent>(record);
3604 if (dscontent && isSupportedDS(*dscontent, LogObject(prefix))) {
3605 // Make GOST a lower prio than SHA256
3606 if (dscontent->d_digesttype == DNSSECKeeper::DIGEST_GOST && bestDigestType == DNSSECKeeper::DIGEST_SHA256) {
3607 continue;
3608 }
3609 if (dscontent->d_digesttype > bestDigestType || (bestDigestType == DNSSECKeeper::DIGEST_GOST && dscontent->d_digesttype == DNSSECKeeper::DIGEST_SHA256)) {
3610 bestDigestType = dscontent->d_digesttype;
3611 }
3612 ds.insert(*dscontent);
3613 }
3614 }
/* a CNAME owned by the queried name itself is remembered, it is used below as a sign
   that there is no delegation at `zone` */
3615 else if (record.d_type == QType::CNAME && record.d_name == zone) {
3616 gotCNAME = true;
3617 }
3618 }
3619
3620 /* RFC 4509 section 3: "Validator implementations SHOULD ignore DS RRs containing SHA-1
3621 * digests if DS RRs with SHA-256 digests are present in the DS RRset."
3622 * We interpret that as: do not use SHA-1 if SHA-256 or SHA-384 is available
3623 */
3624 for (auto dsrec = ds.begin(); dsrec != ds.end();) {
3625 if (dsrec->d_digesttype == DNSSECKeeper::DIGEST_SHA1 && dsrec->d_digesttype != bestDigestType) {
3626 dsrec = ds.erase(dsrec);
3627 }
3628 else {
3629 ++dsrec;
3630 }
3631 }
3632
/* d_cutStates and foundCut are only updated for NoError answers; an accepted NXDomain
   skips this part and simply returns the state of the lookup */
3633 if (rcode == RCode::NoError) {
3634 if (ds.empty()) {
3635 /* we have no DS, it's either:
3636 - a delegation to a non-DNSSEC signed zone
3637 - no delegation, we stay in the same zone
3638 */
3639 if (gotCNAME || denialProvesNoDelegation(zone, dsrecords)) {
3640 /* we are still inside the same zone */
3641
3642 if (foundCut) {
3643 *foundCut = false;
3644 }
3645 return context.state;
3646 }
3647
3648 d_cutStates[zone] = context.state == vState::Secure ? vState::Insecure : context.state;
3649 /* delegation with no DS, might be Secure -> Insecure */
3650 if (foundCut) {
3651 *foundCut = true;
3652 }
3653
3654 /* a delegation with no DS is either:
3655 - a signed zone (Secure) to an unsigned one (Insecure)
3656 - an unsigned zone to another unsigned one (Insecure stays Insecure, Bogus stays Bogus)
3657 */
3658 return context.state == vState::Secure ? vState::Insecure : context.state;
3659 }
3660 else {
3661 /* we have a DS */
3662 d_cutStates[zone] = context.state;
3663 if (foundCut) {
3664 *foundCut = true;
3665 }
3666 }
3667 }
3668
3669 return context.state;
3670 }
3671
/* any other rcode, or NXDomain while bogusOnNXD is set */
3672 LOG(prefix << zone << ": Returning Bogus state from " << __func__ << "(" << zone << ")" << endl);
3673 return vState::BogusUnableToGetDSs;
3674 }
3675
/* Return the validation state that applies to `name` according to the zone cuts
 * recorded in d_cutStates, optionally searching for cuts that were missed.
 *
 * Returns vState::Indeterminate straight away when shouldValidate() is false.
 *
 * wouldBeValid: when false, and the closest known cut is not the looked-up name itself,
 *               getDSRecords() is called for every name between that cut and the
 *               looked-up name to discover intermediate cuts; newly found cuts are
 *               stored in d_cutStates and the first non-Secure one decides the result.
 * typeIsDS:     when true the lookup starts one label above `name`.
 * depth/prefix: forwarded to getDSRecords() and used for logging.
 */
3676 vState SyncRes::getValidationStatus(const DNSName& name, bool wouldBeValid, bool typeIsDS, unsigned int depth, const string& prefix)
3677 {
3678 vState result = vState::Indeterminate;
3679
3680 if (!shouldValidate()) {
3681 return result;
3682 }
3683
3684 DNSName subdomain(name);
3685 if (typeIsDS) {
3686 subdomain.chopOff();
3687 }
3688
/* exact match: a cut is already recorded for the name itself */
3689 {
3690 const auto& it = d_cutStates.find(subdomain);
3691 if (it != d_cutStates.cend()) {
3692 LOG(prefix << name << ": Got status " << it->second << " for name " << subdomain << endl);
3693 return it->second;
3694 }
3695 }
3696
3697 /* look for the best match we have */
3698 DNSName best(subdomain);
3699 while (best.chopOff()) {
3700 const auto& it = d_cutStates.find(best);
3701 if (it != d_cutStates.cend()) {
3702 result = it->second;
/* a Bogus or Insecure ancestor settles the matter, no need to look for missed cuts */
3703 if (vStateIsBogus(result) || result == vState::Insecure) {
3704 LOG(prefix << name << ": Got status " << result << " for name " << best << endl);
3705 return result;
3706 }
3707 break;
3708 }
3709 }
3710
3711 /* by now we have the best match, it's likely Secure (otherwise we would not be there)
3712 but we don't know if we missed a cut (or several).
3713 We could see if we have DS (or denial of) in cache but let's not worry for now,
3714 we will if we don't have a signature, or if the signer doesn't match what we expect */
3715 if (!wouldBeValid && best != subdomain) {
3716 /* no signatures or Bogus, we likely missed a cut, let's try to find it */
3717 LOG(prefix << name << ": No or invalid signature/proof for " << name << ", we likely missed a cut between " << best << " and " << subdomain << ", looking for it" << endl);
3718 DNSName ds(best);
3719 std::vector<string> labelsToAdd = subdomain.makeRelative(ds).getRawLabels();
3720
/* walk down from `best` towards `subdomain`, prepending one label per iteration
   (labels are taken from the back of the list) */
3721 while (!labelsToAdd.empty()) {
3722
3723 ds.prependRawLabel(labelsToAdd.back());
3724 labelsToAdd.pop_back();
3725 LOG(prefix << name << ": - Looking for a DS at " << ds << endl);
3726
3727 bool foundCut = false;
3728 dsmap_t results;
3729 vState dsState = getDSRecords(ds, results, false, depth, prefix, false, &foundCut);
3730
3731 if (foundCut) {
3732 LOG(prefix << name << ": - Found cut at " << ds << endl);
3733 LOG(prefix << name << ": New state for " << ds << " is " << dsState << endl);
3734 d_cutStates[ds] = dsState;
3735
/* a Secure cut does not end the search, there may be another cut further down */
3736 if (dsState != vState::Secure) {
3737 return dsState;
3738 }
3739 }
3740 }
3741
3742 /* we did not miss a cut, good luck */
3743 return result;
3744 }
3745
3746 #if 0
3747 /* we don't need this, we actually do the right thing later */
3748 DNSName signer = getSigner(signatures);
3749
3750 if (!signer.empty() && name.isPartOf(signer)) {
3751 if (signer == best) {
3752 return result;
3753 }
3754 /* the zone cut is not the one we expected,
3755 this is fine because we will retrieve the needed DNSKEYs and DSs
3756 later, and even go Insecure if we missed a cut to Insecure (no DS)
3757 and the signatures do not validate (we should not go Bogus in that
3758 case) */
3759 }
3760 /* something is not right, but let's not worry about that for now.. */
3761 #endif
3762
3763 return result;
3764 }
3765
/* Validate the DNSKEY records in `dnskeys` for `zone`, using `signatures` and the DS
 * records obtained for the signer through getDSRecords().
 *
 * Returns:
 *  - vState::BogusNoRRSIG when `signatures` is empty;
 *  - the state from getDSRecords() when that state is not Secure;
 *  - otherwise the state computed by validateDNSKeysAgainstDS().
 * Before a failure is reported (signer not covering `zone`, or not every tentative key
 * ending up validated), getValidationStatus() is asked to look for a missed zone cut;
 * if it returns anything other than Secure, that state is returned instead.
 */
3766 vState SyncRes::validateDNSKeys(const DNSName& zone, const std::vector<DNSRecord>& dnskeys, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, unsigned int depth, const string& prefix)
3767 {
3768 dsmap_t ds;
3769 if (signatures.empty()) {
3770 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but no signature, going Bogus!" << endl);
3771 return vState::BogusNoRRSIG;
3772 }
3773
3774 DNSName signer = getSigner(signatures);
3775
3776 if (!signer.empty() && zone.isPartOf(signer)) {
/* fetch the DS set of the signer; anything but Secure is handed back to the caller as is */
3777 vState state = getDSRecords(signer, ds, false, depth, prefix);
3778
3779 if (state != vState::Secure) {
3780 return state;
3781 }
3782 }
3783 else {
3784 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but the zone (" << zone << ") is not part of the signer (" << signer << "), check that we did not miss a zone cut" << endl);
3785 /* try again to get the missed cuts, harder this time */
3786 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3787 if (zState == vState::Secure) {
3788 /* too bad */
3789 LOG(prefix << zone << ": After checking the zone cuts again, we still have " << std::to_string(dnskeys.size()) << " DNSKEYs and the zone (" << zone << ") is still not part of the signer (" << signer << "), going Bogus!" << endl);
3790 return vState::BogusNoValidRRSIG;
3791 }
3792 else {
3793 return zState;
3794 }
3795 }
3796
3797 skeyset_t tentativeKeys;
3798 sortedRecords_t toSign;
3799
/* every DNSKEY record is both a candidate key and part of the RRset the signatures cover */
3800 for (const auto& dnskey : dnskeys) {
3801 if (dnskey.d_type == QType::DNSKEY) {
3802 auto content = getRR<DNSKEYRecordContent>(dnskey);
3803 if (content) {
3804 tentativeKeys.insert(content);
3805 toSign.insert(content);
3806 }
3807 }
3808 }
3809
3810 LOG(prefix << zone << ": Trying to validate " << std::to_string(tentativeKeys.size()) << " DNSKEYs with " << std::to_string(ds.size()) << " DS" << endl);
3811 skeyset_t validatedKeys;
3812 auto state = validateDNSKeysAgainstDS(d_now.tv_sec, zone, ds, tentativeKeys, toSign, signatures, validatedKeys, LogObject(prefix));
3813
3814 LOG(prefix << zone << ": We now have " << std::to_string(validatedKeys.size()) << " DNSKEYs" << endl);
3815
3816 /* if we found at least one valid RRSIG covering the set,
3817 all tentative keys are validated keys. Otherwise it means
3818 we haven't found at least one DNSKEY and a matching RRSIG
3819 covering this set, this looks Bogus. */
3820 if (validatedKeys.size() != tentativeKeys.size()) {
3821 LOG(prefix << zone << ": Let's check whether we missed a zone cut before returning a Bogus state from " << __func__ << "(" << zone << ")" << endl);
3822 /* try again to get the missed cuts, harder this time */
3823 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3824 if (zState == vState::Secure) {
3825 /* too bad */
3826 LOG(prefix << zone << ": After checking the zone cuts we are still in a Secure zone, returning Bogus state from " << __func__ << "(" << zone << ")" << endl);
3827 return state;
3828 }
3829 else {
3830 return zState;
3831 }
3832 }
3833
3834 return state;
3835 }
3836
3837 vState SyncRes::getDNSKeys(const DNSName& signer, skeyset_t& keys, bool& servFailOccurred, unsigned int depth, const string& prefix)
3838 {
3839 std::vector<DNSRecord> records;
3840 std::set<GetBestNSAnswer> beenthere;
3841 LOG(prefix << signer << ": Retrieving DNSKEYs" << endl);
3842
3843 Context context;
3844
3845 const bool oldCacheOnly = setCacheOnly(false);
3846 int rcode = doResolve(signer, QType::DNSKEY, records, depth + 1, beenthere, context);
3847 setCacheOnly(oldCacheOnly);
3848
3849 if (rcode == RCode::ServFail) {
3850 servFailOccurred = true;
3851 return vState::BogusUnableToGetDNSKEYs;
3852 }
3853
3854 if (rcode == RCode::NoError) {
3855 if (context.state == vState::Secure) {
3856 for (const auto& key : records) {
3857 if (key.d_type == QType::DNSKEY) {
3858 auto content = getRR<DNSKEYRecordContent>(key);
3859 if (content) {
3860 keys.insert(content);
3861 }
3862 }
3863 }
3864 }
3865 LOG(prefix << signer << ": Retrieved " << keys.size() << " DNSKeys, state is " << context.state << endl);
3866 return context.state;
3867 }
3868
3869 if (context.state == vState::Insecure) {
3870 return context.state;
3871 }
3872
3873 LOG(prefix << signer << ": Returning Bogus state from " << __func__ << "(" << signer << ")" << endl);
3874 return vState::BogusUnableToGetDNSKEYs;
3875 }
3876
/* Validate the RRset `records` (owner `name`, type `type`) against `signatures`.
 *
 * qname/qtype describe the query currently being resolved; they are used to detect that
 * the keys (or DS) needed for the validation are the very records being retrieved.
 *
 * Returns:
 *  - vState::BogusNoRRSIG when `signatures` is empty;
 *  - vState::Indeterminate when resolving the signer's own DNSKEY and the RRset at hand
 *    is signed with that key (and is not a NSEC/NSEC3 matched by the checks below);
 *  - vState::Secure when validateWithKeySet() succeeds;
 *  - otherwise a Bogus state, unless getValidationStatus() reports a non-Secure state
 *    after looking for a missed zone cut, in which case that state is returned.
 * Non-Bogus, non-Secure states obtained while fetching the keys are returned as is.
 *
 * Throws ImmediateServFailException when retrieving the DNSKEYs hit a ServFail and the
 * zone is still considered Secure after looking for missed cuts.
 *
 * Note that when the signer is empty or does not cover `name`, no key is retrieved and
 * validateWithKeySet() is called with an empty key set.
 */
3877 vState SyncRes::validateRecordsWithSigs(unsigned int depth, const string& prefix, const DNSName& qname, const QType qtype, const DNSName& name, const QType type, const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
3878 {
3879 skeyset_t keys;
3880 if (signatures.empty()) {
3881 LOG(prefix << qname << ": Bogus!" << endl);
3882 return vState::BogusNoRRSIG;
3883 }
3884
3885 const DNSName signer = getSigner(signatures);
/* set when the failure concerns the DS: it is passed as `typeIsDS` to getValidationStatus() below */
3886 bool dsFailed = false;
3887 if (!signer.empty() && name.isPartOf(signer)) {
3888 vState state = vState::Secure;
3889
3890 if ((qtype == QType::DNSKEY || qtype == QType::DS) && signer == qname) {
3891 /* we are already retrieving those keys, sorry */
3892 if (type == QType::DS && signer == name && !signer.isRoot()) {
3893 /* Unless we are getting the DS of the root zone, we should never see a
3894 DS (or a denial of a DS) signed by the DS itself, since we should be
3895 requesting it from the parent zone. Something is very wrong */
3896 LOG(prefix << qname << ": The DS for " << qname << " is signed by itself" << endl);
3897 state = vState::BogusSelfSignedDS;
3898 dsFailed = true;
3899 }
3900 else if (qtype == QType::DS && signer == qname && !signer.isRoot()) {
3901 if (type == QType::SOA || type == QType::NSEC || type == QType::NSEC3) {
3902 /* if we are trying to validate the DS or more likely NSEC(3)s proving that it does not exist, we have a problem.
3903 In that case let's go Bogus (we will check later if we missed a cut)
3904 */
3905 state = vState::BogusSelfSignedDS;
3906 dsFailed = true;
3907 }
3908 else if (type == QType::CNAME) {
3909 state = vState::BogusUnableToGetDSs;
3910 dsFailed = true;
3911 }
3912 }
3913 else if (qtype == QType::DNSKEY && signer == qname) {
3914 /* that actually does happen when a server returns NS records in authority
3915 along with the DNSKEY, leading us to trying to validate the RRSIGs for
3916 the NS with the DNSKEY that we are about to process. */
3917 if ((name == signer && type == QType::NSEC) || type == QType::NSEC3) {
3918 /* if we are trying to validate the DNSKEY (should not happen here),
3919 or more likely NSEC(3)s proving that it does not exist, we have a problem.
3920 In that case let's see if the DS does exist, and if it does let's go Bogus
3921 */
3922 dsmap_t results;
3923 vState dsState = getDSRecords(signer, results, false, depth, prefix, true);
3924 if (vStateIsBogus(dsState) || dsState == vState::Insecure) {
3925 state = dsState;
3926 if (vStateIsBogus(dsState)) {
3927 dsFailed = true;
3928 }
3929 }
3930 else {
3931 LOG(prefix << qname << ": Unable to get the DS for " << signer << endl);
3932 state = vState::BogusUnableToGetDNSKEYs;
3933 dsFailed = true;
3934 }
3935 }
3936 else {
3937 /* return immediately since looking at the cuts is not going to change the
3938 fact that we are looking at a signature done with the key we are trying to
3939 obtain */
3940 LOG(prefix << qname << ": We are looking at a signature done with the key we are trying to obtain " << signer << endl);
3941 return vState::Indeterminate;
3942 }
3943 }
3944 }
3945 bool servFailOccurred = false;
/* the keys are only fetched if none of the checks above already decided on a state */
3946 if (state == vState::Secure) {
3947 state = getDNSKeys(signer, keys, servFailOccurred, depth, prefix);
3948 }
3949
3950 if (state != vState::Secure) {
3951 if (!vStateIsBogus(state)) {
3952 return state;
3953 }
3954 /* try again to get the missed cuts, harder this time */
3955 LOG(prefix << signer << ": Checking whether we missed a zone cut for " << signer << " before returning a Bogus state for " << name << "|" << type.toString() << endl);
3956 auto zState = getValidationStatus(signer, false, dsFailed, depth, prefix);
3957 if (zState == vState::Secure) {
3958 if (state == vState::BogusUnableToGetDNSKEYs && servFailOccurred) {
3959 throw ImmediateServFailException("Server Failure while retrieving DNSKEY records for " + signer.toLogString());
3960 }
3961 /* too bad */
3962 LOG(prefix << signer << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
3963 return state;
3964 }
3965 else {
3966 return zState;
3967 }
3968 }
3969 }
3970
3971 sortedRecords_t recordcontents;
3972 for (const auto& record : records) {
3973 recordcontents.insert(record.getContent());
3974 }
3975
3976 LOG(prefix << name << ": Going to validate " << recordcontents.size() << " record contents with " << signatures.size() << " sigs and " << keys.size() << " keys for " << name << "|" << type.toString() << endl);
3977 vState state = validateWithKeySet(d_now.tv_sec, name, recordcontents, signatures, keys, LogObject(prefix), false);
3978 if (state == vState::Secure) {
3979 LOG(prefix << name << ": Secure!" << endl);
3980 return vState::Secure;
3981 }
3982
3983 LOG(prefix << vStateToString(state) << "!" << endl);
3984 /* try again to get the missed cuts, harder this time */
/* NOTE(review): the "Checking whether..." message below is logged after the check has already run */
3985 auto zState = getValidationStatus(name, false, type == QType::DS, depth, prefix);
3986 LOG(prefix << name << ": Checking whether we missed a zone cut before returning a Bogus state" << endl);
3987 if (zState == vState::Secure) {
3988 /* too bad */
3989 LOG(prefix << name << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
3990 return state;
3991 }
3992 else {
3993 return zState;
3994 }
3995 }
3996
3997 /* This function will check whether the answer should have the AA bit set, and will set if it should be set and isn't.
3998 This is unfortunately needed to deal with very crappy so-called DNS servers */
3999 void SyncRes::fixupAnswer(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
4000 {
4001 const bool wasForwardRecurse = wasForwarded && rdQuery;
4002
4003 if (wasForwardRecurse || lwr.d_aabit) {
4004 /* easy */
4005 return;
4006 }
4007
4008 for (const auto& rec : lwr.d_records) {
4009
4010 if (rec.d_type == QType::OPT) {
4011 continue;
4012 }
4013
4014 if (rec.d_class != QClass::IN) {
4015 continue;
4016 }
4017
4018 if (rec.d_type == QType::ANY) {
4019 continue;
4020 }
4021
4022 if (rec.d_place == DNSResourceRecord::ANSWER && (rec.d_type == qtype || rec.d_type == QType::CNAME || qtype == QType::ANY) && rec.d_name == qname && rec.d_name.isPartOf(auth)) {
4023 /* This is clearly an answer to the question we were asking, from an authoritative server that is allowed to send it.
4024 We are going to assume this server is broken and does not know it should set the AA bit, even though it is DNS 101 */
4025 LOG(prefix << qname << ": Received a record for " << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << " in the answer section from " << auth << ", without the AA bit set. Assuming this server is clueless and setting the AA bit." << endl);
4026 lwr.d_aabit = true;
4027 return;
4028 }
4029
4030 if (rec.d_place != DNSResourceRecord::ANSWER) {
4031 /* we have scanned all the records in the answer section, if any, we are done */
4032 return;
4033 }
4034 }
4035 }
4036
4037 static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec)
4038 {
4039 switch (rec.d_type) {
4040 case QType::MX:
4041 if (auto mxContent = getRR<MXRecordContent>(rec)) {
4042 allowedAdditionals.insert(mxContent->d_mxname);
4043 }
4044 break;
4045 case QType::NS:
4046 if (auto nsContent = getRR<NSRecordContent>(rec)) {
4047 allowedAdditionals.insert(nsContent->getNS());
4048 }
4049 break;
4050 case QType::SRV:
4051 if (auto srvContent = getRR<SRVRecordContent>(rec)) {
4052 allowedAdditionals.insert(srvContent->d_target);
4053 }
4054 break;
4055 case QType::SVCB: /* fall-through */
4056 case QType::HTTPS:
4057 if (auto svcbContent = getRR<SVCBBaseRecordContent>(rec)) {
4058 if (svcbContent->getPriority() > 0) {
4059 DNSName target = svcbContent->getTarget();
4060 if (target.isRoot()) {
4061 target = rec.d_name;
4062 }
4063 allowedAdditionals.insert(target);
4064 }
4065 else {
4066 // FIXME: Alias mode not implemented yet
4067 }
4068 }
4069 break;
4070 case QType::NAPTR:
4071 if (auto naptrContent = getRR<NAPTRRecordContent>(rec)) {
4072 auto flags = naptrContent->getFlags();
4073 toLowerInPlace(flags);
4074 if (flags.find('a') != string::npos || flags.find('s') != string::npos) {
4075 allowedAdditionals.insert(naptrContent->getReplacement());
4076 }
4077 }
4078 break;
4079 default:
4080 break;
4081 }
4082 }
4083
/* Remove from lwr.d_records, in place, the records of a response that should not be
 * processed any further. OPT records are always kept. A record is erased when:
 *  - its class is not IN, or its type is ANY;
 *  - its owner name is not part of `auth`;
 *  - it is in ANSWER while the response has neither the AA bit nor comes from a recursive
 *    forward (wasForwarded && rdQuery), unless it is a CNAME for the exact qname;
 *  - it is a DNAME outside of ANSWER, or a DNAME that qname is not part of;
 *  - it is in ANSWER with a type unrelated to the query (see the condition below);
 *  - it is in AUTHORITY and not one of NS, DS, SOA, RRSIG, NSEC, NSEC3;
 *  - it is a SOA in AUTHORITY that qname is not part of, or received without AA/recursive forward;
 *  - it is a NS in AUTHORITY of what was recognised as a NXDomain/NoData response, or a
 *    root NS while d_updatingRootNS is not set;
 *  - it is in ADDITIONAL and is not an A, AAAA or RRSIG, or its owner name is not in the
 *    set of names collected from the accepted ANSWER records and AUTHORITY NS records.
 *
 * NOTE(review): the set of allowed additional names and the NXDomain/NoData flags are
 * built while iterating, so the result depends on the order of lwr.d_records
 * (presumably ANSWER, then AUTHORITY, then ADDITIONAL; confirm against the parser).
 */
4084 void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
4085 {
4086 const bool wasForwardRecurse = wasForwarded && rdQuery;
4087 /* list of names for which we will allow A and AAAA records in the additional section
4088 to remain */
4089 std::unordered_set<DNSName> allowedAdditionals = {qname};
4090 bool haveAnswers = false;
4091 bool isNXDomain = false;
4092 bool isNXQType = false;
4093
/* the iterator is only advanced at the very end of the body, every removal goes
   through erase() followed by `continue` */
4094 for (auto rec = lwr.d_records.begin(); rec != lwr.d_records.end();) {
4095
4096 if (rec->d_type == QType::OPT) {
4097 ++rec;
4098 continue;
4099 }
4100
4101 if (rec->d_class != QClass::IN) {
4102 LOG(prefix << qname << ": Removing non internet-classed data received from " << auth << endl);
4103 rec = lwr.d_records.erase(rec);
4104 continue;
4105 }
4106
4107 if (rec->d_type == QType::ANY) {
4108 LOG(prefix << qname << ": Removing 'ANY'-typed data received from " << auth << endl);
4109 rec = lwr.d_records.erase(rec);
4110 continue;
4111 }
4112
4113 if (!rec->d_name.isPartOf(auth)) {
4114 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4115 rec = lwr.d_records.erase(rec);
4116 continue;
4117 }
4118
4119 /* dealing with the records in answer */
4120 if (!(lwr.d_aabit || wasForwardRecurse) && rec->d_place == DNSResourceRecord::ANSWER) {
4121 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4122 are sending such responses */
4123 if (!(rec->d_type == QType::CNAME && qname == rec->d_name)) {
4124 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the answer section without the AA bit set received from " << auth << endl);
4125 rec = lwr.d_records.erase(rec);
4126 continue;
4127 }
4128 }
4129
4130 if (rec->d_type == QType::DNAME && (rec->d_place != DNSResourceRecord::ANSWER || !qname.isPartOf(rec->d_name))) {
4131 LOG(prefix << qname << ": Removing invalid DNAME record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4132 rec = lwr.d_records.erase(rec);
4133 continue;
4134 }
4135
/* in ANSWER, and unless the query was for ANY, only the queried type, the types listed
   in s_redirectionQTypes, SOA and RRSIG are kept */
4136 if (rec->d_place == DNSResourceRecord::ANSWER && (qtype != QType::ANY && rec->d_type != qtype.getCode() && s_redirectionQTypes.count(rec->d_type) == 0 && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG)) {
4137 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ANSWER section received from " << auth << endl);
4138 rec = lwr.d_records.erase(rec);
4139 continue;
4140 }
4141
/* remember that at least one ANSWER record survived the checks above */
4142 if (rec->d_place == DNSResourceRecord::ANSWER && !haveAnswers) {
4143 haveAnswers = true;
4144 }
4145
4146 if (rec->d_place == DNSResourceRecord::ANSWER) {
4147 allowAdditionalEntry(allowedAdditionals, *rec);
4148 }
4149
4150 /* dealing with the records in authority */
4151 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type != QType::NS && rec->d_type != QType::DS && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG && rec->d_type != QType::NSEC && rec->d_type != QType::NSEC3) {
4152 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4153 rec = lwr.d_records.erase(rec);
4154 continue;
4155 }
4156
4157 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::SOA) {
4158 if (!qname.isPartOf(rec->d_name)) {
4159 LOG(prefix << qname << ": Removing irrelevant SOA record '" << rec->d_name << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4160 rec = lwr.d_records.erase(rec);
4161 continue;
4162 }
4163
4164 if (!(lwr.d_aabit || wasForwardRecurse)) {
4165 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4166 rec = lwr.d_records.erase(rec);
4167 continue;
4168 }
4169
/* an accepted SOA in AUTHORITY without any accepted ANSWER record marks the response
   as negative: NXDomain or NoData depending on the rcode */
4170 if (!haveAnswers) {
4171 if (lwr.d_rcode == RCode::NXDomain) {
4172 isNXDomain = true;
4173 }
4174 else if (lwr.d_rcode == RCode::NoError) {
4175 isNXQType = true;
4176 }
4177 }
4178 }
4179
/* note that this only affects NS records located after the SOA that set the flags */
4180 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && (isNXDomain || isNXQType)) {
4181 /*
4182 * We don't want to pick up NS records in AUTHORITY and their ADDITIONAL sections of NXDomain answers
4183 * because they are somewhat easy to insert into a large, fragmented UDP response
4184 * for an off-path attacker by injecting spoofed UDP fragments. So do not add these to allowedAdditionals.
4185 */
4186 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a " << (isNXDomain ? "NXD" : "NXQTYPE") << " response received from " << auth << endl);
4187 rec = lwr.d_records.erase(rec);
4188 continue;
4189 }
4190
4191 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && !d_updatingRootNS && rec->d_name == g_rootdnsname) {
4192 /*
4193 * We don't want to pick up root NS records in AUTHORITY and their associated ADDITIONAL sections of random queries.
4194 * So don't add them to allowedAdditionals.
4195 */
4196 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a response received from " << auth << endl);
4197 rec = lwr.d_records.erase(rec);
4198 continue;
4199 }
4200
/* the targets of the NS records we kept may have their addresses in ADDITIONAL */
4201 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS) {
4202 allowAdditionalEntry(allowedAdditionals, *rec);
4203 }
4204
4205 /* dealing with the records in additional */
4206 if (rec->d_place == DNSResourceRecord::ADDITIONAL && rec->d_type != QType::A && rec->d_type != QType::AAAA && rec->d_type != QType::RRSIG) {
4207 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4208 rec = lwr.d_records.erase(rec);
4209 continue;
4210 }
4211
4212 if (rec->d_place == DNSResourceRecord::ADDITIONAL && allowedAdditionals.count(rec->d_name) == 0) {
4213 LOG(prefix << qname << ": Removing irrelevant additional record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4214 rec = lwr.d_records.erase(rec);
4215 continue;
4216 }
4217
4218 ++rec;
4219 }
4220 }
4221
4222 void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector<DNSRecord>& newRecords, unsigned int depth, const string& prefix)
4223 {
4224 vector<DNSRecord> existing;
4225 bool wasAuth = false;
4226 auto ttl = g_recCache->get(d_now.tv_sec, domain, QType::NS, MemRecursorCache::None, &existing, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, nullptr, &wasAuth);
4227
4228 if (ttl <= 0 || wasAuth) {
4229 return;
4230 }
4231 {
4232 auto lock = s_savedParentNSSet.lock();
4233 if (lock->find(domain) != lock->end()) {
4234 // no relevant data, or we already stored the parent data
4235 return;
4236 }
4237 }
4238
4239 set<DNSName> authSet;
4240 for (const auto& ns : newRecords) {
4241 auto content = getRR<NSRecordContent>(ns);
4242 authSet.insert(content->getNS());
4243 }
4244 // The glue IPs could also differ, but we're not checking that yet, we're only looking for parent NS records not
4245 // in the child set
4246 bool shouldSave = false;
4247 for (const auto& ns : existing) {
4248 auto content = getRR<NSRecordContent>(ns);
4249 if (authSet.count(content->getNS()) == 0) {
4250 LOG(prefix << domain << ": At least one parent-side NS was not in the child-side NS set, remembering parent NS set and cached IPs" << endl);
4251 shouldSave = true;
4252 break;
4253 }
4254 }
4255
4256 if (shouldSave) {
4257 map<DNSName, vector<ComboAddress>> entries;
4258 for (const auto& ns : existing) {
4259 auto content = getRR<NSRecordContent>(ns);
4260 const DNSName& name = content->getNS();
4261 set<GetBestNSAnswer> beenthereIgnored;
4262 unsigned int nretrieveAddressesForNSIgnored;
4263 auto addresses = getAddrs(name, depth, prefix, beenthereIgnored, true, nretrieveAddressesForNSIgnored);
4264 entries.emplace(name, addresses);
4265 }
4266 s_savedParentNSSet.lock()->emplace(domain, std::move(entries), d_now.tv_sec + ttl);
4267 }
4268 }
4269
4270 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery, const ComboAddress& remoteIP)
4271 {
4272 bool wasForwardRecurse = wasForwarded && rdQuery;
4273 tcache_t tcache;
4274
4275 fixupAnswer(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4276 sanitizeRecords(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4277
4278 std::vector<std::shared_ptr<DNSRecord>> authorityRecs;
4279 const unsigned int labelCount = qname.countLabels();
4280 bool isCNAMEAnswer = false;
4281 bool isDNAMEAnswer = false;
4282 DNSName seenAuth;
4283
4284 for (auto& rec : lwr.d_records) {
4285 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4286 continue;
4287 }
4288
4289 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4290
4291 if (!isCNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::CNAME && (!(qtype == QType::CNAME)) && rec.d_name == qname && !isDNAMEAnswer) {
4292 isCNAMEAnswer = true;
4293 }
4294 if (!isDNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::DNAME && qtype != QType::DNAME && qname.isPartOf(rec.d_name)) {
4295 isDNAMEAnswer = true;
4296 isCNAMEAnswer = false;
4297 }
4298
4299 if (rec.d_type == QType::SOA && rec.d_place == DNSResourceRecord::AUTHORITY && qname.isPartOf(rec.d_name)) {
4300 seenAuth = rec.d_name;
4301 }
4302
4303 if (rec.d_type == QType::RRSIG) {
4304 auto rrsig = getRR<RRSIGRecordContent>(rec);
4305 if (rrsig) {
4306 /* As illustrated in rfc4035's Appendix B.6, the RRSIG label
4307 count can be lower than the name's label count if it was
4308 synthesized from the wildcard. Note that the difference might
4309 be > 1. */
4310 if (rec.d_name == qname && isWildcardExpanded(labelCount, *rrsig)) {
4311 gatherWildcardProof = true;
4312 if (!isWildcardExpandedOntoItself(rec.d_name, labelCount, *rrsig)) {
4313 /* if we have a wildcard expanded onto itself, we don't need to prove
4314 that the exact name doesn't exist because it actually does.
4315 We still want to gather the corresponding NSEC/NSEC3 records
4316 to pass them to our client in case it wants to validate by itself.
4317 */
4318 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard, we need a wildcard proof" << endl);
4319 needWildcardProof = true;
4320 }
4321 else {
4322 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard expanded onto itself, we need to gather wildcard proof" << endl);
4323 }
4324 wildcardLabelsCount = rrsig->d_labels;
4325 }
4326
4327 // cerr<<"Got an RRSIG for "<<DNSRecordContent::NumberToType(rrsig->d_type)<<" with name '"<<rec.d_name<<"' and place "<<rec.d_place<<endl;
4328 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
4329 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL = std::min(tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL, rec.d_ttl);
4330 }
4331 }
4332 }
4333
4334 /* if we have a positive answer synthesized from a wildcard,
4335 we need to store the corresponding NSEC/NSEC3 records proving
4336 that the exact name did not exist in the negative cache */
4337 if (gatherWildcardProof) {
4338 for (const auto& rec : lwr.d_records) {
4339 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4340 continue;
4341 }
4342
4343 if (nsecTypes.count(rec.d_type)) {
4344 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4345 }
4346 else if (rec.d_type == QType::RRSIG) {
4347 auto rrsig = getRR<RRSIGRecordContent>(rec);
4348 if (rrsig && nsecTypes.count(rrsig->d_type)) {
4349 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4350 }
4351 }
4352 }
4353 }
4354
4355 // reap all answers from this packet that are acceptable
4356 for (auto& rec : lwr.d_records) {
4357 if (rec.d_type == QType::OPT) {
4358 LOG(prefix << qname << ": OPT answer '" << rec.d_name << "' from '" << auth << "' nameservers" << endl);
4359 continue;
4360 }
4361
4362 LOG(prefix << qname << ": Accept answer '" << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << "|" << rec.getContent()->getZoneRepresentation() << "' from '" << auth << "' nameservers? ttl=" << rec.d_ttl << ", place=" << (int)rec.d_place << " ");
4363
4364 // We called sanitizeRecords before, so all ANY, non-IN and non-aa/non-forwardrecurse answer records are already removed
4365
4366 if (rec.d_name.isPartOf(auth)) {
4367 if (rec.d_type == QType::RRSIG) {
4368 LOG("RRSIG - separate" << endl);
4369 }
4370 else if (rec.d_type == QType::DS && rec.d_name == auth) {
4371 LOG("NO - DS provided by child zone" << endl);
4372 }
4373 else {
4374 bool haveLogged = false;
4375 if (isDNAMEAnswer && rec.d_type == QType::CNAME) {
4376 LOG("NO - we already have a DNAME answer for this domain" << endl);
4377 continue;
4378 }
4379 if (!t_sstorage.domainmap->empty()) {
4380 // Check if we are authoritative for a zone in this answer
4381 DNSName tmp_qname(rec.d_name);
4382 // We may be auth for domain example.com, but the DS record needs to come from the parent (.com) nameserver
4383 if (rec.d_type == QType::DS) {
4384 tmp_qname.chopOff();
4385 }
4386 auto auth_domain_iter = getBestAuthZone(&tmp_qname);
4387 if (auth_domain_iter != t_sstorage.domainmap->end() && auth.countLabels() <= auth_domain_iter->first.countLabels()) {
4388 if (auth_domain_iter->first != auth) {
4389 LOG("NO! - we are authoritative for the zone " << auth_domain_iter->first << endl);
4390 continue;
4391 }
4392 else {
4393 LOG("YES! - This answer was ");
4394 if (!wasForwarded) {
4395 LOG("retrieved from the local auth store.");
4396 }
4397 else {
4398 LOG("received from a server we forward to.");
4399 }
4400 haveLogged = true;
4401 LOG(endl);
4402 }
4403 }
4404 }
4405 if (!haveLogged) {
4406 LOG("YES!" << endl);
4407 }
4408
4409 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4410
4411 DNSRecord dr(rec);
4412 dr.d_ttl += d_now.tv_sec;
4413 dr.d_place = DNSResourceRecord::ANSWER;
4414 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(dr);
4415 }
4416 }
4417 else
4418 LOG("NO!" << endl);
4419 }
4420
4421 // supplant
4422 for (auto& entry : tcache) {
4423 if ((entry.second.records.size() + entry.second.signatures.size() + authorityRecs.size()) > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
4424 uint32_t lowestTTD = computeLowestTTD(entry.second.records, entry.second.signatures, entry.second.signaturesTTL, authorityRecs);
4425
4426 for (auto& record : entry.second.records) {
4427 record.d_ttl = lowestTTD; // boom
4428 }
4429 }
4430 }
4431
4432 for (tcache_t::iterator i = tcache.begin(); i != tcache.end(); ++i) {
4433
4434 if (i->second.records.empty()) // this happens when we did store signatures, but passed on the records themselves
4435 continue;
4436
4437 /* Even if the AA bit is set, additional data cannot be considered
4438 as authoritative. This is especially important during validation
4439 because keeping records in the additional section is allowed even
4440 if the corresponding RRSIGs are not included, without setting the TC
4441 bit, as stated in rfc4035's section 3.1.1. Including RRSIG RRs in a Response:
4442 "When placing a signed RRset in the Additional section, the name
4443 server MUST also place its RRSIG RRs in the Additional section.
4444 If space does not permit inclusion of both the RRset and its
4445 associated RRSIG RRs, the name server MAY retain the RRset while
4446 dropping the RRSIG RRs. If this happens, the name server MUST NOT
4447 set the TC bit solely because these RRSIG RRs didn't fit."
4448 */
4449 bool isAA = lwr.d_aabit && i->first.place != DNSResourceRecord::ADDITIONAL;
4450 /* if we forwarded the query to a recursor, we can expect the answer to be signed,
4451 even if the answer is not AA. Of course that's not only true inside a Secure
4452 zone, but we check that below. */
4453 bool expectSignature = i->first.place == DNSResourceRecord::ANSWER || ((lwr.d_aabit || wasForwardRecurse) && i->first.place != DNSResourceRecord::ADDITIONAL);
4454 /* in a non authoritative answer, we only care about the DS record (or lack of) */
4455 if (!isAA && (i->first.type == QType::DS || i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && i->first.place == DNSResourceRecord::AUTHORITY) {
4456 expectSignature = true;
4457 }
4458
4459 if (isCNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::CNAME || i->first.name != qname)) {
4460 /*
4461 rfc2181 states:
4462 Note that the answer section of an authoritative answer normally
4463 contains only authoritative data. However when the name sought is an
4464 alias (see section 10.1.1) only the record describing that alias is
4465 necessarily authoritative. Clients should assume that other records
4466 may have come from the server's cache. Where authoritative answers
4467 are required, the client should query again, using the canonical name
4468 associated with the alias.
4469 */
4470 isAA = false;
4471 expectSignature = false;
4472 }
4473 else if (isDNAMEAnswer && (i->first.place != DNSResourceRecord::ANSWER || i->first.type != QType::DNAME || !qname.isPartOf(i->first.name))) {
4474 /* see above */
4475 isAA = false;
4476 expectSignature = false;
4477 }
4478
4479 if ((isCNAMEAnswer || isDNAMEAnswer) && i->first.place == DNSResourceRecord::AUTHORITY && i->first.type == QType::NS && auth == i->first.name) {
4480 /* These NS can't be authoritative since we have a CNAME/DNAME answer for which (see above) only the
4481 record describing that alias is necessarily authoritative.
4482 But if we allow the current auth, which might be serving the child zone, to raise the TTL
4483 of non-authoritative NS in the cache, they might be able to keep a "ghost" zone alive forever,
4484 even after the delegation is gone from the parent.
4485 So let's just do nothing with them, we can fetch them directly if we need them.
4486 */
4487 LOG(prefix << qname << ": Skipping authority NS from '" << auth << "' nameservers in CNAME/DNAME answer " << i->first.name << "|" << DNSRecordContent::NumberToType(i->first.type) << endl);
4488 continue;
4489 }
4490
4491 /*
4492 * RFC 6672 section 5.3.1
4493 * In any response, a signed DNAME RR indicates a non-terminal
4494 * redirection of the query. There might or might not be a server-
4495 * synthesized CNAME in the answer section; if there is, the CNAME will
4496 * never be signed. For a DNSSEC validator, verification of the DNAME
4497 * RR and then that the CNAME was properly synthesized is sufficient
4498 * proof.
4499 *
4500 * We do the synthesis check in processRecords, here we make sure we
4501 * don't validate the CNAME.
4502 */
4503 if (isDNAMEAnswer && i->first.type == QType::CNAME) {
4504 expectSignature = false;
4505 }
4506
4507 vState recordState = vState::Indeterminate;
4508
4509 if (expectSignature && shouldValidate()) {
4510 vState initialState = getValidationStatus(i->first.name, !i->second.signatures.empty(), i->first.type == QType::DS, depth, prefix);
4511 LOG(prefix << qname << ": Got initial zone status " << initialState << " for record " << i->first.name << "|" << DNSRecordContent::NumberToType(i->first.type) << endl);
4512
4513 if (initialState == vState::Secure) {
4514 if (i->first.type == QType::DNSKEY && i->first.place == DNSResourceRecord::ANSWER && i->first.name == getSigner(i->second.signatures)) {
4515 LOG(prefix << qname << ": Validating DNSKEY for " << i->first.name << endl);
4516 recordState = validateDNSKeys(i->first.name, i->second.records, i->second.signatures, depth, prefix);
4517 }
4518 else {
4519 LOG(prefix << qname << ": Validating non-additional " << QType(i->first.type).toString() << " record for " << i->first.name << endl);
4520 recordState = validateRecordsWithSigs(depth, prefix, qname, qtype, i->first.name, QType(i->first.type), i->second.records, i->second.signatures);
4521 }
4522 }
4523 else {
4524 recordState = initialState;
4525 LOG(prefix << qname << ": Skipping validation because the current state is " << recordState << endl);
4526 }
4527
4528 LOG(prefix << qname << ": Validation result is " << recordState << ", current state is " << state << endl);
4529 if (state != recordState) {
4530 updateValidationState(qname, state, recordState, prefix);
4531 }
4532 }
4533
4534 if (vStateIsBogus(recordState)) {
4535 /* this is a TTD by now, be careful */
4536 for (auto& record : i->second.records) {
4537 record.d_ttl = std::min(record.d_ttl, static_cast<uint32_t>(s_maxbogusttl + d_now.tv_sec));
4538 }
4539 }
4540
4541 /* We don't need to store NSEC3 records in the positive cache because:
4542 - we don't allow direct NSEC3 queries
4543 - denial of existence proofs in wildcard expanded positive responses are stored in authorityRecs
4544 - denial of existence proofs for negative responses are stored in the negative cache
4545 We also don't want to cache non-authoritative data except for:
4546 - records coming from non forward-recurse servers (those will never be AA)
4547 - DS (special case)
4548 - NS, A and AAAA (used for infra queries)
4549 */
4550 if (i->first.type != QType::NSEC3 && (i->first.type == QType::DS || i->first.type == QType::NS || i->first.type == QType::A || i->first.type == QType::AAAA || isAA || wasForwardRecurse)) {
4551
4552 bool doCache = true;
4553 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4554 const bool isv4 = ednsmask->isIPv4();
4555 if ((isv4 && s_ecsipv4nevercache) || (!isv4 && s_ecsipv6nevercache)) {
4556 doCache = false;
4557 }
4558 // If ednsmask is relevant, we do not want to cache if the scope prefix length is large and TTL is small
4559 if (doCache && s_ecscachelimitttl > 0) {
4560 bool manyMaskBits = (isv4 && ednsmask->getBits() > s_ecsipv4cachelimit) || (!isv4 && ednsmask->getBits() > s_ecsipv6cachelimit);
4561
4562 if (manyMaskBits) {
4563 uint32_t minttl = UINT32_MAX;
4564 for (const auto& it : i->second.records) {
4565 if (it.d_ttl < minttl)
4566 minttl = it.d_ttl;
4567 }
4568 bool ttlIsSmall = minttl < s_ecscachelimitttl + d_now.tv_sec;
4569 if (ttlIsSmall) {
4570 // Case: many bits and ttlIsSmall
4571 doCache = false;
4572 }
4573 }
4574 }
4575 }
4576
4577 d_fromAuthIP = remoteIP;
4578
4579 if (doCache) {
4580 // Check if we are going to replace a non-auth (parent) NS recordset
4581 if (isAA && i->first.type == QType::NS && s_save_parent_ns_set) {
4582 rememberParentSetIfNeeded(i->first.name, i->second.records, depth, prefix);
4583 }
4584 g_recCache->replace(d_now.tv_sec, i->first.name, i->first.type, i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, auth, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, d_routingTag, recordState, remoteIP, d_refresh);
4585
4586 // Delete potential negcache entry. When a record recovers with serve-stale the negcache entry can cause the wrong entry to
4587 // be served, as negcache entries are checked before record cache entries
4588 if (NegCache::s_maxServedStaleExtensions > 0) {
4589 g_negCache->wipeTyped(i->first.name, i->first.type);
4590 }
4591
4592 if (g_aggressiveNSECCache && needWildcardProof && recordState == vState::Secure && i->first.place == DNSResourceRecord::ANSWER && i->first.name == qname && !i->second.signatures.empty() && !d_routingTag && !ednsmask) {
4593 /* we have an answer synthesized from a wildcard and aggressive NSEC is enabled, we need to store the
4594 wildcard in its non-expanded form in the cache to be able to synthesize wildcard answers later */
4595 const auto& rrsig = i->second.signatures.at(0);
4596
4597 if (isWildcardExpanded(labelCount, *rrsig) && !isWildcardExpandedOntoItself(i->first.name, labelCount, *rrsig)) {
4598 DNSName realOwner = getNSECOwnerName(i->first.name, i->second.signatures);
4599
4600 std::vector<DNSRecord> content;
4601 content.reserve(i->second.records.size());
4602 for (const auto& record : i->second.records) {
4603 DNSRecord nonExpandedRecord(record);
4604 nonExpandedRecord.d_name = realOwner;
4605 content.push_back(std::move(nonExpandedRecord));
4606 }
4607
4608 g_recCache->replace(d_now.tv_sec, realOwner, QType(i->first.type), content, i->second.signatures, /* no additional records in that case */ {}, i->first.type == QType::DS ? true : isAA, auth, boost::none, boost::none, recordState, remoteIP, d_refresh);
4609 }
4610 }
4611 }
4612 }
4613
4614 if (seenAuth.empty() && !i->second.signatures.empty()) {
4615 seenAuth = getSigner(i->second.signatures);
4616 }
4617
4618 if (g_aggressiveNSECCache && (i->first.type == QType::NSEC || i->first.type == QType::NSEC3) && recordState == vState::Secure && !seenAuth.empty()) {
4619 // Good candidate for NSEC{,3} caching
4620 g_aggressiveNSECCache->insertNSEC(seenAuth, i->first.name, i->second.records.at(0), i->second.signatures, i->first.type == QType::NSEC3);
4621 }
4622
4623 if (i->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4624 d_wasVariable = true;
4625 }
4626 }
4627
4628 return RCode::NoError;
4629 }
4630
4631 void SyncRes::updateDenialValidationState(const DNSName& qname, vState& neValidationState, const DNSName& neName, vState& state, const dState denialState, const dState expectedState, bool isDS, unsigned int depth, const string& prefix)
4632 {
4633 if (denialState == expectedState) {
4634 neValidationState = vState::Secure;
4635 }
4636 else {
4637 if (denialState == dState::OPTOUT) {
4638 LOG(prefix << qname << ": OPT-out denial found for " << neName << endl);
4639 /* rfc5155 states:
4640 "The AD bit, as defined by [RFC4035], MUST NOT be set when returning a
4641 response containing a closest (provable) encloser proof in which the
4642 NSEC3 RR that covers the "next closer" name has the Opt-Out bit set.
4643
4644 This rule is based on what this closest encloser proof actually
4645 proves: names that would be covered by the Opt-Out NSEC3 RR may or
4646 may not exist as insecure delegations. As such, not all the data in
4647 responses containing such closest encloser proofs will have been
4648 cryptographically verified, so the AD bit cannot be set."
4649
4650 At best the Opt-Out NSEC3 RR proves that there is no signed DS (so no
4651 secure delegation).
4652 */
4653 neValidationState = vState::Insecure;
4654 }
4655 else if (denialState == dState::INSECURE) {
4656 LOG(prefix << qname << ": Insecure denial found for " << neName << ", returning Insecure" << endl);
4657 neValidationState = vState::Insecure;
4658 }
4659 else {
4660 LOG(prefix << qname << ": Invalid denial found for " << neName << ", res=" << denialState << ", expectedState=" << expectedState << ", checking whether we have missed a zone cut before returning a Bogus state" << endl);
4661 /* try again to get the missed cuts, harder this time */
4662 auto zState = getValidationStatus(neName, false, isDS, depth, prefix);
4663 if (zState != vState::Secure) {
4664 neValidationState = zState;
4665 }
4666 else {
4667 LOG(prefix << qname << ": Still in a secure zone with an invalid denial for " << neName << ", returning " << vStateToString(vState::BogusInvalidDenial) << endl);
4668 neValidationState = vState::BogusInvalidDenial;
4669 }
4670 }
4671 }
4672 updateValidationState(qname, state, neValidationState, prefix);
4673 }
4674
4675 dState SyncRes::getDenialValidationState(const NegCache::NegCacheEntry& ne, const dState expectedState, bool referralToUnsigned, const string& prefix)
4676 {
4677 cspmap_t csp = harvestCSPFromNE(ne);
4678 return getDenial(csp, ne.d_name, ne.d_qtype.getCode(), referralToUnsigned, expectedState == dState::NXQTYPE, LogObject(prefix));
4679 }
4680
/* Walk over every record of the response held in lwr and work out what it means
 * for the qname|qtype query that was sent to the servers of auth.
 *
 * Per record (OPT and non-IN records are ignored), the first matching case applies:
 *  - SOA in AUTHORITY with an NXDomain rcode: build, validate and (usually) store a
 *    whole-name negative cache entry, set negindic;
 *  - CNAME/DNAME in ANSWER while the query was not for CNAME/DNAME: push the record to
 *    ret and set newtarget (for a DNAME, the substituted name);
 *  - NSEC/NSEC3/RRSIG in AUTHORITY while gatherWildcardProof is set: push to ret;
 *  - record in ANSWER matching qname and qtype (or ANY): this is the answer, set
 *    rcode to NoError, check the wildcard denial if needWildcardProof, push to ret;
 *  - DNSSEC records in ANSWER: push to ret when they belong to qname (or sign a DNAME);
 *  - NS in AUTHORITY: record a referral (newauth, realreferral, nsset) or note a
 *    referral received for a DS query;
 *  - DS in AUTHORITY: only traced;
 *  - NSEC/NSEC3 in AUTHORITY after a real referral: check for a denial of the DS and
 *    store it in the negative cache;
 *  - SOA in AUTHORITY with a NoError rcode and no answer yet: NODATA, build, validate
 *    and store a per-type negative cache entry, set negindic.
 *
 * After the loop a CNAME is synthesized into ret if a DNAME was seen, and a referral
 * on a DS query without any negative indication is turned into a NODATA.
 *
 * prefix is prepended to every trace line. sendRDQuery makes ANSWER records acceptable
 * even when the AA bit is not set. Record TTLs inside lwr may be capped in place.
 * state is the running validation state of the query and is updated along the way;
 * depth is forwarded to getValidationStatus(). negIndicHasSignatures is set whenever
 * negindic is set by this function.
 *
 * Returns true when an answer to the query was found in the response.
 * Throws ImmediateServFailException when the DNAME substitution fails.
 */
4681 bool SyncRes::processRecords(const std::string& prefix, const DNSName& qname, const QType qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherWildcardProof, const unsigned int wildcardLabelsCount, int& rcode, bool& negIndicHasSignatures, unsigned int depth)
4682 {
4683 bool done = false;
4684 DNSName dnameTarget, dnameOwner;
4685 uint32_t dnameTTL = 0;
4686 bool referralOnDS = false;
4687
4688 for (auto& rec : lwr.d_records) {
4689 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4690 continue;
4691 }
4692
4693 if (rec.d_place == DNSResourceRecord::ANSWER && !(lwr.d_aabit || sendRDQuery)) {
4694 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4695 are sending such responses */
4696 if (!(rec.d_type == QType::CNAME && rec.d_name == qname)) {
4697 continue;
4698 }
4699 }
// NXDomain indication: a SOA in AUTHORITY whose owner is at or above qname and at or below auth
4700 const bool negCacheIndication = rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NXDomain && qname.isPartOf(rec.d_name) && rec.d_name.isPartOf(auth);
4701
4702 bool putInNegCache = true;
4703 if (negCacheIndication && qtype == QType::DS && isForwardOrAuth(qname)) {
4704 // #10189, a NXDOMAIN to a DS query for a forwarded or auth domain should not NXDOMAIN the whole domain
4705 putInNegCache = false;
4706 }
4707
4708 if (negCacheIndication) {
4709 LOG(prefix << qname << ": Got negative caching indication for name '" << qname << "' (accept=" << rec.d_name.isPartOf(auth) << "), newtarget='" << newtarget << "'" << endl);
4710
4711 rec.d_ttl = min(rec.d_ttl, s_maxnegttl);
4712 // only add a SOA if we're not going anywhere after this
4713 if (newtarget.empty()) {
4714 ret.push_back(rec);
4715 }
4716
4717 NegCache::NegCacheEntry ne;
4718
4719 uint32_t lowestTTL = rec.d_ttl;
4720 /* if we get an NXDomain answer with a CNAME, the name
4721 does exist but the target does not */
4722 ne.d_name = newtarget.empty() ? qname : newtarget;
4723 ne.d_qtype = QType::ENT; // this encodes 'whole record'
4724 ne.d_auth = rec.d_name;
4725 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4726
4727 if (vStateIsBogus(state)) {
4728 ne.d_validationState = state;
4729 }
4730 else {
4731 /* here we need to get the validation status of the zone telling us that the domain does not
4732 exist, ie the owner of the SOA */
4733 auto recordState = getValidationStatus(rec.d_name, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), false, depth, prefix);
4734 if (recordState == vState::Secure) {
4735 dState denialState = getDenialValidationState(ne, dState::NXDOMAIN, false, prefix);
4736 updateDenialValidationState(qname, ne.d_validationState, ne.d_name, state, denialState, dState::NXDOMAIN, false, depth, prefix);
4737 }
4738 else {
4739 ne.d_validationState = recordState;
4740 updateValidationState(qname, state, ne.d_validationState, prefix);
4741 }
4742 }
4743
4744 if (vStateIsBogus(ne.d_validationState)) {
4745 lowestTTL = min(lowestTTL, s_maxbogusttl);
4746 }
4747
4748 ne.d_ttd = d_now.tv_sec + lowestTTL;
4749 ne.d_orig_ttl = lowestTTL;
4750 /* if we get an NXDomain answer with a CNAME, let's not cache the
4751 target, even if the server was authoritative for it,
4752 and do an additional query for the CNAME target.
4753 We have a regression test making sure we do exactly that.
4754 */
4755 if (newtarget.empty() && putInNegCache) {
4756 g_negCache->add(ne);
4757 // doCNAMECacheCheck() checks record cache and does not look into negcache. That means that an old record might be found if
4758 // serve-stale is active. Avoid that by explicitly zapping that CNAME record.
4759 if (qtype == QType::CNAME && MemRecursorCache::s_maxServedStaleExtensions > 0) {
4760 g_recCache->doWipeCache(qname, false, qtype);
4761 }
// with root-nx-trust, an authoritative NXDomain from the root is also stored for the last label of the name
4762 if (s_rootNXTrust && ne.d_auth.isRoot() && auth.isRoot() && lwr.d_aabit) {
4763 ne.d_name = ne.d_name.getLastLabel();
4764 g_negCache->add(ne);
4765 }
4766 }
4767
4768 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4769 negindic = true;
4770 }
4771 else if (rec.d_place == DNSResourceRecord::ANSWER && s_redirectionQTypes.count(rec.d_type) > 0 && // CNAME or DNAME answer
4772 s_redirectionQTypes.count(qtype.getCode()) == 0) { // But not in response to a CNAME or DNAME query
4773 if (rec.d_type == QType::CNAME && rec.d_name == qname) {
4774 if (!dnameOwner.empty()) { // We synthesize ourselves
4775 continue;
4776 }
4777 ret.push_back(rec);
4778 if (auto content = getRR<CNAMERecordContent>(rec)) {
4779 newtarget = DNSName(content->getTarget());
4780 }
4781 }
4782 else if (rec.d_type == QType::DNAME && qname.isPartOf(rec.d_name)) { // DNAME
4783 ret.push_back(rec);
4784 if (auto content = getRR<DNAMERecordContent>(rec)) {
4785 dnameOwner = rec.d_name;
4786 dnameTarget = content->getTarget();
4787 dnameTTL = rec.d_ttl;
4788 if (!newtarget.empty()) { // We had a CNAME before, remove it from ret so we don't cache it
4789 ret.erase(std::remove_if(
4790 ret.begin(),
4791 ret.end(),
4792 [&qname](DNSRecord& rr) {
4793 return (rr.d_place == DNSResourceRecord::ANSWER && rr.d_type == QType::CNAME && rr.d_name == qname);
4794 }),
4795 ret.end());
4796 }
4797 try {
4798 newtarget = qname.makeRelative(dnameOwner) + dnameTarget;
4799 }
4800 catch (const std::exception& e) {
4801 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
4802 // But there is no way to set the RCODE from this function
4803 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + dnameOwner.toLogString() + "', DNAME target: '" + dnameTarget.toLogString() + "', substituted name: '" + qname.makeRelative(dnameOwner).toLogString() + "." + dnameTarget.toLogString() + "' : " + e.what());
4804 }
4805 }
4806 }
4807 }
4808 /* if we have a positive answer synthesized from a wildcard, we need to
4809 return the corresponding NSEC/NSEC3 records from the AUTHORITY section
4810 proving that the exact name did not exist.
4811 Except if this is a NODATA answer because then we will gather the NXNSEC records later */
4812 else if (gatherWildcardProof && !negindic && (rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::AUTHORITY) {
4813 ret.push_back(rec); // enjoy your DNSSEC
4814 }
4815 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
4816 else if (rec.d_place == DNSResourceRecord::ANSWER && rec.d_name == qname && (rec.d_type == qtype.getCode() || ((lwr.d_aabit || sendRDQuery) && qtype == QType::ANY))) {
4817 LOG(prefix << qname << ": Answer is in: resolved to '" << rec.getContent()->getZoneRepresentation() << "|" << DNSRecordContent::NumberToType(rec.d_type) << "'" << endl);
4818
4819 done = true;
4820 rcode = RCode::NoError;
4821
4822 if (needWildcardProof) {
4823 /* positive answer synthesized from a wildcard */
4824 NegCache::NegCacheEntry ne;
4825 ne.d_name = qname;
4826 ne.d_qtype = QType::ENT; // this encodes 'whole record'
4827 uint32_t lowestTTL = rec.d_ttl;
4828 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4829
4830 if (vStateIsBogus(state)) {
4831 ne.d_validationState = state;
4832 }
4833 else {
4834 auto recordState = getValidationStatus(qname, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), false, depth, prefix);
4835
4836 if (recordState == vState::Secure) {
4837 /* We have a positive answer synthesized from a wildcard, we need to check that we have
4838 proof that the exact name doesn't exist so the wildcard can be used,
4839 as described in section 5.3.4 of RFC 4035 and 5.3 of RFC 7129.
4840 */
4841 cspmap_t csp = harvestCSPFromNE(ne);
4842 dState res = getDenial(csp, qname, ne.d_qtype.getCode(), false, false, LogObject(prefix), false, wildcardLabelsCount);
4843 if (res != dState::NXDOMAIN) {
4844 vState st = vState::BogusInvalidDenial;
4845 if (res == dState::INSECURE || res == dState::OPTOUT) {
4846 /* Some part could not be validated, for example a NSEC3 record with a too large number of iterations,
4847 this is not enough to warrant a Bogus, but go Insecure. */
4848 st = vState::Insecure;
4849 LOG(prefix << qname << ": Unable to validate denial in wildcard expanded positive response found for " << qname << ", returning Insecure, res=" << res << endl);
4850 }
4851 else {
4852 LOG(prefix << qname << ": Invalid denial in wildcard expanded positive response found for " << qname << ", returning Bogus, res=" << res << endl);
4853 rec.d_ttl = std::min(rec.d_ttl, s_maxbogusttl);
4854 }
4855
4856 updateValidationState(qname, state, st, prefix);
4857 /* we already stored the record with a different validation status, let's fix it */
4858 updateValidationStatusInCache(qname, qtype, lwr.d_aabit, st);
4859 }
4860 }
4861 }
4862 }
4863
4864 ret.push_back(rec);
4865 }
4866 else if ((rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::ANSWER) {
4867 if (rec.d_type != QType::RRSIG || rec.d_name == qname) {
4868 ret.push_back(rec); // enjoy your DNSSEC
4869 }
4870 else if (rec.d_type == QType::RRSIG && qname.isPartOf(rec.d_name)) {
// an RRSIG owned by a parent of qname is only kept when it covers a DNAME
4871 auto rrsig = getRR<RRSIGRecordContent>(rec);
4872 if (rrsig != nullptr && rrsig->d_type == QType::DNAME) {
4873 ret.push_back(rec);
4874 }
4875 }
4876 }
4877 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::NS && qname.isPartOf(rec.d_name)) {
4878 if (moreSpecificThan(rec.d_name, auth)) {
4879 newauth = rec.d_name;
4880 LOG(prefix << qname << ": Got NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4881
4882 /* check if we have a referral from the parent zone to a child zone for a DS query, which is not right */
4883 if (qtype == QType::DS && (newauth.isPartOf(qname) || qname == newauth)) {
4884 /* just got a referral from the parent zone when asking for a DS, looks like this server did not get the DNSSEC memo.. */
4885 referralOnDS = true;
4886 }
4887 else {
4888 realreferral = true;
4889 if (auto content = getRR<NSRecordContent>(rec)) {
4890 nsset.insert(content->getNS());
4891 }
4892 }
4893 }
4894 else {
4895 LOG(prefix << qname << ": Got upwards/level NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "', had '" << auth << "'" << endl);
4896 if (auto content = getRR<NSRecordContent>(rec)) {
4897 nsset.insert(content->getNS());
4898 }
4899 }
4900 }
4901 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::DS && qname.isPartOf(rec.d_name)) {
// nothing is done with the DS record in this function besides tracing it
4902 LOG(prefix << qname << ": Got DS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4903 }
4904 else if (realreferral && rec.d_place == DNSResourceRecord::AUTHORITY && (rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && newauth.isPartOf(auth)) {
4905 /* we might have received a denial of the DS, let's check */
4906 NegCache::NegCacheEntry ne;
4907 uint32_t lowestTTL = rec.d_ttl;
4908 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4909
4910 if (!vStateIsBogus(state)) {
4911 auto recordState = getValidationStatus(newauth, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), true, depth, prefix);
4912
4913 if (recordState == vState::Secure) {
4914 ne.d_auth = auth;
4915 ne.d_name = newauth;
4916 ne.d_qtype = QType::DS;
4917 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
4918
4919 dState denialState = getDenialValidationState(ne, dState::NXQTYPE, true, prefix);
4920
4921 if (denialState == dState::NXQTYPE || denialState == dState::OPTOUT || denialState == dState::INSECURE) {
4922 ne.d_ttd = lowestTTL + d_now.tv_sec;
4923 ne.d_orig_ttl = lowestTTL;
4924 ne.d_validationState = vState::Secure;
4925 if (denialState == dState::OPTOUT) {
4926 ne.d_validationState = vState::Insecure;
4927 }
4928 LOG(prefix << qname << ": Got negative indication of DS record for '" << newauth << "'" << endl);
4929
4930 g_negCache->add(ne);
4931
4932 /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
4933 and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
4934 query. */
4935 if (qtype == QType::DS && qname == newauth && (d_externalDSQuery.empty() || qname != d_externalDSQuery)) {
4936 /* we are actually done! */
4937 negindic = true;
4938 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4939 nsset.clear();
4940 }
4941 }
4942 }
4943 }
4944 }
4945 else if (!done && rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NoError && qname.isPartOf(rec.d_name)) {
4946 LOG(prefix << qname << ": Got negative caching indication for '" << qname << "|" << qtype << "'" << endl);
4947
4948 if (!newtarget.empty()) {
4949 LOG(prefix << qname << ": Hang on! Got a redirect to '" << newtarget << "' already" << endl);
4950 }
4951 else {
4952 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
4953
4954 NegCache::NegCacheEntry ne;
4955 ne.d_auth = rec.d_name;
4956 uint32_t lowestTTL = rec.d_ttl;
4957 ne.d_name = qname;
4958 ne.d_qtype = qtype;
4959 harvestNXRecords(lwr.d_records, ne, d_now.tv_sec, &lowestTTL);
4960
4961 if (vStateIsBogus(state)) {
4962 ne.d_validationState = state;
4963 }
4964 else {
4965 auto recordState = getValidationStatus(qname, !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty(), qtype == QType::DS, depth, prefix);
4966 if (recordState == vState::Secure) {
4967 dState denialState = getDenialValidationState(ne, dState::NXQTYPE, false, prefix);
4968 updateDenialValidationState(qname, ne.d_validationState, ne.d_name, state, denialState, dState::NXQTYPE, qtype == QType::DS, depth, prefix);
4969 }
4970 else {
4971 ne.d_validationState = recordState;
4972 updateValidationState(qname, state, ne.d_validationState, prefix);
4973 }
4974 }
4975
4976 if (vStateIsBogus(ne.d_validationState)) {
4977 lowestTTL = min(lowestTTL, s_maxbogusttl);
4978 rec.d_ttl = min(rec.d_ttl, s_maxbogusttl);
4979 }
4980 ne.d_ttd = d_now.tv_sec + lowestTTL;
4981 ne.d_orig_ttl = lowestTTL;
4982 if (qtype.getCode()) { // prevents us from NXDOMAIN'ing a whole domain
4983 g_negCache->add(ne);
4984 }
4985
4986 ret.push_back(rec);
4987 negindic = true;
4988 negIndicHasSignatures = !ne.authoritySOA.signatures.empty() || !ne.DNSSECRecords.signatures.empty();
4989 }
4990 }
4991 }
4992
4993 if (!dnameTarget.empty()) {
4994 // Synthesize a CNAME
// owner is qname, target is the substituted name in newtarget, TTL is the one of the DNAME
4995 auto cnamerec = DNSRecord();
4996 cnamerec.d_name = qname;
4997 cnamerec.d_type = QType::CNAME;
4998 cnamerec.d_ttl = dnameTTL;
4999 cnamerec.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newtarget)));
5000 ret.push_back(std::move(cnamerec));
5001 }
5002
5003 /* If we have seen a proper denial, let's forget that we also had a referral for a DS query.
5004 Otherwise we need to deal with it. */
5005 if (referralOnDS && !negindic) {
5006 LOG(prefix << qname << ": Got a referral to the child zone for a DS query without a negative indication (missing SOA in authority), treating that as a NODATA" << endl);
5007 if (!vStateIsBogus(state)) {
5008 auto recordState = getValidationStatus(qname, false, true, depth, prefix);
5009 if (recordState == vState::Secure) {
5010 /* we are in a secure zone, got a referral to the child zone on a DS query, no denial, that's wrong */
5011 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5012 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5013 }
5014 }
5015 negindic = true;
5016 negIndicHasSignatures = false;
5017 }
5018
5019 return done;
5020 }
5021
5022 static void submitTryDotTask(ComboAddress address, const DNSName& auth, const DNSName nsname, time_t now)
5023 {
5024 if (address.getPort() == 853) {
5025 return;
5026 }
5027 address.setPort(853);
5028 auto lock = s_dotMap.lock();
5029 if (lock->d_numBusy >= SyncRes::s_max_busy_dot_probes) {
5030 return;
5031 }
5032 auto it = lock->d_map.emplace(DoTStatus{address, auth, now + dotFailWait}).first;
5033 if (it->d_status == DoTStatus::Busy) {
5034 return;
5035 }
5036 if (it->d_ttd > now) {
5037 if (it->d_status == DoTStatus::Bad) {
5038 return;
5039 }
5040 if (it->d_status == DoTStatus::Good) {
5041 return;
5042 }
5043 // We only want to probe auths that we have seen before, auth that only come around once are not interesting
5044 if (it->d_status == DoTStatus::Unknown && it->d_count == 0) {
5045 return;
5046 }
5047 }
5048 lock->d_map.modify(it, [=](DoTStatus& st) { st.d_ttd = now + dotFailWait; });
5049 bool pushed = pushTryDoTTask(auth, QType::SOA, address, std::numeric_limits<time_t>::max(), nsname);
5050 if (pushed) {
5051 it->d_status = DoTStatus::Busy;
5052 ++lock->d_numBusy;
5053 }
5054 }
5055
5056 static bool shouldDoDoT(ComboAddress address, time_t now)
5057 {
5058 address.setPort(853);
5059 auto lock = s_dotMap.lock();
5060 auto it = lock->d_map.find(address);
5061 if (it == lock->d_map.end()) {
5062 return false;
5063 }
5064 it->d_count++;
5065 if (it->d_status == DoTStatus::Good && it->d_ttd > now) {
5066 return true;
5067 }
5068 return false;
5069 }
5070
5071 static void updateDoTStatus(ComboAddress address, DoTStatus::Status status, time_t time, bool updateBusy = false)
5072 {
5073 address.setPort(853);
5074 auto lock = s_dotMap.lock();
5075 auto it = lock->d_map.find(address);
5076 if (it != lock->d_map.end()) {
5077 it->d_status = status;
5078 lock->d_map.modify(it, [=](DoTStatus& st) { st.d_ttd = time; });
5079 if (updateBusy) {
5080 --lock->d_numBusy;
5081 }
5082 }
5083 }
5084
5085 bool SyncRes::tryDoT(const DNSName& qname, const QType qtype, const DNSName& nsName, ComboAddress address, time_t now)
5086 {
5087 auto log = g_slog->withName("taskq")->withValues("method", Logging::Loggable("tryDoT"), "name", Logging::Loggable(qname), "qtype", Logging::Loggable(QType(qtype).toString()), "ip", Logging::Loggable(address));
5088
5089 auto logHelper1 = [&log](const string& ename) {
5090 log->info(Logr::Debug, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5091 };
5092 auto logHelper2 = [&log](const string& msg, const string& ename) {
5093 log->error(Logr::Debug, msg, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5094 };
5095 LWResult lwr;
5096 bool truncated;
5097 bool spoofed;
5098 boost::optional<Netmask> nm;
5099 address.setPort(853);
5100 // We use the fact that qname equals auth
5101 bool ok = false;
5102 try {
5103 boost::optional<EDNSExtendedError> extendedError;
5104 ok = doResolveAtThisIP("", qname, qtype, lwr, nm, qname, false, false, nsName, address, true, true, truncated, spoofed, extendedError, true);
5105 ok = ok && lwr.d_rcode == RCode::NoError && lwr.d_records.size() > 0;
5106 }
5107 catch (const PDNSException& e) {
5108 logHelper2(e.reason, "PDNSException");
5109 }
5110 catch (const ImmediateServFailException& e) {
5111 logHelper2(e.reason, "ImmediateServFailException");
5112 }
5113 catch (const PolicyHitException& e) {
5114 logHelper1("PolicyHitException");
5115 }
5116 catch (const std::exception& e) {
5117 logHelper2(e.what(), "std::exception");
5118 }
5119 catch (...) {
5120 logHelper1("other");
5121 }
5122 updateDoTStatus(address, ok ? DoTStatus::Good : DoTStatus::Bad, now + (ok ? dotSuccessWait : dotFailWait), true);
5123 return ok;
5124 }
5125
/**
 * Send one query for qname|qtype to remoteIP and classify the outcome.
 *
 * Returns true when a parseable answer with rcode NoError or NXDomain was
 * received; this includes a truncated answer over UDP, in which case
 * `truncated` is set. Returns false when the answer was spoofed (`spoofed` is
 * set), on timeout, OS resource limit or other error, when the packet could
 * not be parsed, when another rcode was returned, or when the TC bit was set
 * over TCP.
 *
 * `truncated` and `spoofed` are only ever set to true in this function, never
 * reset to false.
 *
 * Unless the query was chained or `dontThrottle` applies (passed in by the
 * caller, or derived from g_dontThrottleNames / g_dontThrottleNetmasks),
 * failures penalize the server in s_nsSpeeds and/or throttle it.
 *
 * Throws ImmediateServFailException when the time budget s_maxtotusec has
 * been exceeded or when the Lua preoutquery hook set the rcode to -3.
 */
5126 bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const bool wasForwarded, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool doDoT, bool& truncated, bool& spoofed, boost::optional<EDNSExtendedError>& extendedError, bool dontThrottle)
5127 {
5128 bool chained = false;
5129 LWResult::Result resolveret = LWResult::Result::Success;
5130 t_Counters.at(rec::Counter::outqueries)++;
5131 d_outqueries++;
5132 checkMaxQperQ(qname);
5133
// Give up when the time already spent waiting for answers exceeds the configured maximum
5134 if (s_maxtotusec && d_totUsec > s_maxtotusec) {
5135 if (s_addExtendedResolutionDNSErrors) {
5136 extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "Timeout waiting for answer(s)"};
5137 }
5138 throw ImmediateServFailException("Too much time waiting for " + qname.toLogString() + "|" + qtype.toString() + ", timeouts: " + std::to_string(d_timeouts) + ", throttles: " + std::to_string(d_throttledqueries) + ", queries: " + std::to_string(d_outqueries) + ", " + std::to_string(d_totUsec / 1000) + " ms");
5139 }
5140
5141 if (doTCP) {
5142 if (doDoT) {
5143 LOG(prefix << qname << ": Using DoT with " << remoteIP.toStringWithPort() << endl);
5144 t_Counters.at(rec::Counter::dotoutqueries)++;
5145 d_dotoutqueries++;
5146 }
5147 else {
5148 LOG(prefix << qname << ": Using TCP with " << remoteIP.toStringWithPort() << endl);
5149 t_Counters.at(rec::Counter::tcpoutqueries)++;
5150 d_tcpoutqueries++;
5151 }
5152 }
5153
// The Lua preoutquery hook, if present, may handle the query itself; otherwise the query is sent out
5154 int preOutQueryRet = RCode::NoError;
5155 if (d_pdl && d_pdl->preoutquery(remoteIP, d_requestor, qname, qtype, doTCP, lwr.d_records, preOutQueryRet, d_eventTrace, timeval{0, 0})) {
5156 LOG(prefix << qname << ": Query handled by Lua" << endl);
5157 }
5158 else {
5159 ednsmask = getEDNSSubnetMask(qname, remoteIP);
5160 if (ednsmask) {
5161 LOG(prefix << qname << ": Adding EDNS Client Subnet Mask " << ednsmask->toString() << " to query" << endl);
5162 s_ecsqueries++;
5163 }
5164 resolveret = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, auth, qtype.getCode(),
5165 doTCP, sendRDQuery, &d_now, ednsmask, &lwr, &chained, nsName); // <- we go out on the wire!
5166 if (ednsmask) {
5167 s_ecsresponses++;
5168 LOG(prefix << qname << ": Received EDNS Client Subnet Mask " << ednsmask->toString() << " on response" << endl);
5169 if (ednsmask->getBits() > 0) {
5170 if (ednsmask->isIPv4()) {
5171 ++SyncRes::s_ecsResponsesBySubnetSize4.at(ednsmask->getBits() - 1);
5172 }
5173 else {
5174 ++SyncRes::s_ecsResponsesBySubnetSize6.at(ednsmask->getBits() - 1);
5175 }
5176 }
5177 }
5178 }
5179
5180 /* preoutquery killed the query by setting dq.rcode to -3 */
5181 if (preOutQueryRet == -3) {
5182 throw ImmediateServFailException("Query killed by policy");
5183 }
5184
5185 d_totUsec += lwr.d_usec;
5186
5187 if (resolveret == LWResult::Result::Spoofed) {
5188 spoofed = true;
5189 return false;
5190 }
5191
5192 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
5193 ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
5194
// Unless the caller already disabled throttling, check the configured don't-throttle names and netmasks
5195 if (!dontThrottle) {
5196 auto dontThrottleNames = g_dontThrottleNames.getLocal();
5197 auto dontThrottleNetmasks = g_dontThrottleNetmasks.getLocal();
5198 dontThrottle = dontThrottleNames->check(nsName) || dontThrottleNetmasks->match(remoteIP);
5199 }
5200
5201 if (resolveret != LWResult::Result::Success) {
5202 /* Error while resolving */
5203 if (resolveret == LWResult::Result::Timeout) {
5204 /* Time out */
5205
5206 LOG(prefix << qname << ": Timeout resolving after " << lwr.d_usec / 1000.0 << " ms " << (doTCP ? "over TCP" : "") << endl);
5207 d_timeouts++;
5208 t_Counters.at(rec::Counter::outgoingtimeouts)++;
5209
5210 if (remoteIP.sin4.sin_family == AF_INET)
5211 t_Counters.at(rec::Counter::outgoing4timeouts)++;
5212 else
5213 t_Counters.at(rec::Counter::outgoing6timeouts)++;
5214
5215 if (t_timeouts)
5216 t_timeouts->push_back(remoteIP);
5217 }
5218 else if (resolveret == LWResult::Result::OSLimitError) {
5219 /* OS resource limit reached */
5220 LOG(prefix << qname << ": Hit a local resource limit resolving" << (doTCP ? " over TCP" : "") << ", probable error: " << stringerror() << endl);
5221 t_Counters.at(rec::Counter::resourceLimits)++;
5222 }
5223 else {
5224 /* LWResult::Result::PermanentError */
5225 t_Counters.at(rec::Counter::unreachables)++;
5226 d_unreachables++;
5227 // XXX questionable use of errno
5228 LOG(prefix << qname << ": Error resolving from " << remoteIP.toString() << (doTCP ? " over TCP" : "") << ", possible error: " << stringerror() << endl);
5229 }
5230
5231 if (resolveret != LWResult::Result::OSLimitError && !chained && !dontThrottle) {
5232 // don't account for resource limits, they are our own fault
5233 // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
5234 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5235
5236 // code below makes sure we don't filter COM or the root
5237 if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && s_fails.lock()->incr(remoteIP, d_now) >= s_serverdownmaxfails) {
5238 LOG(prefix << qname << ": Max fails reached resolving on " << remoteIP.toString() << ". Going full throttle for " << s_serverdownthrottletime << " seconds" << endl);
5239 // mark server as down
5240 doThrottle(d_now.tv_sec, remoteIP, s_serverdownthrottletime, 10000);
5241 }
5242 else if (resolveret == LWResult::Result::PermanentError) {
5243 // unreachable, 1 minute or 100 queries
5244 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 100);
5245 }
5246 else {
5247 // timeout, 10 seconds or 5 queries
5248 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 5);
5249 }
5250 }
5251
5252 return false;
5253 }
5254
5255 if (lwr.d_validpacket == false) {
5256 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a packet we could not parse over " << (doTCP ? "TCP" : "UDP") << ", trying sibling IP or NS" << endl);
5257 if (!chained && !dontThrottle) {
5258
5259 // let's make sure we prefer a different server for some time, if there is one available
5260 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5261
5262 if (doTCP) {
5263 // we can be more heavy-handed over TCP
5264 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 10);
5265 }
5266 else {
5267 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 2);
5268 }
5269 }
5270 return false;
5271 }
5272 else {
5273 /* we got an answer */
5274 if (lwr.d_rcode != RCode::NoError && lwr.d_rcode != RCode::NXDomain) {
5275 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a " << RCode::to_s(lwr.d_rcode) << ", trying sibling IP or NS" << endl);
5276 if (!chained && !dontThrottle) {
5277 if (wasForwarded && lwr.d_rcode == RCode::ServFail) {
5278 // rather than throttling what could be the only server we have for this destination, let's make sure we try a different one if there is one available
5279 // on the other hand, we might keep hammering a server under attack if there is no other alternative, or the alternative is overwhelmed as well, but
5280 // at the very least we will detect that if our packets stop being answered
5281 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5282 }
5283 else {
5284 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5285 }
5286 }
5287 return false;
5288 }
5289 }
5290
5291 /* this server sent a valid answer, mark it back up if it was down */
5292 if (s_serverdownmaxfails > 0) {
5293 s_fails.lock()->clear(remoteIP);
5294 }
5295
5296 if (lwr.d_tcbit) {
// The flag is only ever raised here, so callers need to initialize it themselves
5297 truncated = true;
5298
5299 if (doTCP) {
5300 LOG(prefix << qname << ": Truncated bit set, over TCP?" << endl);
5301 if (!dontThrottle) {
5302 /* let's treat that as a ServFail answer from this server */
5303 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5304 }
5305 return false;
5306 }
5307 LOG(prefix << qname << ": Truncated bit set, over UDP" << endl);
5308
5309 return true;
5310 }
5311
5312 return true;
5313 }
5314
5315 void SyncRes::handleNewTarget(const std::string& prefix, const DNSName& qname, const DNSName& newtarget, const QType qtype, std::vector<DNSRecord>& ret, int& rcode, unsigned int depth, const std::vector<DNSRecord>& recordsFromAnswer, vState& state)
5316 {
5317 if (newtarget == qname) {
5318 LOG(prefix << qname << ": Status=got a CNAME referral to self, returning SERVFAIL" << endl);
5319 ret.clear();
5320 rcode = RCode::ServFail;
5321 return;
5322 }
5323 if (newtarget.isPartOf(qname)) {
5324 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
5325 LOG(prefix << qname << ": Status=got a CNAME referral to child, disabling QM" << endl);
5326 setQNameMinimization(false);
5327 }
5328
5329 // Was 10 originally, default s_maxdepth is 40, but even if it is zero we want to apply a bound
5330 auto bound = std::max(40U, getAdjustedRecursionBound()) / 4;
5331 if (depth > bound) {
5332 LOG(prefix << qname << ": Status=got a CNAME referral, but recursing too deep, returning SERVFAIL" << endl);
5333 rcode = RCode::ServFail;
5334 return;
5335 }
5336
5337 if (!d_followCNAME) {
5338 rcode = RCode::NoError;
5339 return;
5340 }
5341
5342 // Check to see if we already have seen the new target as a previous target
5343 if (scanForCNAMELoop(newtarget, ret)) {
5344 LOG(prefix << qname << ": Status=got a CNAME referral that causes a loop, returning SERVFAIL" << endl);
5345 ret.clear();
5346 rcode = RCode::ServFail;
5347 return;
5348 }
5349
5350 if (qtype == QType::DS || qtype == QType::DNSKEY) {
5351 LOG(prefix << qname << ": Status=got a CNAME referral, but we are looking for a DS or DNSKEY" << endl);
5352
5353 if (d_doDNSSEC) {
5354 addNXNSECS(ret, recordsFromAnswer);
5355 }
5356
5357 rcode = RCode::NoError;
5358 return;
5359 }
5360
5361 LOG(prefix << qname << ": Status=got a CNAME referral, starting over with " << newtarget << endl);
5362
5363 set<GetBestNSAnswer> beenthere;
5364 Context cnameContext;
5365 rcode = doResolve(newtarget, qtype, ret, depth + 1, beenthere, cnameContext);
5366 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << state << " with the state from the CNAME quest: " << cnameContext.state << endl);
5367 updateValidationState(qname, state, cnameContext.state, prefix);
5368 }
5369
5370 bool SyncRes::processAnswer(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, bool sendRDQuery, NsSet& nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state, const ComboAddress& remoteIP)
5371 {
5372 if (s_minimumTTL) {
5373 for (auto& rec : lwr.d_records) {
5374 rec.d_ttl = max(rec.d_ttl, s_minimumTTL);
5375 }
5376 }
5377
5378 /* if the answer is ECS-specific, a minimum TTL is set for this kind of answers
5379 and it's higher than the global minimum TTL */
5380 if (ednsmask && s_minimumECSTTL > 0 && (s_minimumTTL == 0 || s_minimumECSTTL > s_minimumTTL)) {
5381 for (auto& rec : lwr.d_records) {
5382 if (rec.d_place == DNSResourceRecord::ANSWER) {
5383 rec.d_ttl = max(rec.d_ttl, s_minimumECSTTL);
5384 }
5385 }
5386 }
5387
5388 bool needWildcardProof = false;
5389 bool gatherWildcardProof = false;
5390 unsigned int wildcardLabelsCount = 0;
5391 *rcode = updateCacheFromRecords(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, sendRDQuery, remoteIP);
5392 if (*rcode != RCode::NoError) {
5393 return true;
5394 }
5395
5396 LOG(prefix << qname << ": Determining status after receiving this packet" << endl);
5397
5398 set<DNSName> nsset;
5399 bool realreferral = false;
5400 bool negindic = false;
5401 bool negIndicHasSignatures = false;
5402 DNSName newauth;
5403 DNSName newtarget;
5404
5405 bool done = processRecords(prefix, qname, qtype, auth, lwr, sendRDQuery, ret, nsset, newtarget, newauth, realreferral, negindic, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, *rcode, negIndicHasSignatures, depth);
5406
5407 if (done) {
5408 LOG(prefix << qname << ": Status=got results, this level of recursion done" << endl);
5409 LOG(prefix << qname << ": Validation status is " << state << endl);
5410 return true;
5411 }
5412
5413 if (!newtarget.empty()) {
5414 handleNewTarget(prefix, qname, newtarget, qtype.getCode(), ret, *rcode, depth, lwr.d_records, state);
5415 return true;
5416 }
5417
5418 if (lwr.d_rcode == RCode::NXDomain) {
5419 LOG(prefix << qname << ": Status=NXDOMAIN, we are done " << (negindic ? "(have negative SOA)" : "") << endl);
5420
5421 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5422 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5423 LOG(prefix << qname << ": NXDOMAIN without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5424 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5425 }
5426 else {
5427 /* we might not have validated any record, because we did get a NXDOMAIN without any SOA
5428 from an insecure zone, for example */
5429 updateValidationState(qname, state, tempState, prefix);
5430 }
5431
5432 if (d_doDNSSEC) {
5433 addNXNSECS(ret, lwr.d_records);
5434 }
5435
5436 *rcode = RCode::NXDomain;
5437 return true;
5438 }
5439
5440 if (nsset.empty() && !lwr.d_rcode && (negindic || lwr.d_aabit || sendRDQuery)) {
5441 LOG(prefix << qname << ": Status=noerror, other types may exist, but we are done " << (negindic ? "(have negative SOA) " : "") << (lwr.d_aabit ? "(have aa bit) " : "") << endl);
5442
5443 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5444 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5445 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5446 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5447 }
5448 else {
5449 /* we might not have validated any record, because we did get a NODATA without any SOA
5450 from an insecure zone, for example */
5451 updateValidationState(qname, state, tempState, prefix);
5452 }
5453
5454 if (d_doDNSSEC) {
5455 addNXNSECS(ret, lwr.d_records);
5456 }
5457
5458 *rcode = RCode::NoError;
5459 return true;
5460 }
5461
5462 if (realreferral) {
5463 LOG(prefix << qname << ": Status=did not resolve, got " << (unsigned int)nsset.size() << " NS, ");
5464
5465 nameservers.clear();
5466 for (auto const& nameserver : nsset) {
5467 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
5468 bool match = dfe.getProcessingPolicy(nameserver, d_discardedPolicies, d_appliedPolicy);
5469 if (match) {
5470 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
5471 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
5472 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5473 /* reset to no match */
5474 d_appliedPolicy = DNSFilterEngine::Policy();
5475 }
5476 else {
5477 LOG("however " << nameserver << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
5478 throw PolicyHitException();
5479 }
5480 }
5481 }
5482 }
5483 nameservers.insert({nameserver, {{}, false}});
5484 }
5485 LOG("looping to them" << endl);
5486 *gotNewServers = true;
5487 auth = newauth;
5488
5489 return false;
5490 }
5491
5492 return false;
5493 }
5494
5495 bool SyncRes::doDoTtoAuth(const DNSName& ns) const
5496 {
5497 return g_DoTToAuthNames.getLocal()->check(ns);
5498 }
5499
5500 /** Try to resolve qname|qtype by asking the given set of nameservers.
 *
 * Nameservers are tried in the order given by shuffleInSpeedOrder(); for
 * each of them the addresses are tried one by one until processAnswer()
 * reports that we are done. When an answer turns out to be a referral, the
 * nameserver set is replaced and the procedure starts over with the new set.
 *
 * Throws PolicyHitException on an RPZ hit that is not cancelled by the Lua
 * policy hit filter, and ImmediateServFailException when too many queries
 * for NS addresses came back empty.
 *
 * returns:
5501 * -1 in case of no results
5502 * rcode otherwise
5503 */
5504 int SyncRes::doResolveAt(NsSet& nameservers, DNSName auth, bool flawedNSSet, const DNSName& qname, const QType qtype,
5505 vector<DNSRecord>& ret,
5506 unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, Context& context, StopAtDelegation* stopAtDelegation,
5507 map<DNSName, vector<ComboAddress>>* fallBack)
5508 {
5509 auto luaconfsLocal = g_luaconfs.getLocal();
5510
5511 LOG(prefix << qname << ": Cache consultations done, have " << (unsigned int)nameservers.size() << " NS to contact");
5512
5513 if (nameserversBlockedByRPZ(luaconfsLocal->dfe, nameservers)) {
5514 /* RPZ hit */
5515 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5516 /* reset to no match */
5517 d_appliedPolicy = DNSFilterEngine::Policy();
5518 }
5519 else {
5520 throw PolicyHitException();
5521 }
5522 }
5523
5524 LOG(endl);
5525
5526 unsigned int addressQueriesForNS = 0;
5527 for (;;) { // we may get more specific nameservers
5528 auto rnameservers = shuffleInSpeedOrder(qname, nameservers, prefix);
5529
5530 // We allow s_maxnsaddressqperq (default 10) queries with empty responses when resolving NS names.
5531 // If a zone publishes many (more than s_maxnsaddressqperq) NS records, we allow less.
5532 // This is to "punish" zones that publish many non-resolving NS names.
5533 // We always allow 5 NS name resolving attempts with empty results.
5534 unsigned int nsLimit = s_maxnsaddressqperq;
5535 if (rnameservers.size() > nsLimit) {
// NOTE(review): the subtraction mixes a signed int with unsigned sizes before the result is stored in an int - confirm the conversion is intended
5536 int newLimit = static_cast<int>(nsLimit) - (rnameservers.size() - nsLimit);
5537 nsLimit = std::max(5, newLimit);
5538 }
5539
5540 for (auto tns = rnameservers.cbegin();; ++tns) {
5541 if (addressQueriesForNS >= nsLimit) {
5542 throw ImmediateServFailException(std::to_string(nsLimit) + " (adjusted max-ns-address-qperq) or more queries with empty results for NS addresses sent resolving " + qname.toLogString());
5543 }
// Running out of nameservers is the only regular exit of this loop without a result
5544 if (tns == rnameservers.cend()) {
5545 LOG(prefix << qname << ": Failed to resolve via any of the " << (unsigned int)rnameservers.size() << " offered NS at level '" << auth << "'" << endl);
5546 if (s_addExtendedResolutionDNSErrors) {
5547 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "delegation " + auth.toLogString()};
5548 }
5549 if (!auth.isRoot() && flawedNSSet) {
5550 LOG(prefix << qname << ": Ageing nameservers for level '" << auth << "', next query might succeed" << endl);
5551 if (g_recCache->doAgeCache(d_now.tv_sec, auth, QType::NS, 10)) {
5552 t_Counters.at(rec::Counter::nsSetInvalidations)++;
5553 }
5554 }
5555 return -1;
5556 }
5557
5558 bool cacheOnly = false;
5559 // this line needs to identify the 'self-resolving' behaviour
5560 if (qname == tns->first && (qtype.getCode() == QType::A || qtype.getCode() == QType::AAAA)) {
5561 /* we might have a glue entry in cache so let's try this NS
5562 but only if we have enough in the cache to know how to reach it */
5563 LOG(prefix << qname << ": Using NS to resolve itself, but only using what we have in cache (" << (1 + tns - rnameservers.cbegin()) << "/" << rnameservers.size() << ")" << endl);
5564 cacheOnly = true;
5565 }
5566
5567 typedef vector<ComboAddress> remoteIPs_t;
5568 remoteIPs_t remoteIPs;
5569 remoteIPs_t::iterator remoteIP;
5570 bool pierceDontQuery = false;
5571 bool sendRDQuery = false;
5572 boost::optional<Netmask> ednsmask;
5573 LWResult lwr;
// An entry with an empty name that does carry addresses is treated as a forwarder
5574 const bool wasForwarded = tns->first.empty() && (!nameservers[tns->first].first.empty());
5575 int rcode = RCode::NoError;
5576 bool gotNewServers = false;
5577
5578 if (tns->first.empty() && !wasForwarded) {
5579 static ComboAddress const s_oobRemote("255.255.255.255");
5580 LOG(prefix << qname << ": Domain is out-of-band" << endl);
5581 /* setting state to indeterminate since validation is disabled for local auth zone,
5582 and Insecure would be misleading. */
5583 context.state = vState::Indeterminate;
5584 d_wasOutOfBand = doOOBResolve(qname, qtype, lwr.d_records, depth, prefix, lwr.d_rcode);
5585 lwr.d_tcbit = false;
5586 lwr.d_aabit = true;
5587
5588 /* we have received an answer, are we done ? */
5589 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, false, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, s_oobRemote);
5590 if (done) {
5591 return rcode;
5592 }
5593 if (gotNewServers) {
5594 if (stopAtDelegation && *stopAtDelegation == Stop) {
5595 *stopAtDelegation = Stopped;
5596 return rcode;
5597 }
5598 break;
5599 }
5600 }
5601 else {
// Addresses supplied by the caller through fallBack take precedence over looking them up
5602 if (fallBack != nullptr) {
5603 if (auto it = fallBack->find(tns->first); it != fallBack->end()) {
5604 remoteIPs = it->second;
5605 }
5606 }
5607 if (remoteIPs.size() == 0) {
5608 remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS);
5609 }
5610
5611 if (remoteIPs.empty()) {
5612 LOG(prefix << qname << ": Failed to get IP for NS " << tns->first << ", trying next if available" << endl);
5613 flawedNSSet = true;
5614 continue;
5615 }
5616 else {
5617 bool hitPolicy{false};
5618 LOG(prefix << qname << ": Resolved '" << auth << "' NS " << tns->first << " to: ");
5619 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5620 if (remoteIP != remoteIPs.begin()) {
5621 LOG(", ");
5622 }
5623 LOG(remoteIP->toString());
5624 if (nameserverIPBlockedByRPZ(luaconfsLocal->dfe, *remoteIP)) {
5625 hitPolicy = true;
5626 }
5627 }
5628 LOG(endl);
5629 if (hitPolicy) { // implies d_wantsRPZ
5630 /* RPZ hit */
5631 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5632 /* reset to no match */
5633 d_appliedPolicy = DNSFilterEngine::Policy();
5634 }
5635 else {
5636 throw PolicyHitException();
5637 }
5638 }
5639 }
5640
5641 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5642 LOG(prefix << qname << ": Trying IP " << remoteIP->toStringWithPort() << ", asking '" << qname << "|" << qtype << "'" << endl);
5643
5644 if (throttledOrBlocked(prefix, *remoteIP, qname, qtype, pierceDontQuery)) {
5645 // As d_throttledqueries might be increased, check the max-qperq condition
5646 checkMaxQperQ(qname);
5647 continue;
5648 }
5649
5650 bool truncated = false;
5651 bool spoofed = false;
5652 bool gotAnswer = false;
5653 bool doDoT = false;
5654
// DoT is used when the nameserver name is configured for it, or when the address already uses port 853 and s_dot_to_port_853 is set
5655 if (doDoTtoAuth(tns->first)) {
5656 remoteIP->setPort(853);
5657 doDoT = true;
5658 }
5659 if (SyncRes::s_dot_to_port_853 && remoteIP->getPort() == 853) {
5660 doDoT = true;
5661 }
5662 bool forceTCP = doDoT;
5663
5664 if (!doDoT && s_max_busy_dot_probes > 0) {
5665 submitTryDotTask(*remoteIP, auth, tns->first, d_now.tv_sec);
5666 }
5667 if (!forceTCP) {
5668 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5669 tns->first, *remoteIP, false, false, truncated, spoofed, context.extendedError);
5670 }
5671 if (forceTCP || (spoofed || (gotAnswer && truncated))) {
5672 /* retry, over TCP this time */
5673 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5674 tns->first, *remoteIP, true, doDoT, truncated, spoofed, context.extendedError);
5675 }
5676
5677 if (!gotAnswer) {
5678 if (doDoT && s_max_busy_dot_probes > 0) {
5679 // This is quite pessimistic...
5680 updateDoTStatus(*remoteIP, DoTStatus::Bad, d_now.tv_sec + dotFailWait);
5681 }
5682 continue;
5683 }
5684
5685 LOG(prefix << qname << ": Got " << (unsigned int)lwr.d_records.size() << " answers from " << tns->first << " (" << remoteIP->toString() << "), rcode=" << lwr.d_rcode << " (" << RCode::to_s(lwr.d_rcode) << "), aa=" << lwr.d_aabit << ", in " << lwr.d_usec / 1000 << "ms" << endl);
5686
5687 if (doDoT && s_max_busy_dot_probes > 0) {
5688 updateDoTStatus(*remoteIP, DoTStatus::Good, d_now.tv_sec + dotSuccessWait);
5689 }
5690 /* // for you IPv6 fanatics :-)
5691 if(remoteIP->sin4.sin_family==AF_INET6)
5692 lwr.d_usec/=3;
5693 */
5694 // cout<<"ms: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
5695
5696 s_nsSpeeds.lock()->find_or_enter(tns->first.empty() ? DNSName(remoteIP->toStringWithPort()) : tns->first, d_now).submit(*remoteIP, lwr.d_usec, d_now);
5697
5698 /* we have received an answer, are we done ? */
5699 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, *remoteIP);
5700 if (done) {
5701 return rcode;
5702 }
5703 if (gotNewServers) {
5704 if (stopAtDelegation && *stopAtDelegation == Stop) {
5705 *stopAtDelegation = Stopped;
5706 return rcode;
5707 }
5708 break;
5709 }
5710 /* was lame */
5711 doThrottle(d_now.tv_sec, *remoteIP, qname, qtype, 60, 100);
5712 }
5713
// Leave the loop over the current nameservers so the outer loop restarts with the new set
5714 if (gotNewServers) {
5715 break;
5716 }
5717
// NOTE(review): this is the last statement of the loop body, so the continue has no effect
5718 if (remoteIP == remoteIPs.cend()) // we tried all IP addresses, none worked
5719 continue;
5720 }
5721 }
5722 }
5723 return -1;
5724 }
5725
5726 void SyncRes::setQuerySource(const Netmask& netmask)
5727 {
5728 if (!netmask.empty()) {
5729 d_outgoingECSNetwork = netmask;
5730 }
5731 else {
5732 d_outgoingECSNetwork = boost::none;
5733 }
5734 }
5735
5736 void SyncRes::setQuerySource(const ComboAddress& requestor, boost::optional<const EDNSSubnetOpts&> incomingECS)
5737 {
5738 d_requestor = requestor;
5739
5740 if (incomingECS && incomingECS->source.getBits() > 0) {
5741 d_cacheRemote = incomingECS->source.getMaskedNetwork();
5742 uint8_t bits = std::min(incomingECS->source.getBits(), (incomingECS->source.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5743 ComboAddress trunc = incomingECS->source.getNetwork();
5744 trunc.truncate(bits);
5745 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5746 }
5747 else {
5748 d_cacheRemote = d_requestor;
5749 if (!incomingECS && s_ednslocalsubnets.match(d_requestor)) {
5750 ComboAddress trunc = d_requestor;
5751 uint8_t bits = d_requestor.isIPv4() ? 32 : 128;
5752 bits = std::min(bits, (trunc.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5753 trunc.truncate(bits);
5754 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5755 }
5756 else if (s_ecsScopeZero.source.getBits() > 0) {
5757 /* RFC7871 says we MUST NOT send any ECS if the source scope is 0.
5758 But using an empty ECS in that case would mean inserting
5759 a non ECS-specific entry into the cache, preventing any further
5760 ECS-specific query to be sent.
5761 So instead we use the trick described in section 7.1.2:
5762 "The subsequent Recursive Resolver query to the Authoritative Nameserver
5763 will then either not include an ECS option or MAY optionally include
5764 its own address information, which is what the Authoritative
5765 Nameserver will almost certainly use to generate any Tailored
5766 Response in lieu of an option. This allows the answer to be handled
5767 by the same caching mechanism as other queries, with an explicit
5768 indicator of the applicable scope. Subsequent Stub Resolver queries
5769 for /0 can then be answered from this cached response.
5770 */
5771 d_outgoingECSNetwork = boost::optional<Netmask>(s_ecsScopeZero.source.getMaskedNetwork());
5772 d_cacheRemote = s_ecsScopeZero.source.getNetwork();
5773 }
5774 else {
5775 // ECS disabled because no scope-zero address could be derived.
5776 d_outgoingECSNetwork = boost::none;
5777 }
5778 }
5779 }
5780
5781 boost::optional<Netmask> SyncRes::getEDNSSubnetMask(const DNSName& dn, const ComboAddress& rem)
5782 {
5783 if (d_outgoingECSNetwork && (s_ednsdomains.check(dn) || s_ednsremotesubnets.match(rem))) {
5784 return d_outgoingECSNetwork;
5785 }
5786 return boost::none;
5787 }
5788
5789 void SyncRes::parseEDNSSubnetAllowlist(const std::string& alist)
5790 {
5791 vector<string> parts;
5792 stringtok(parts, alist, ",; ");
5793 for (const auto& a : parts) {
5794 try {
5795 s_ednsremotesubnets.addMask(Netmask(a));
5796 }
5797 catch (...) {
5798 s_ednsdomains.add(DNSName(a));
5799 }
5800 }
5801 }
5802
5803 void SyncRes::parseEDNSSubnetAddFor(const std::string& subnetlist)
5804 {
5805 vector<string> parts;
5806 stringtok(parts, subnetlist, ",; ");
5807 for (const auto& a : parts) {
5808 s_ednslocalsubnets.addMask(a);
5809 }
5810 }
5811
5812 // used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_recursor.cc
5813 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, shared_ptr<RecursorLua4> pdl, Logr::log_t log)
5814 {
5815 return directResolve(qname, qtype, qclass, ret, pdl, SyncRes::s_qnameminimization, log);
5816 }
5817
5818 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, shared_ptr<RecursorLua4> pdl, bool qm, Logr::log_t slog)
5819 {
5820 auto log = slog->withValues("qname", Logging::Loggable(qname), "qtype", Logging::Loggable(qtype));
5821
5822 struct timeval now;
5823 gettimeofday(&now, 0);
5824
5825 SyncRes sr(now);
5826 sr.setQNameMinimization(qm);
5827 if (pdl) {
5828 sr.setLuaEngine(pdl);
5829 }
5830
5831 int res = -1;
5832 const std::string msg = "Exception while resolving";
5833 try {
5834 res = sr.beginResolve(qname, qtype, qclass, ret, 0);
5835 }
5836 catch (const PDNSException& e) {
5837 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got pdns exception: " << e.reason << endl,
5838 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5839 ret.clear();
5840 }
5841 catch (const ImmediateServFailException& e) {
5842 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got ImmediateServFailException: " << e.reason << endl,
5843 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5844 ret.clear();
5845 }
5846 catch (const PolicyHitException& e) {
5847 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got a policy hit" << endl,
5848 log->info(Logr::Error, msg, "exception", Logging::Loggable("PolicyHitException")));
5849 ret.clear();
5850 }
5851 catch (const std::exception& e) {
5852 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got STL error: " << e.what() << endl,
5853 log->error(Logr::Error, e.what(), msg, "exception", Logging::Loggable("std::exception")));
5854 ret.clear();
5855 }
5856 catch (...) {
5857 SLOG(g_log << Logger::Error << "Failed to resolve " << qname << ", got an exception" << endl,
5858 log->info(Logr::Error, msg));
5859 ret.clear();
5860 }
5861
5862 return res;
5863 }
5864
5865 int SyncRes::getRootNS(struct timeval now, asyncresolve_t asyncCallback, unsigned int depth, Logr::log_t log)
5866 {
5867 SyncRes sr(now);
5868 sr.d_prefix = "[getRootNS]";
5869 sr.setDoEDNS0(true);
5870 sr.setUpdatingRootNS();
5871 sr.setDoDNSSEC(g_dnssecmode != DNSSECMode::Off);
5872 sr.setDNSSECValidationRequested(g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate);
5873 sr.setAsyncCallback(asyncCallback);
5874 sr.setRefreshAlmostExpired(true);
5875
5876 const string msg = "Failed to update . records";
5877 vector<DNSRecord> ret;
5878 int res = -1;
5879 try {
5880 res = sr.beginResolve(g_rootdnsname, QType::NS, 1, ret, depth + 1);
5881 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
5882 auto state = sr.getValidationState();
5883 if (vStateIsBogus(state)) {
5884 throw PDNSException("Got Bogus validation result for .|NS");
5885 }
5886 }
5887 }
5888 catch (const PDNSException& e) {
5889 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5890 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5891 }
5892 catch (const ImmediateServFailException& e) {
5893 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5894 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5895 }
5896 catch (const PolicyHitException& e) {
5897 SLOG(g_log << Logger::Error << "Failed to update . records, got a policy hit" << endl,
5898 log->info(Logr::Error, msg, "exception", Logging::Loggable("PolicyHitException")));
5899 ret.clear();
5900 }
5901 catch (const std::exception& e) {
5902 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.what() << endl,
5903 log->error(Logr::Error, e.what(), msg, "exception", Logging::Loggable("std::exception")));
5904 }
5905 catch (...) {
5906 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception" << endl,
5907 log->info(Logr::Error, msg));
5908 }
5909
5910 if (res == 0) {
5911 SLOG(g_log << Logger::Debug << "Refreshed . records" << endl,
5912 log->info(Logr::Debug, "Refreshed . records"));
5913 }
5914 else {
5915 SLOG(g_log << Logger::Warning << "Failed to update root NS records, RCODE=" << res << endl,
5916 log->info(Logr::Warning, msg, "rcode", Logging::Loggable(res)));
5917 }
5918 return res;
5919 }