]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/recursordist/syncres.cc
rec: CVE-2023-50387 and CVE-2023-50868
[thirdparty/pdns.git] / pdns / recursordist / syncres.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include <utility>
24
25 #include "config.h"
26 #endif
27
28 #include "arguments.hh"
29 #include "aggressive_nsec.hh"
30 #include "cachecleaner.hh"
31 #include "dns_random.hh"
32 #include "dnsparser.hh"
33 #include "dnsrecords.hh"
34 #include "ednssubnet.hh"
35 #include "logger.hh"
36 #include "lua-recursor4.hh"
37 #include "rec-lua-conf.hh"
38 #include "syncres.hh"
39 #include "dnsseckeeper.hh"
40 #include "validate-recursor.hh"
41 #include "rec-taskqueue.hh"
42
// Global counters object. It is defined before t_Counters because that object is constructed from it.
rec::GlobalCounters g_Counters;
// Thread-local counters object; every thread's instance is constructed with g_Counters as its argument.
thread_local rec::TCounters t_Counters(g_Counters);
45
46 template <class T>
47 class fails_t : public boost::noncopyable
48 {
49 public:
50 using counter_t = uint64_t;
51 struct value_t
52 {
53 value_t(T arg) :
54 key(std::move(arg)) {}
55 T key;
56 mutable counter_t value{0};
57 time_t last{0};
58 };
59
60 using cont_t = multi_index_container<value_t,
61 indexed_by<
62 ordered_unique<tag<T>, member<value_t, T, &value_t::key>>,
63 ordered_non_unique<tag<time_t>, member<value_t, time_t, &value_t::last>>>>;
64
65 [[nodiscard]] cont_t getMapCopy() const
66 {
67 return d_cont;
68 }
69
70 [[nodiscard]] counter_t value(const T& arg) const
71 {
72 auto iter = d_cont.find(arg);
73
74 if (iter == d_cont.end()) {
75 return 0;
76 }
77 return iter->value;
78 }
79
80 counter_t incr(const T& key, const struct timeval& now)
81 {
82 auto iter = d_cont.insert(key).first;
83
84 if (iter->value < std::numeric_limits<counter_t>::max()) {
85 iter->value++;
86 }
87 auto& ind = d_cont.template get<T>();
88 time_t nowSecs = now.tv_sec;
89 ind.modify(iter, [nowSecs](value_t& val) { val.last = nowSecs; });
90 return iter->value;
91 }
92
93 void clear(const T& arg)
94 {
95 d_cont.erase(arg);
96 }
97
98 void clear()
99 {
100 d_cont.clear();
101 }
102
103 [[nodiscard]] size_t size() const
104 {
105 return d_cont.size();
106 }
107
108 void prune(time_t cutoff)
109 {
110 auto& ind = d_cont.template get<time_t>();
111 ind.erase(ind.begin(), ind.upper_bound(cutoff));
112 }
113
114 private:
115 cont_t d_cont;
116 };
117
118 /** Class that implements a decaying EWMA.
119 This class keeps an exponentially weighted moving average which, additionally, decays over time.
120 The decaying is only done on get.
121 */
122
123 //! This represents a number of decaying Ewmas, used to store performance per nameserver-name.
124 /** Modelled to work mostly like the underlying DecayingEwma */
125 class DecayingEwmaCollection
126 {
127 private:
128 struct DecayingEwma
129 {
130 public:
131 void submit(int arg, const struct timeval& last, const struct timeval& now)
132 {
133 d_last = arg;
134 auto val = static_cast<float>(arg);
135 if (d_val == 0) {
136 d_val = val;
137 }
138 else {
139 auto diff = makeFloat(last - now);
140 auto factor = expf(diff) / 2.0F; // might be '0.5', or 0.0001
141 d_val = (1.0F - factor) * val + factor * d_val;
142 }
143 }
144
145 float get(float factor)
146 {
147 return d_val *= factor;
148 }
149
150 [[nodiscard]] float peek() const
151 {
152 return d_val;
153 }
154
155 [[nodiscard]] int last() const
156 {
157 return d_last;
158 }
159
160 float d_val{0};
161 int d_last{0};
162 };
163
164 public:
165 DecayingEwmaCollection(DNSName name, const struct timeval val = {0, 0}) :
166 d_name(std::move(name)), d_lastget(val)
167 {
168 }
169
170 void submit(const ComboAddress& remote, int usecs, const struct timeval& now) const
171 {
172 d_collection[remote].submit(usecs, d_lastget, now);
173 }
174
175 float getFactor(const struct timeval& now) const
176 {
177 float diff = makeFloat(d_lastget - now);
178 return expf(diff / 60.0F); // is 1.0 or less
179 }
180
181 bool stale(time_t limit) const
182 {
183 return limit > d_lastget.tv_sec;
184 }
185
186 void purge(const std::map<ComboAddress, float>& keep) const
187 {
188 for (auto iter = d_collection.begin(); iter != d_collection.end();) {
189 if (keep.find(iter->first) != keep.end()) {
190 ++iter;
191 }
192 else {
193 iter = d_collection.erase(iter);
194 }
195 }
196 }
197
198 // d_collection is the modifyable part of the record, we index on DNSName and timeval, and DNSName never changes
199 mutable std::map<ComboAddress, DecayingEwma> d_collection;
200 DNSName d_name;
201 struct timeval d_lastget;
202 };
203
204 class nsspeeds_t : public multi_index_container<DecayingEwmaCollection,
205 indexed_by<
206 hashed_unique<tag<DNSName>, member<DecayingEwmaCollection, const DNSName, &DecayingEwmaCollection::d_name>>,
207 ordered_non_unique<tag<timeval>, member<DecayingEwmaCollection, timeval, &DecayingEwmaCollection::d_lastget>>>>
208 {
209 public:
210 const auto& find_or_enter(const DNSName& name, const struct timeval& now)
211 {
212 const auto iter = insert(DecayingEwmaCollection{name, now}).first;
213 return *iter;
214 }
215
216 const auto& find_or_enter(const DNSName& name)
217 {
218 const auto iter = insert(DecayingEwmaCollection{name}).first;
219 return *iter;
220 }
221
222 float fastest(const DNSName& name, const struct timeval& now)
223 {
224 auto& ind = get<DNSName>();
225 auto iter = insert(DecayingEwmaCollection{name, now}).first;
226 if (iter->d_collection.empty()) {
227 return 0;
228 }
229 // This could happen if find(DNSName) entered an entry; it's used only by test code
230 if (iter->d_lastget.tv_sec == 0 && iter->d_lastget.tv_usec == 0) {
231 ind.modify(iter, [&](DecayingEwmaCollection& dec) { dec.d_lastget = now; });
232 }
233
234 float ret = std::numeric_limits<float>::max();
235 const float factor = iter->getFactor(now);
236 for (auto& entry : iter->d_collection) {
237 if (float tmp = entry.second.get(factor); tmp < ret) {
238 ret = tmp;
239 }
240 }
241 ind.modify(iter, [&](DecayingEwmaCollection& dec) { dec.d_lastget = now; });
242 return ret;
243 }
244 };
245
246 static LockGuarded<nsspeeds_t> s_nsSpeeds;
247
248 template <class Thing>
249 class Throttle : public boost::noncopyable
250 {
251 public:
252 struct entry_t
253 {
254 entry_t(const Thing& thing_, time_t ttd_, unsigned int count_) :
255 thing(thing_), ttd(ttd_), count(count_)
256 {
257 }
258 Thing thing;
259 time_t ttd;
260 mutable unsigned int count;
261 };
262 using cont_t = multi_index_container<entry_t,
263 indexed_by<
264 ordered_unique<tag<Thing>, member<entry_t, Thing, &entry_t::thing>>,
265 ordered_non_unique<tag<time_t>, member<entry_t, time_t, &entry_t::ttd>>>>;
266
267 bool shouldThrottle(time_t now, const Thing& arg)
268 {
269 auto iter = d_cont.find(arg);
270 if (iter == d_cont.end()) {
271 return false;
272 }
273 if (now > iter->ttd || iter->count == 0) {
274 d_cont.erase(iter);
275 return false;
276 }
277 iter->count--;
278
279 return true; // still listed, still blocked
280 }
281
282 void throttle(time_t now, const Thing& arg, time_t ttl, unsigned int count)
283 {
284 auto iter = d_cont.find(arg);
285 time_t ttd = now + ttl;
286 if (iter == d_cont.end()) {
287 d_cont.emplace(arg, ttd, count);
288 }
289 else if (ttd > iter->ttd || count > iter->count) {
290 ttd = std::max(iter->ttd, ttd);
291 count = std::max(iter->count, count);
292 auto& ind = d_cont.template get<Thing>();
293 ind.modify(iter, [ttd, count](entry_t& entry) { entry.ttd = ttd; entry.count = count; });
294 }
295 }
296
297 [[nodiscard]] size_t size() const
298 {
299 return d_cont.size();
300 }
301
302 [[nodiscard]] cont_t getThrottleMap() const
303 {
304 return d_cont;
305 }
306
307 void clear()
308 {
309 d_cont.clear();
310 }
311
312 void clear(const Thing& thing)
313 {
314 d_cont.erase(thing);
315 }
316 void prune(time_t now)
317 {
318 auto& ind = d_cont.template get<time_t>();
319 ind.erase(ind.begin(), ind.upper_bound(now));
320 }
321
322 private:
323 cont_t d_cont;
324 };
325
326 static LockGuarded<Throttle<std::tuple<ComboAddress, DNSName, QType>>> s_throttle;
327
328 struct SavedParentEntry
329 {
330 SavedParentEntry(DNSName name, map<DNSName, vector<ComboAddress>>&& nsAddresses, time_t ttd) :
331 d_domain(std::move(name)), d_nsAddresses(std::move(nsAddresses)), d_ttd(ttd)
332 {
333 }
334 DNSName d_domain;
335 map<DNSName, vector<ComboAddress>> d_nsAddresses;
336 time_t d_ttd;
337 mutable uint64_t d_count{0};
338 };
339
// Container of SavedParentEntry objects with two indexes:
// - tag<DNSName>: unique, ordered on d_domain
// - tag<time_t>: non-unique, ordered on d_ttd
using SavedParentNSSetBase = multi_index_container<
  SavedParentEntry,
  indexed_by<ordered_unique<tag<DNSName>, member<SavedParentEntry, DNSName, &SavedParentEntry::d_domain>>,
             ordered_non_unique<tag<time_t>, member<SavedParentEntry, time_t, &SavedParentEntry::d_ttd>>>>;
344
345 class SavedParentNSSet : public SavedParentNSSetBase
346 {
347 public:
348 void prune(time_t now)
349 {
350 auto& ind = get<time_t>();
351 ind.erase(ind.begin(), ind.upper_bound(now));
352 }
353 void inc(const DNSName& name)
354 {
355 auto iter = find(name);
356 if (iter != end()) {
357 ++(*iter).d_count;
358 }
359 }
360 [[nodiscard]] SavedParentNSSet getMapCopy() const
361 {
362 return *this;
363 }
364 };
365
366 static LockGuarded<SavedParentNSSet> s_savedParentNSSet;
367
// Definitions of thread-local state and of several SyncRes static data members.
// NOTE(review): how and where these are populated is not visible in this part of the file.
thread_local SyncRes::ThreadLocalStorage SyncRes::t_sstorage;
thread_local std::unique_ptr<addrringbuf_t> t_timeouts;

// Starts out as a null pointer
std::unique_ptr<NetmaskGroup> SyncRes::s_dontQuery{nullptr};
NetmaskGroup SyncRes::s_ednslocalsubnets;
NetmaskGroup SyncRes::s_ednsremotesubnets;
SuffixMatchNode SyncRes::s_ednsdomains;
EDNSSubnetOpts SyncRes::s_ecsScopeZero;
// Compared against the literal "disabled" by doSpecialNamesResolve() before it is returned for id.server.
string SyncRes::s_serverID;
// Copied into d_lm by the SyncRes constructor
SyncRes::LogMode SyncRes::s_lm;
const std::unordered_set<QType> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};
// Lock-protected failure counters, keyed on address and on name respectively
static LockGuarded<fails_t<ComboAddress>> s_fails;
static LockGuarded<fails_t<DNSName>> s_nonresolving;
381
382 struct DoTStatus
383 {
384 DoTStatus(const ComboAddress& address, DNSName auth, time_t ttd) :
385 d_address(address), d_auth(std::move(auth)), d_ttd(ttd)
386 {
387 }
388 enum Status : uint8_t
389 {
390 Unknown,
391 Busy,
392 Bad,
393 Good
394 };
395 ComboAddress d_address;
396 DNSName d_auth;
397 time_t d_ttd;
398 mutable uint64_t d_count{0};
399 mutable Status d_status{Unknown};
400 std::string toString() const
401 {
402 const std::array<std::string, 4> names{"Unknown", "Busy", "Bad", "Good"};
403 auto val = static_cast<unsigned int>(d_status);
404 return val >= names.size() ? "?" : names.at(val);
405 }
406 };
407
408 struct DoTMap
409 {
410 multi_index_container<DoTStatus,
411 indexed_by<
412 ordered_unique<tag<ComboAddress>, member<DoTStatus, const ComboAddress, &DoTStatus::d_address>>,
413 ordered_non_unique<tag<time_t>, member<DoTStatus, time_t, &DoTStatus::d_ttd>>>>
414 d_map;
415 uint64_t d_numBusy{0};
416
417 void prune(time_t cutoff)
418 {
419 auto& ind = d_map.template get<time_t>();
420 ind.erase(ind.begin(), ind.upper_bound(cutoff));
421 }
422 };
423
424 static LockGuarded<DoTMap> s_dotMap;
425
// 24 hours, expressed in seconds. NOTE(review): presumably the wait applied after a failed DoT attempt — confirm at the use site.
static const time_t dotFailWait = static_cast<time_t>(24) * 3600;
// 3 days, expressed in seconds. NOTE(review): presumably the wait applied after a successful DoT attempt — confirm at the use site.
static const time_t dotSuccessWait = static_cast<time_t>(3) * 24 * 3600;
// Forward declaration; the definition is not in this part of the file.
static bool shouldDoDoT(ComboAddress address, time_t now);
429
// Definitions of SyncRes static data members. Apart from s_max_CNAMES_followed none of them has an
// initializer here. NOTE(review): presumably they are assigned from the configuration elsewhere — confirm.
unsigned int SyncRes::s_maxnegttl;
unsigned int SyncRes::s_maxbogusttl;
unsigned int SyncRes::s_maxcachettl;
unsigned int SyncRes::s_maxqperq;
unsigned int SyncRes::s_maxnsperresolve;
unsigned int SyncRes::s_maxnsaddressqperq;
unsigned int SyncRes::s_maxtotusec;
unsigned int SyncRes::s_maxdepth;
unsigned int SyncRes::s_minimumTTL;
unsigned int SyncRes::s_minimumECSTTL;
unsigned int SyncRes::s_packetcachettl;
unsigned int SyncRes::s_packetcacheservfailttl;
unsigned int SyncRes::s_packetcachenegativettl;
unsigned int SyncRes::s_serverdownmaxfails;
unsigned int SyncRes::s_serverdownthrottletime;
unsigned int SyncRes::s_unthrottle_n;
unsigned int SyncRes::s_nonresolvingnsmaxfails;
unsigned int SyncRes::s_nonresolvingnsthrottletime;
unsigned int SyncRes::s_ecscachelimitttl;
unsigned int SyncRes::s_maxvalidationsperq;
// The SyncRes constructor treats a value of 0 as "no limit" when it sets up the per-query NSEC3 iterations quota
unsigned int SyncRes::s_maxnsec3iterationsperq;
pdns::stat_t SyncRes::s_ecsqueries;
pdns::stat_t SyncRes::s_ecsresponses;
std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize4;
std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize6;

uint8_t SyncRes::s_ecsipv4limit;
uint8_t SyncRes::s_ecsipv6limit;
uint8_t SyncRes::s_ecsipv4cachelimit;
uint8_t SyncRes::s_ecsipv6cachelimit;
bool SyncRes::s_ecsipv4nevercache;
bool SyncRes::s_ecsipv6nevercache;

bool SyncRes::s_doIPv4;
bool SyncRes::s_doIPv6;
bool SyncRes::s_rootNXTrust;
bool SyncRes::s_noEDNS;
// Copied into d_qNameMinimization by the SyncRes constructor
bool SyncRes::s_qnameminimization;
SyncRes::HardenNXD SyncRes::s_hardenNXD;
unsigned int SyncRes::s_refresh_ttlperc;
unsigned int SyncRes::s_locked_ttlperc;
int SyncRes::s_tcp_fast_open;
bool SyncRes::s_tcp_fast_open_connect;
bool SyncRes::s_dot_to_port_853;
int SyncRes::s_event_trace_enabled;
bool SyncRes::s_save_parent_ns_set;
unsigned int SyncRes::s_max_busy_dot_probes;
unsigned int SyncRes::s_max_CNAMES_followed = 10;
bool SyncRes::s_addExtendedResolutionDNSErrors;
479
// Logging helper for use inside SyncRes member functions (it reads d_lm, d_fixednow and d_trace):
// - d_lm == Log:   streams x to g_log at Logger::Warning level
// - d_lm == Store: calls addTraceTS(d_fixednow, d_trace), then streams x into d_trace
// - otherwise:     x is not evaluated at all
// The expansion is an unbraced if / else-if chain, so take care when using it right before an `else`.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define LOG(x)                       \
  if (d_lm == Log) {                 \
    g_log << Logger::Warning << x;   \
  }                                  \
  else if (d_lm == Store) {          \
    addTraceTS(d_fixednow, d_trace); \
    d_trace << x;                    \
  }
489
490 OptLog SyncRes::LogObject(const string& prefix)
491 {
492 OptLog ret;
493 if (d_lm == Log) {
494 ret = {prefix, d_fixednow, g_log};
495 }
496 else if (d_lm == Store) {
497 ret = {prefix, d_fixednow, d_trace};
498 }
499 return ret;
500 }
501
502 static bool pushResolveIfNotInNegCache(const DNSName& qname, QType qtype, const struct timeval& now)
503 {
504 NegCache::NegCacheEntry negEntry;
505 bool inNegCache = g_negCache->get(qname, qtype, now, negEntry, false);
506 if (!inNegCache) {
507 // There are a few cases where an answer is neither stored in the record cache nor in the neg cache.
508 // An example is a SOA-less NODATA response. Rate limiting will kick in if those tasks are pushed too often.
509 // We might want to fix these cases (and always either store positive or negative) some day.
510 pushResolveTask(qname, qtype, now.tv_sec, now.tv_sec + 60, false);
511 }
512 return !inNegCache;
513 }
514
// Formats a double with two decimals ("%0.2f") into a std::string.
// boost::format is avoided on purpose: according to tsan it is not thread safe
// while calling into locale handling code.
// A string is allocated, but that is nothing compared to what boost::format
// does, and the allocation may even be optimized away anyway.
// Returns "?" when formatting fails or the text does not fit into the 20-byte buffer.
static inline std::string fmtfloat(double value)
{
  std::array<char, 20> text{};
  const int written = snprintf(text.data(), text.size(), "%0.2f", value);
  const bool fits = written >= 0 && written < static_cast<int>(text.size());
  if (!fits) {
    return "?";
  }
  return std::string(text.data(), static_cast<size_t>(written));
}
529
530 static inline void accountAuthLatency(uint64_t usec, int family)
531 {
532 if (family == AF_INET) {
533 t_Counters.at(rec::Histogram::auth4Answers)(usec);
534 t_Counters.at(rec::Histogram::cumulativeAuth4Answers)(usec);
535 }
536 else {
537 t_Counters.at(rec::Histogram::auth6Answers)(usec);
538 t_Counters.at(rec::Histogram::cumulativeAuth6Answers)(usec);
539 }
540 }
541
542 SyncRes::SyncRes(const struct timeval& now) :
543 d_authzonequeries(0), d_outqueries(0), d_tcpoutqueries(0), d_dotoutqueries(0), d_throttledqueries(0), d_timeouts(0), d_unreachables(0), d_totUsec(0), d_fixednow(now), d_now(now), d_cacheonly(false), d_doDNSSEC(false), d_doEDNS0(false), d_qNameMinimization(s_qnameminimization), d_lm(s_lm)
544 {
545 d_validationContext.d_nsec3IterationsRemainingQuota = s_maxnsec3iterationsperq > 0 ? s_maxnsec3iterationsperq : std::numeric_limits<decltype(d_validationContext.d_nsec3IterationsRemainingQuota)>::max();
546 }
547
// Forward declaration (the definition is not in this part of the file); addAdditionals() calls it for
// every ANSWER record to collect names into `allowedAdditionals`.
static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec);
549
550 void SyncRes::resolveAdditionals(const DNSName& qname, QType qtype, AdditionalMode mode, std::vector<DNSRecord>& additionals, unsigned int depth, bool& additionalsNotInCache)
551 {
552 vector<DNSRecord> addRecords;
553
554 Context context;
555 switch (mode) {
556 case AdditionalMode::ResolveImmediately: {
557 set<GetBestNSAnswer> beenthere;
558 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
559 if (res != 0) {
560 return;
561 }
562 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
563 // validation is required.
564 if (vStateIsBogus(context.state)) {
565 return;
566 }
567 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
568 return;
569 }
570 for (auto& rec : addRecords) {
571 if (rec.d_place == DNSResourceRecord::ANSWER) {
572 additionals.push_back(std::move(rec));
573 }
574 }
575 break;
576 }
577 case AdditionalMode::CacheOnly:
578 case AdditionalMode::CacheOnlyRequireAuth: {
579 // Peek into cache
580 MemRecursorCache::Flags flags = mode == AdditionalMode::CacheOnlyRequireAuth ? MemRecursorCache::RequireAuth : MemRecursorCache::None;
581 if (g_recCache->get(d_now.tv_sec, qname, qtype, flags, &addRecords, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, &context.state) <= 0) {
582 return;
583 }
584 // See the comment for the ResolveImmediately case
585 if (vStateIsBogus(context.state)) {
586 return;
587 }
588 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
589 return;
590 }
591 for (auto& rec : addRecords) {
592 if (rec.d_place == DNSResourceRecord::ANSWER) {
593 rec.d_ttl -= d_now.tv_sec;
594 additionals.push_back(std::move(rec));
595 }
596 }
597 break;
598 }
599 case AdditionalMode::ResolveDeferred: {
600 const bool oldCacheOnly = setCacheOnly(true);
601 set<GetBestNSAnswer> beenthere;
602 int res = doResolve(qname, qtype, addRecords, depth, beenthere, context);
603 setCacheOnly(oldCacheOnly);
604 if (res == 0 && !addRecords.empty()) {
605 // We're conservative here. We do not add Bogus records in any circumstance, we add Indeterminates only if no
606 // validation is required.
607 if (vStateIsBogus(context.state)) {
608 return;
609 }
610 if (shouldValidate() && context.state != vState::Secure && context.state != vState::Insecure) {
611 return;
612 }
613 bool found = false;
614 for (auto& rec : addRecords) {
615 if (rec.d_place == DNSResourceRecord::ANSWER) {
616 found = true;
617 additionals.push_back(std::move(rec));
618 }
619 }
620 if (found) {
621 return;
622 }
623 }
624 // Not found in cache, check negcache and push task if also not in negcache
625 if (pushResolveIfNotInNegCache(qname, qtype, d_now)) {
626 additionalsNotInCache = true;
627 }
628 break;
629 }
630 case AdditionalMode::Ignore:
631 break;
632 }
633 }
634
635 // The main (recursive) function to add additionals
636 // qtype: the original query type to expand
637 // start: records to start from
638 // This function uses to state sets to avoid infinite recursion and allow depulication
639 // depth is the main recursion depth
640 // additionaldepth is the depth for addAdditionals itself
641 void SyncRes::addAdditionals(QType qtype, const vector<DNSRecord>& start, vector<DNSRecord>& additionals, std::set<std::pair<DNSName, QType>>& uniqueCalls, std::set<std::tuple<DNSName, QType, QType>>& uniqueResults, unsigned int depth, unsigned additionaldepth, bool& additionalsNotInCache)
642 {
643 if (additionaldepth >= 5 || start.empty()) {
644 return;
645 }
646
647 auto luaLocal = g_luaconfs.getLocal();
648 const auto iter = luaLocal->allowAdditionalQTypes.find(qtype);
649 if (iter == luaLocal->allowAdditionalQTypes.end()) {
650 return;
651 }
652 std::unordered_set<DNSName> addnames;
653 for (const auto& rec : start) {
654 if (rec.d_place == DNSResourceRecord::ANSWER) {
655 // currently, this function only knows about names, we could also take the target types that are dependent on
656 // record contents into account
657 // e.g. for NAPTR records, go only for SRV for flag value "s", or A/AAAA for flag value "a"
658 allowAdditionalEntry(addnames, rec);
659 }
660 }
661
662 // We maintain two sets for deduplication:
663 // - uniqueCalls makes sure we never resolve a qname/qtype twice
664 // - uniqueResults makes sure we never add the same qname/qytype RRSet to the result twice,
665 // but note that that set might contain multiple elements.
666
667 auto mode = iter->second.second;
668 for (const auto& targettype : iter->second.first) {
669 for (const auto& addname : addnames) {
670 std::vector<DNSRecord> records;
671 bool inserted = uniqueCalls.emplace(addname, targettype).second;
672 if (inserted) {
673 resolveAdditionals(addname, targettype, mode, records, depth, additionalsNotInCache);
674 }
675 if (!records.empty()) {
676 for (auto record = records.begin(); record != records.end();) {
677 QType covered = QType::ENT;
678 if (record->d_type == QType::RRSIG) {
679 if (auto rsig = getRR<RRSIGRecordContent>(*record); rsig != nullptr) {
680 covered = rsig->d_type;
681 }
682 }
683 if (uniqueResults.count(std::tuple(record->d_name, QType(record->d_type), covered)) > 0) {
684 // A bit expensive for vectors, but they are small
685 record = records.erase(record);
686 }
687 else {
688 ++record;
689 }
690 }
691 for (const auto& record : records) {
692 additionals.push_back(record);
693 QType covered = QType::ENT;
694 if (record.d_type == QType::RRSIG) {
695 if (auto rsig = getRR<RRSIGRecordContent>(record); rsig != nullptr) {
696 covered = rsig->d_type;
697 }
698 }
699 uniqueResults.emplace(record.d_name, record.d_type, covered);
700 }
701 addAdditionals(targettype, records, additionals, uniqueCalls, uniqueResults, depth, additionaldepth + 1, additionalsNotInCache);
702 }
703 }
704 }
705 }
706
707 // The entry point for other code
708 bool SyncRes::addAdditionals(QType qtype, vector<DNSRecord>& ret, unsigned int depth)
709 {
710 // The additional records of interest
711 std::vector<DNSRecord> additionals;
712
713 // We only call resolve for a specific name/type combo once
714 std::set<std::pair<DNSName, QType>> uniqueCalls;
715
716 // Collect multiple name/qtype from a single resolve but do not add a new set from new resolve calls
717 // For RRSIGs, the type covered is stored in the second Qtype
718 std::set<std::tuple<DNSName, QType, QType>> uniqueResults;
719
720 bool additionalsNotInCache = false;
721 addAdditionals(qtype, ret, additionals, uniqueCalls, uniqueResults, depth, 0, additionalsNotInCache);
722
723 for (auto& rec : additionals) {
724 rec.d_place = DNSResourceRecord::ADDITIONAL;
725 ret.push_back(std::move(rec));
726 }
727 return additionalsNotInCache;
728 }
729
730 /** everything begins here - this is the entry point just after receiving a packet */
731 int SyncRes::beginResolve(const DNSName& qname, const QType qtype, QClass qclass, vector<DNSRecord>& ret, unsigned int depth)
732 {
733 d_eventTrace.add(RecEventTrace::SyncRes);
734 t_Counters.at(rec::Counter::syncresqueries)++;
735 d_wasVariable = false;
736 d_wasOutOfBand = false;
737 d_cutStates.clear();
738
739 if (doSpecialNamesResolve(qname, qtype, qclass, ret)) {
740 d_queryValidationState = vState::Insecure; // this could fool our stats into thinking a validation took place
741 return 0; // so do check before updating counters (we do now)
742 }
743
744 if (isUnsupported(qtype)) {
745 return -1;
746 }
747
748 if (qclass == QClass::ANY) {
749 qclass = QClass::IN;
750 }
751 else if (qclass != QClass::IN) {
752 return -1;
753 }
754
755 if (qtype == QType::DS) {
756 d_externalDSQuery = qname;
757 }
758 else {
759 d_externalDSQuery.clear();
760 }
761
762 set<GetBestNSAnswer> beenthere;
763 Context context;
764 int res = doResolve(qname, qtype, ret, depth, beenthere, context);
765 d_queryValidationState = context.state;
766 d_extendedError = context.extendedError;
767
768 if (shouldValidate()) {
769 if (d_queryValidationState != vState::Indeterminate) {
770 t_Counters.at(rec::Counter::dnssecValidations)++;
771 }
772 auto xdnssec = g_xdnssec.getLocal();
773 if (xdnssec->check(qname)) {
774 increaseXDNSSECStateCounter(d_queryValidationState);
775 }
776 else {
777 increaseDNSSECStateCounter(d_queryValidationState);
778 }
779 }
780
781 // Avoid calling addAdditionals() if we know we won't find anything
782 auto luaLocal = g_luaconfs.getLocal();
783 if (res == 0 && qclass == QClass::IN && luaLocal->allowAdditionalQTypes.find(qtype) != luaLocal->allowAdditionalQTypes.end()) {
784 bool additionalsNotInCache = addAdditionals(qtype, ret, depth);
785 if (additionalsNotInCache) {
786 d_wasVariable = true;
787 }
788 }
789 d_eventTrace.add(RecEventTrace::SyncRes, res, false);
790 return res;
791 }
792
793 /*! Handles all special, built-in names
794 * Fills ret with an answer and returns true if it handled the query.
795 *
796 * Handles the following queries (and their ANY variants):
797 *
798 * - localhost. IN A
799 * - localhost. IN AAAA
800 * - 1.0.0.127.in-addr.arpa. IN PTR
801 * - 1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. IN PTR
802 * - version.bind. CH TXT
803 * - version.pdns. CH TXT
804 * - id.server. CH TXT
805 * - trustanchor.server CH TXT
806 * - negativetrustanchor.server CH TXT
807 */
808 bool SyncRes::doSpecialNamesResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret)
809 {
810 static const DNSName arpa("1.0.0.127.in-addr.arpa.");
811 static const DNSName ip6_arpa("1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.");
812 static const DNSName localhost("localhost.");
813 static const DNSName versionbind("version.bind.");
814 static const DNSName idserver("id.server.");
815 static const DNSName versionpdns("version.pdns.");
816 static const DNSName trustanchorserver("trustanchor.server.");
817 static const DNSName negativetrustanchorserver("negativetrustanchor.server.");
818
819 bool handled = false;
820 vector<pair<QType::typeenum, string>> answers;
821
822 if ((qname == arpa || qname == ip6_arpa) && qclass == QClass::IN) {
823 handled = true;
824 if (qtype == QType::PTR || qtype == QType::ANY) {
825 answers.emplace_back(QType::PTR, "localhost.");
826 }
827 }
828
829 if (qname.isPartOf(localhost) && qclass == QClass::IN) {
830 handled = true;
831 if (qtype == QType::A || qtype == QType::ANY) {
832 answers.emplace_back(QType::A, "127.0.0.1");
833 }
834 if (qtype == QType::AAAA || qtype == QType::ANY) {
835 answers.emplace_back(QType::AAAA, "::1");
836 }
837 }
838
839 if ((qname == versionbind || qname == idserver || qname == versionpdns) && qclass == QClass::CHAOS) {
840 handled = true;
841 if (qtype == QType::TXT || qtype == QType::ANY) {
842 if (qname == versionbind || qname == versionpdns) {
843 answers.emplace_back(QType::TXT, "\"" + ::arg()["version-string"] + "\"");
844 }
845 else if (s_serverID != "disabled") {
846 answers.emplace_back(QType::TXT, "\"" + s_serverID + "\"");
847 }
848 }
849 }
850
851 if (qname == trustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
852 handled = true;
853 if (qtype == QType::TXT || qtype == QType::ANY) {
854 auto luaLocal = g_luaconfs.getLocal();
855 for (auto const& dsAnchor : luaLocal->dsAnchors) {
856 ostringstream ans;
857 ans << "\"";
858 ans << dsAnchor.first.toString(); // Explicit toString to have a trailing dot
859 for (auto const& dsRecord : dsAnchor.second) {
860 ans << " ";
861 ans << dsRecord.d_tag;
862 }
863 ans << "\"";
864 answers.emplace_back(QType::TXT, ans.str());
865 }
866 }
867 }
868
869 if (qname == negativetrustanchorserver && qclass == QClass::CHAOS && ::arg().mustDo("allow-trust-anchor-query")) {
870 handled = true;
871 if (qtype == QType::TXT || qtype == QType::ANY) {
872 auto luaLocal = g_luaconfs.getLocal();
873 for (auto const& negAnchor : luaLocal->negAnchors) {
874 ostringstream ans;
875 ans << "\"";
876 ans << negAnchor.first.toString(); // Explicit toString to have a trailing dot
877 if (negAnchor.second.length() != 0) {
878 ans << " " << negAnchor.second;
879 }
880 ans << "\"";
881 answers.emplace_back(QType::TXT, ans.str());
882 }
883 }
884 }
885
886 if (handled && !answers.empty()) {
887 ret.clear();
888 d_wasOutOfBand = true;
889
890 DNSRecord dnsRecord;
891 dnsRecord.d_name = qname;
892 dnsRecord.d_place = DNSResourceRecord::ANSWER;
893 dnsRecord.d_class = qclass;
894 dnsRecord.d_ttl = 86400;
895 for (const auto& ans : answers) {
896 dnsRecord.d_type = ans.first;
897 dnsRecord.setContent(DNSRecordContent::make(ans.first, qclass, ans.second));
898 ret.push_back(dnsRecord);
899 }
900 }
901
902 return handled;
903 }
904
905 //! This is the 'out of band resolver', in other words, the authoritative server
906 void SyncRes::AuthDomain::addSOA(std::vector<DNSRecord>& records) const
907 {
908 SyncRes::AuthDomain::records_t::const_iterator ziter = d_records.find(std::tuple(getName(), QType::SOA));
909 if (ziter != d_records.end()) {
910 DNSRecord dnsRecord = *ziter;
911 dnsRecord.d_place = DNSResourceRecord::AUTHORITY;
912 records.push_back(dnsRecord);
913 }
914 }
915
916 bool SyncRes::AuthDomain::operator==(const AuthDomain& rhs) const
917 {
918 return d_records == rhs.d_records
919 && d_servers == rhs.d_servers
920 && d_name == rhs.d_name
921 && d_rdForward == rhs.d_rdForward;
922 }
923
924 [[nodiscard]] std::string SyncRes::AuthDomain::print(const std::string& indent,
925 const std::string& indentLevel) const
926 {
927 std::stringstream outputsStream;
928 outputsStream << indent << "DNSName = " << d_name << std::endl;
929 outputsStream << indent << "rdForward = " << d_rdForward << std::endl;
930 outputsStream << indent << "Records {" << std::endl;
931 auto recordContentIndentation = indent;
932 recordContentIndentation += indentLevel;
933 recordContentIndentation += indentLevel;
934 for (const auto& record : d_records) {
935 outputsStream << indent << indentLevel << "Record `" << record.d_name << "` {" << std::endl;
936 outputsStream << record.print(recordContentIndentation);
937 outputsStream << indent << indentLevel << "}" << std::endl;
938 }
939 outputsStream << indent << "}" << std::endl;
940 outputsStream << indent << "Servers {" << std::endl;
941 for (const auto& server : d_servers) {
942 outputsStream << indent << indentLevel << server.toString() << std::endl;
943 }
944 outputsStream << indent << "}" << std::endl;
945 return outputsStream.str();
946 }
947
948 int SyncRes::AuthDomain::getRecords(const DNSName& qname, const QType qtype, std::vector<DNSRecord>& records) const
949 {
950 int result = RCode::NoError;
951 records.clear();
952
953 // partial lookup
954 std::pair<records_t::const_iterator, records_t::const_iterator> range = d_records.equal_range(std::tie(qname));
955
956 SyncRes::AuthDomain::records_t::const_iterator ziter;
957 bool somedata = false;
958
959 for (ziter = range.first; ziter != range.second; ++ziter) {
960 somedata = true;
961
962 if (qtype == QType::ANY || ziter->d_type == qtype || ziter->d_type == QType::CNAME) {
963 // let rest of nameserver do the legwork on this one
964 records.push_back(*ziter);
965 }
966 else if (ziter->d_type == QType::NS && ziter->d_name.countLabels() > getName().countLabels()) {
967 // we hit a delegation point!
968 DNSRecord dnsRecord = *ziter;
969 dnsRecord.d_place = DNSResourceRecord::AUTHORITY;
970 records.push_back(dnsRecord);
971 }
972 }
973
974 if (!records.empty()) {
975 /* We have found an exact match, we're done */
976 return result;
977 }
978
979 if (somedata) {
980 /* We have records for that name, but not of the wanted qtype */
981 addSOA(records);
982
983 return result;
984 }
985
986 DNSName wcarddomain(qname);
987 while (wcarddomain != getName() && wcarddomain.chopOff()) {
988 range = d_records.equal_range(std::tuple(g_wildcarddnsname + wcarddomain));
989 if (range.first == range.second) {
990 continue;
991 }
992 for (ziter = range.first; ziter != range.second; ++ziter) {
993 DNSRecord dnsRecord = *ziter;
994 // if we hit a CNAME, just answer that - rest of recursor will do the needful & follow
995 if (dnsRecord.d_type == qtype || qtype == QType::ANY || dnsRecord.d_type == QType::CNAME) {
996 dnsRecord.d_name = qname;
997 dnsRecord.d_place = DNSResourceRecord::ANSWER;
998 records.push_back(dnsRecord);
999 }
1000 }
1001
1002 if (records.empty()) {
1003 addSOA(records);
1004 }
1005
1006 return result;
1007 }
1008
1009 /* Nothing for this name, no wildcard, let's see if there is some NS */
1010 DNSName nsdomain(qname);
1011 while (nsdomain.chopOff() && nsdomain != getName()) {
1012 range = d_records.equal_range(std::tuple(nsdomain, QType::NS));
1013 if (range.first == range.second) {
1014 continue;
1015 }
1016 for (ziter = range.first; ziter != range.second; ++ziter) {
1017 DNSRecord dnsRecord = *ziter;
1018 dnsRecord.d_place = DNSResourceRecord::AUTHORITY;
1019 records.push_back(dnsRecord);
1020 }
1021 }
1022
1023 if (records.empty()) {
1024 addSOA(records);
1025 result = RCode::NXDomain;
1026 }
1027
1028 return result;
1029 }
1030
1031 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, int& res)
1032 {
1033 d_authzonequeries++;
1034 t_Counters.at(rec::Counter::authzonequeries)++;
1035
1036 res = domain.getRecords(qname, qtype, ret);
1037 return true;
1038 }
1039
1040 bool SyncRes::doOOBResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int /* depth */, const string& prefix, int& res)
1041 {
1042 DNSName authdomain(qname);
1043 const auto iter = getBestAuthZone(&authdomain);
1044 if (iter == t_sstorage.domainmap->end() || !iter->second.isAuth()) {
1045 LOG(prefix << qname << ": Auth storage has no zone for this query!" << endl);
1046 return false;
1047 }
1048
1049 LOG(prefix << qname << ": Auth storage has data, zone='" << authdomain << "'" << endl);
1050 return doOOBResolve(iter->second, qname, qtype, ret, res);
1051 }
1052
1053 bool SyncRes::isRecursiveForwardOrAuth(const DNSName& qname)
1054 {
1055 DNSName authname(qname);
1056 const auto iter = getBestAuthZone(&authname);
1057 return iter != t_sstorage.domainmap->end() && (iter->second.isAuth() || iter->second.shouldRecurse());
1058 }
1059
1060 bool SyncRes::isForwardOrAuth(const DNSName& qname)
1061 {
1062 DNSName authname(qname);
1063 const auto iter = getBestAuthZone(&authname);
1064 return iter != t_sstorage.domainmap->end();
1065 }
1066
1067 const char* isoDateTimeMillis(const struct timeval& tval, timebuf_t& buf)
1068 {
1069 const std::string s_timestampFormat = "%Y-%m-%dT%T";
1070 struct tm tmval
1071 {
1072 };
1073 size_t len = strftime(buf.data(), buf.size(), s_timestampFormat.c_str(), localtime_r(&tval.tv_sec, &tmval));
1074 if (len == 0) {
1075 int ret = snprintf(buf.data(), buf.size(), "%lld", static_cast<long long>(tval.tv_sec));
1076 if (ret < 0 || static_cast<size_t>(ret) >= buf.size()) {
1077 buf[0] = '\0';
1078 return buf.data();
1079 }
1080 len = ret;
1081 }
1082
1083 if (buf.size() > len + 4) {
1084 snprintf(&buf.at(len), buf.size() - len, ".%03ld", static_cast<long>(tval.tv_usec) / 1000);
1085 }
1086 return buf.data();
1087 }
1088
1089 static const char* timestamp(time_t arg, timebuf_t& buf)
1090 {
1091 const std::string s_timestampFormat = "%Y-%m-%dT%T";
1092 struct tm tmval
1093 {
1094 };
1095 size_t len = strftime(buf.data(), buf.size(), s_timestampFormat.c_str(), localtime_r(&arg, &tmval));
1096 if (len == 0) {
1097 int ret = snprintf(buf.data(), buf.size(), "%lld", static_cast<long long>(arg));
1098 if (ret < 0 || static_cast<size_t>(ret) >= buf.size()) {
1099 buf[0] = '\0';
1100 }
1101 }
1102 return buf.data();
1103 }
1104
1105 struct ednsstatus_t : public multi_index_container<SyncRes::EDNSStatus,
1106 indexed_by<
1107 ordered_unique<tag<ComboAddress>, member<SyncRes::EDNSStatus, ComboAddress, &SyncRes::EDNSStatus::address>>,
1108 ordered_non_unique<tag<time_t>, member<SyncRes::EDNSStatus, time_t, &SyncRes::EDNSStatus::ttd>>>>
1109 {
1110 // Get a copy
1111 [[nodiscard]] ednsstatus_t getMap() const
1112 {
1113 return *this;
1114 }
1115
1116 static void setMode(index<ComboAddress>::type& ind, iterator iter, SyncRes::EDNSStatus::EDNSMode mode, time_t theTime)
1117 {
1118 if (iter->mode != mode || iter->ttd == 0) {
1119 ind.modify(iter, [=](SyncRes::EDNSStatus& status) { status.mode = mode; status.ttd = theTime + Expire; });
1120 }
1121 }
1122
1123 void prune(time_t now)
1124 {
1125 auto& ind = get<time_t>();
1126 ind.erase(ind.begin(), ind.upper_bound(now));
1127 }
1128
1129 static const time_t Expire = 7200;
1130 };
1131
1132 static LockGuarded<ednsstatus_t> s_ednsstatus;
1133
1134 SyncRes::EDNSStatus::EDNSMode SyncRes::getEDNSStatus(const ComboAddress& server)
1135 {
1136 auto lock = s_ednsstatus.lock();
1137 const auto& iter = lock->find(server);
1138 if (iter == lock->end()) {
1139 return EDNSStatus::EDNSOK;
1140 }
1141 return iter->mode;
1142 }
1143
1144 uint64_t SyncRes::getEDNSStatusesSize()
1145 {
1146 return s_ednsstatus.lock()->size();
1147 }
1148
1149 void SyncRes::clearEDNSStatuses()
1150 {
1151 s_ednsstatus.lock()->clear();
1152 }
1153
1154 void SyncRes::pruneEDNSStatuses(time_t cutoff)
1155 {
1156 s_ednsstatus.lock()->prune(cutoff);
1157 }
1158
1159 uint64_t SyncRes::doEDNSDump(int fileDesc)
1160 {
1161 int newfd = dup(fileDesc);
1162 if (newfd == -1) {
1163 return 0;
1164 }
1165 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1166 if (!filePtr) {
1167 close(newfd);
1168 return 0;
1169 }
1170 uint64_t count = 0;
1171
1172 fprintf(filePtr.get(), "; edns dump follows\n; ip\tstatus\tttd\n");
1173 const auto copy = s_ednsstatus.lock()->getMap();
1174 for (const auto& eds : copy) {
1175 count++;
1176 timebuf_t tmp;
1177 fprintf(filePtr.get(), "%s\t%s\t%s\n", eds.address.toString().c_str(), eds.toString().c_str(), timestamp(eds.ttd, tmp));
1178 }
1179 return count;
1180 }
1181
1182 void SyncRes::pruneNSSpeeds(time_t limit)
1183 {
1184 auto lock = s_nsSpeeds.lock();
1185 auto& ind = lock->get<timeval>();
1186 ind.erase(ind.begin(), ind.upper_bound(timeval{limit, 0}));
1187 }
1188
1189 uint64_t SyncRes::getNSSpeedsSize()
1190 {
1191 return s_nsSpeeds.lock()->size();
1192 }
1193
1194 void SyncRes::submitNSSpeed(const DNSName& server, const ComboAddress& address, int usec, const struct timeval& now)
1195 {
1196 auto lock = s_nsSpeeds.lock();
1197 lock->find_or_enter(server, now).submit(address, usec, now);
1198 }
1199
1200 void SyncRes::clearNSSpeeds()
1201 {
1202 s_nsSpeeds.lock()->clear();
1203 }
1204
1205 float SyncRes::getNSSpeed(const DNSName& server, const ComboAddress& address)
1206 {
1207 auto lock = s_nsSpeeds.lock();
1208 return lock->find_or_enter(server).d_collection[address].peek();
1209 }
1210
1211 uint64_t SyncRes::doDumpNSSpeeds(int fileDesc)
1212 {
1213 int newfd = dup(fileDesc);
1214 if (newfd == -1) {
1215 return 0;
1216 }
1217 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1218 if (!filePtr) {
1219 close(newfd);
1220 return 0;
1221 }
1222
1223 fprintf(filePtr.get(), "; nsspeed dump follows\n; nsname\ttimestamp\t[ip/decaying-ms/last-ms...]\n");
1224 uint64_t count = 0;
1225
1226 // Create a copy to avoid holding the lock while doing I/O
1227 for (const auto& iter : *s_nsSpeeds.lock()) {
1228 count++;
1229
1230 // an <empty> can appear hear in case of authoritative (hosted) zones
1231 timebuf_t tmp;
1232 fprintf(filePtr.get(), "%s\t%s\t", iter.d_name.toLogString().c_str(), isoDateTimeMillis(iter.d_lastget, tmp));
1233 bool first = true;
1234 for (const auto& line : iter.d_collection) {
1235 fprintf(filePtr.get(), "%s%s/%.3f/%.3f", first ? "" : "\t", line.first.toStringWithPortExcept(53).c_str(), line.second.peek() / 1000.0F, static_cast<float>(line.second.last()) / 1000.0F);
1236 first = false;
1237 }
1238 fprintf(filePtr.get(), "\n");
1239 }
1240 return count;
1241 }
1242
1243 uint64_t SyncRes::getThrottledServersSize()
1244 {
1245 return s_throttle.lock()->size();
1246 }
1247
1248 void SyncRes::pruneThrottledServers(time_t now)
1249 {
1250 s_throttle.lock()->prune(now);
1251 }
1252
1253 void SyncRes::clearThrottle()
1254 {
1255 s_throttle.lock()->clear();
1256 }
1257
1258 bool SyncRes::isThrottled(time_t now, const ComboAddress& server, const DNSName& target, QType qtype)
1259 {
1260 return s_throttle.lock()->shouldThrottle(now, std::tuple(server, target, qtype));
1261 }
1262
1263 bool SyncRes::isThrottled(time_t now, const ComboAddress& server)
1264 {
1265 auto throttled = s_throttle.lock()->shouldThrottle(now, std::tuple(server, g_rootdnsname, 0));
1266 if (throttled) {
1267 // Give fully throttled servers a chance to be used, to avoid having one bad zone spoil the NS
1268 // record for others using the same NS. If the NS answers, it will be unThrottled immediately
1269 if (s_unthrottle_n > 0 && dns_random(s_unthrottle_n) == 0) {
1270 throttled = false;
1271 }
1272 }
1273 return throttled;
1274 }
1275
1276 void SyncRes::unThrottle(const ComboAddress& server, const DNSName& name, QType qtype)
1277 {
1278 s_throttle.lock()->clear(std::tuple(server, g_rootdnsname, 0));
1279 s_throttle.lock()->clear(std::tuple(server, name, qtype));
1280 }
1281
1282 void SyncRes::doThrottle(time_t now, const ComboAddress& server, time_t duration, unsigned int tries)
1283 {
1284 s_throttle.lock()->throttle(now, std::tuple(server, g_rootdnsname, 0), duration, tries);
1285 }
1286
1287 void SyncRes::doThrottle(time_t now, const ComboAddress& server, const DNSName& name, QType qtype, time_t duration, unsigned int tries)
1288 {
1289 s_throttle.lock()->throttle(now, std::tuple(server, name, qtype), duration, tries);
1290 }
1291
1292 uint64_t SyncRes::doDumpThrottleMap(int fileDesc)
1293 {
1294 int newfd = dup(fileDesc);
1295 if (newfd == -1) {
1296 return 0;
1297 }
1298 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1299 if (!filePtr) {
1300 close(newfd);
1301 return 0;
1302 }
1303 fprintf(filePtr.get(), "; throttle map dump follows\n");
1304 fprintf(filePtr.get(), "; remote IP\tqname\tqtype\tcount\tttd\n");
1305 uint64_t count = 0;
1306
1307 // Get a copy to avoid holding the lock while doing I/O
1308 const auto throttleMap = s_throttle.lock()->getThrottleMap();
1309 for (const auto& iter : throttleMap) {
1310 count++;
1311 timebuf_t tmp;
1312 // remote IP, dns name, qtype, count, ttd
1313 fprintf(filePtr.get(), "%s\t%s\t%s\t%u\t%s\n", std::get<0>(iter.thing).toString().c_str(), std::get<1>(iter.thing).toLogString().c_str(), std::get<2>(iter.thing).toString().c_str(), iter.count, timestamp(iter.ttd, tmp));
1314 }
1315
1316 return count;
1317 }
1318
1319 uint64_t SyncRes::getFailedServersSize()
1320 {
1321 return s_fails.lock()->size();
1322 }
1323
1324 void SyncRes::clearFailedServers()
1325 {
1326 s_fails.lock()->clear();
1327 }
1328
1329 void SyncRes::pruneFailedServers(time_t cutoff)
1330 {
1331 s_fails.lock()->prune(cutoff);
1332 }
1333
1334 unsigned long SyncRes::getServerFailsCount(const ComboAddress& server)
1335 {
1336 return s_fails.lock()->value(server);
1337 }
1338
1339 uint64_t SyncRes::doDumpFailedServers(int fileDesc)
1340 {
1341 int newfd = dup(fileDesc);
1342 if (newfd == -1) {
1343 return 0;
1344 }
1345 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1346 if (!filePtr) {
1347 close(newfd);
1348 return 0;
1349 }
1350 fprintf(filePtr.get(), "; failed servers dump follows\n");
1351 fprintf(filePtr.get(), "; remote IP\tcount\ttimestamp\n");
1352 uint64_t count = 0;
1353
1354 // We get a copy, so the I/O does not need to happen while holding the lock
1355 for (const auto& iter : s_fails.lock()->getMapCopy()) {
1356 count++;
1357 timebuf_t tmp;
1358 fprintf(filePtr.get(), "%s\t%" PRIu64 "\t%s\n", iter.key.toString().c_str(), iter.value, timestamp(iter.last, tmp));
1359 }
1360
1361 return count;
1362 }
1363
1364 uint64_t SyncRes::getNonResolvingNSSize()
1365 {
1366 return s_nonresolving.lock()->size();
1367 }
1368
1369 void SyncRes::clearNonResolvingNS()
1370 {
1371 s_nonresolving.lock()->clear();
1372 }
1373
1374 void SyncRes::pruneNonResolving(time_t cutoff)
1375 {
1376 s_nonresolving.lock()->prune(cutoff);
1377 }
1378
1379 uint64_t SyncRes::doDumpNonResolvingNS(int fileDesc)
1380 {
1381 int newfd = dup(fileDesc);
1382 if (newfd == -1) {
1383 return 0;
1384 }
1385 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1386 if (!filePtr) {
1387 close(newfd);
1388 return 0;
1389 }
1390 fprintf(filePtr.get(), "; non-resolving nameserver dump follows\n");
1391 fprintf(filePtr.get(), "; name\tcount\ttimestamp\n");
1392 uint64_t count = 0;
1393
1394 // We get a copy, so the I/O does not need to happen while holding the lock
1395 for (const auto& iter : s_nonresolving.lock()->getMapCopy()) {
1396 count++;
1397 timebuf_t tmp;
1398 fprintf(filePtr.get(), "%s\t%" PRIu64 "\t%s\n", iter.key.toString().c_str(), iter.value, timestamp(iter.last, tmp));
1399 }
1400
1401 return count;
1402 }
1403
1404 void SyncRes::clearSaveParentsNSSets()
1405 {
1406 s_savedParentNSSet.lock()->clear();
1407 }
1408
1409 size_t SyncRes::getSaveParentsNSSetsSize()
1410 {
1411 return s_savedParentNSSet.lock()->size();
1412 }
1413
1414 void SyncRes::pruneSaveParentsNSSets(time_t now)
1415 {
1416 s_savedParentNSSet.lock()->prune(now);
1417 }
1418
1419 uint64_t SyncRes::doDumpSavedParentNSSets(int fileDesc)
1420 {
1421 int newfd = dup(fileDesc);
1422 if (newfd == -1) {
1423 return 0;
1424 }
1425 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1426 if (!filePtr) {
1427 close(newfd);
1428 return 0;
1429 }
1430 fprintf(filePtr.get(), "; dump of saved parent nameserver sets succesfully used follows\n");
1431 fprintf(filePtr.get(), "; total entries: %zu\n", s_savedParentNSSet.lock()->size());
1432 fprintf(filePtr.get(), "; domain\tsuccess\tttd\n");
1433 uint64_t count = 0;
1434
1435 // We get a copy, so the I/O does not need to happen while holding the lock
1436 for (const auto& iter : s_savedParentNSSet.lock()->getMapCopy()) {
1437 if (iter.d_count == 0) {
1438 continue;
1439 }
1440 count++;
1441 timebuf_t tmp;
1442 fprintf(filePtr.get(), "%s\t%" PRIu64 "\t%s\n", iter.d_domain.toString().c_str(), iter.d_count, timestamp(iter.d_ttd, tmp));
1443 }
1444 return count;
1445 }
1446
1447 void SyncRes::pruneDoTProbeMap(time_t cutoff)
1448 {
1449 auto lock = s_dotMap.lock();
1450 auto& ind = lock->d_map.get<time_t>();
1451
1452 for (auto i = ind.begin(); i != ind.end();) {
1453 if (i->d_ttd >= cutoff) {
1454 // We're done as we loop ordered by d_ttd
1455 break;
1456 }
1457 if (i->d_status == DoTStatus::Status::Busy) {
1458 lock->d_numBusy--;
1459 }
1460 i = ind.erase(i);
1461 }
1462 }
1463
1464 uint64_t SyncRes::doDumpDoTProbeMap(int fileDesc)
1465 {
1466 int newfd = dup(fileDesc);
1467 if (newfd == -1) {
1468 return 0;
1469 }
1470 auto filePtr = std::unique_ptr<FILE, int (*)(FILE*)>(fdopen(newfd, "w"), fclose);
1471 if (!filePtr) {
1472 close(newfd);
1473 return 0;
1474 }
1475 fprintf(filePtr.get(), "; DoT probing map follows\n");
1476 fprintf(filePtr.get(), "; ip\tdomain\tcount\tstatus\tttd\n");
1477 uint64_t count = 0;
1478
1479 // We get a copy, so the I/O does not need to happen while holding the lock
1480 DoTMap copy;
1481 {
1482 copy = *s_dotMap.lock();
1483 }
1484 fprintf(filePtr.get(), "; %" PRIu64 " Busy entries\n", copy.d_numBusy);
1485 for (const auto& iter : copy.d_map) {
1486 count++;
1487 timebuf_t tmp;
1488 fprintf(filePtr.get(), "%s\t%s\t%" PRIu64 "\t%s\t%s\n", iter.d_address.toString().c_str(), iter.d_auth.toString().c_str(), iter.d_count, iter.toString().c_str(), timestamp(iter.d_ttd, tmp));
1489 }
1490 return count;
1491 }
1492
1493 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
1494 to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
1495 so that if there are RRSIGs for a name, we'll have them.
1496
1497 However, some hosts simply can't answer questions which ask for DNSSEC. This can manifest itself as:
1498 * No answer
1499 * FormErr
1500 * Nonsense answer
1501
1502 The cause of "No answer" may be fragmentation, and it is tempting to probe if smaller answers would get through.
1503 Another cause of "No answer" may simply be a network condition.
1504 Nonsense answers are a clearer indication this host won't be able to do DNSSEC evah.
1505
1506 Previous implementations have suffered from turning off DNSSEC questions for an authoritative server based on timeouts.
1507 A clever idea is to only turn off DNSSEC if we know a domain isn't signed anyhow. The problem with that really
1508 clever idea however is that at this point in PowerDNS, we may simply not know that yet. All the DNSSEC thinking happens
1509 elsewhere. It may not have happened yet.
1510
1511 For now this means we can't be clever, but will turn off DNSSEC if you reply with FormError or gibberish.
1512 */
1513
1514 LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& address, bool ednsMANDATORY, const DNSName& domain, [[maybe_unused]] const DNSName& auth, int type, bool doTCP, bool sendRDQuery, struct timeval* now, boost::optional<Netmask>& srcmask, LWResult* res, bool* chained, const DNSName& nsName) const
1515 {
1516 /* what is your QUEST?
1517 the goal is to get as many remotes as possible on the best level of EDNS support
1518 The levels are:
1519
1520 1) EDNSOK: Honors EDNS0, absent from table
1521 2) EDNSIGNORANT: Ignores EDNS0, gives replies without EDNS0
1522 3) NOEDNS: Generates FORMERR on EDNS queries
1523
1524 Everybody starts out assumed to be EDNSOK.
1525 If EDNSOK, send out EDNS0
1526 If you FORMERR us, go to NOEDNS,
1527 If no EDNS in response, go to EDNSIGNORANT
1528 If EDNSIGNORANT, keep on including EDNS0, see what happens
1529 Same behaviour as EDNSOK
1530 If NOEDNS, send bare queries
1531 */
1532
1533 // Read current status, defaulting to OK
1534 SyncRes::EDNSStatus::EDNSMode mode = EDNSStatus::EDNSOK;
1535 {
1536 auto lock = s_ednsstatus.lock();
1537 auto ednsstatus = lock->find(address); // does this include port? YES
1538 if (ednsstatus != lock->end()) {
1539 if (ednsstatus->ttd != 0 && ednsstatus->ttd < d_now.tv_sec) {
1540 lock->erase(ednsstatus);
1541 }
1542 else {
1543 mode = ednsstatus->mode;
1544 }
1545 }
1546 }
1547
1548 int EDNSLevel = 0;
1549 auto luaconfsLocal = g_luaconfs.getLocal();
1550 ResolveContext ctx(d_initialRequestId, nsName);
1551 #ifdef HAVE_FSTRM
1552 ctx.d_auth = auth;
1553 #endif
1554
1555 LWResult::Result ret{};
1556
1557 for (int tries = 0; tries < 2; ++tries) {
1558
1559 if (mode == EDNSStatus::NOEDNS) {
1560 t_Counters.at(rec::Counter::noEdnsOutQueries)++;
1561 EDNSLevel = 0; // level != mode
1562 }
1563 else if (ednsMANDATORY || mode != EDNSStatus::NOEDNS) {
1564 EDNSLevel = 1;
1565 }
1566
1567 DNSName sendQname(domain);
1568 if (g_lowercaseOutgoing) {
1569 sendQname.makeUsLowerCase();
1570 }
1571
1572 if (d_asyncResolve) {
1573 ret = d_asyncResolve(address, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, res, chained);
1574 }
1575 else {
1576 ret = asyncresolve(address, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
1577 }
1578
1579 if (ret == LWResult::Result::PermanentError || ret == LWResult::Result::OSLimitError || ret == LWResult::Result::Spoofed) {
1580 break; // transport error, nothing to learn here
1581 }
1582
1583 if (ret == LWResult::Result::Timeout) { // timeout, not doing anything with it now
1584 break;
1585 }
1586
1587 if (EDNSLevel == 1) {
1588 // We sent out with EDNS
1589 // ret is LWResult::Result::Success
1590 // ednsstatus in table might be pruned or changed by another request/thread, so do a new lookup/insert if needed
1591 auto lock = s_ednsstatus.lock(); // all three branches below need a lock
1592
1593 // Determine new mode
1594 if (res->d_validpacket && !res->d_haveEDNS && res->d_rcode == RCode::FormErr) {
1595 mode = EDNSStatus::NOEDNS;
1596 auto ednsstatus = lock->insert(address).first;
1597 auto& ind = lock->get<ComboAddress>();
1598 lock->setMode(ind, ednsstatus, mode, d_now.tv_sec);
1599 // This is the only path that re-iterates the loop
1600 continue;
1601 }
1602 if (!res->d_haveEDNS) {
1603 auto ednsstatus = lock->insert(address).first;
1604 auto& ind = lock->get<ComboAddress>();
1605 lock->setMode(ind, ednsstatus, EDNSStatus::EDNSIGNORANT, d_now.tv_sec);
1606 }
1607 else {
1608 // New status is EDNSOK
1609 lock->erase(address);
1610 }
1611 }
1612
1613 break;
1614 }
1615 return ret;
1616 }
1617
1618 /* The parameters from rfc9156. */
1619 /* maximum number of QNAME minimization iterations */
1620 unsigned int SyncRes::s_max_minimize_count; // default is 10
1621 /* number of iterations that should only have one label appended */
1622 unsigned int SyncRes::s_minimize_one_label; // default is 4
1623
1624 static unsigned int qmStepLen(unsigned int labels, unsigned int qnamelen, unsigned int qmIteration)
1625 {
1626 unsigned int step{};
1627
1628 if (qmIteration < SyncRes::s_minimize_one_label) {
1629 step = 1;
1630 }
1631 else if (qmIteration < SyncRes::s_max_minimize_count) {
1632 step = std::max(1U, (qnamelen - labels) / (SyncRes::s_max_minimize_count - qmIteration));
1633 }
1634 else {
1635 step = qnamelen - labels;
1636 }
1637 unsigned int targetlen = std::min(labels + step, qnamelen);
1638 return targetlen;
1639 }
1640
1641 static string resToString(int res)
1642 {
1643 return res >= 0 ? RCode::to_s(res) : std::to_string(res);
1644 }
1645
1646 int SyncRes::doResolve(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context) // NOLINT(readability-function-cognitive-complexity)
1647 {
1648 auto prefix = getPrefix(depth);
1649 auto luaconfsLocal = g_luaconfs.getLocal();
1650
1651 /* Apply qname (including CNAME chain) filtering policies */
1652 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
1653 if (luaconfsLocal->dfe.getQueryPolicy(qname, d_discardedPolicies, d_appliedPolicy)) {
1654 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1655 bool done = false;
1656 int rcode = RCode::NoError;
1657 handlePolicyHit(prefix, qname, qtype, ret, done, rcode, depth);
1658 if (done) {
1659 return rcode;
1660 }
1661 }
1662 }
1663
1664 initZoneCutsFromTA(qname, prefix);
1665
1666 // In the auth or recursive forward case, it does not make sense to do qname-minimization
1667 if (!getQNameMinimization() || isRecursiveForwardOrAuth(qname)) {
1668 return doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
1669 }
1670
1671 // The qname minimization algorithm is a simplified version of the one in RFC 7816 (bis).
1672 // It could be simplified because the cache maintenance (both positive and negative)
1673 // is already done by doResolveNoQNameMinimization().
1674 //
1675 // Sketch of algorithm:
1676 // Check cache
1677 // If result found: done
1678 // Otherwise determine closes ancestor from cache data
1679 // Repeat querying A, adding more labels of the original qname
1680 // If we get a delegation continue at ancestor determination
1681 // Until we have the full name.
1682 //
1683 // The algorithm starts with adding a single label per iteration, and
1684 // moves to three labels per iteration after three iterations.
1685
1686 DNSName child;
1687 prefix.append(string("QM "));
1688
1689 LOG(prefix << qname << ": doResolve" << endl);
1690
1691 // Look in cache only
1692 vector<DNSRecord> retq;
1693 bool old = setCacheOnly(true);
1694 bool fromCache = false;
1695 // For cache peeking, we tell doResolveNoQNameMinimization not to consider the (non-recursive) forward case.
1696 // Otherwise all queries in a forward domain will be forwarded, while we want to consult the cache.
1697 int res = doResolveNoQNameMinimization(qname, qtype, retq, depth, beenthere, context, &fromCache, nullptr);
1698 setCacheOnly(old);
1699 if (fromCache) {
1700 LOG(prefix << qname << ": Step0 Found in cache" << endl);
1701 if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None && (d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NXDOMAIN || d_appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NODATA)) {
1702 ret.clear();
1703 }
1704 ret.insert(ret.end(), retq.begin(), retq.end());
1705
1706 return res;
1707 }
1708 LOG(prefix << qname << ": Step0 Not cached" << endl);
1709
1710 const unsigned int qnamelen = qname.countLabels();
1711
1712 DNSName fwdomain(qname);
1713 const bool forwarded = getBestAuthZone(&fwdomain) != t_sstorage.domainmap->end();
1714 if (forwarded) {
1715 LOG(prefix << qname << ": Step0 qname is in a forwarded domain " << fwdomain << endl);
1716 }
1717
1718 for (unsigned int i = 0; i <= qnamelen; i++) {
1719
1720 // Step 1
1721 vector<DNSRecord> bestns;
1722 DNSName nsdomain(qname);
1723 if (qtype == QType::DS) {
1724 nsdomain.chopOff();
1725 }
1726 // the two retries allow getBestNSFromCache&co to reprime the root
1727 // hints, in case they ever go missing
1728 for (int tries = 0; tries < 2 && bestns.empty(); ++tries) {
1729 bool flawedNSSet = false;
1730 set<GetBestNSAnswer> beenthereIgnored;
1731 getBestNSFromCache(nsdomain, qtype, bestns, &flawedNSSet, depth, prefix, beenthereIgnored, boost::make_optional(forwarded, fwdomain));
1732 if (forwarded) {
1733 break;
1734 }
1735 }
1736
1737 if (bestns.empty()) {
1738 if (!forwarded) {
1739 // Something terrible is wrong
1740 LOG(prefix << qname << ": Step1 No ancestor found return ServFail" << endl);
1741 return RCode::ServFail;
1742 }
1743 child = fwdomain;
1744 }
1745 else {
1746 LOG(prefix << qname << ": Step1 Ancestor from cache is " << bestns[0].d_name << endl);
1747 if (forwarded) {
1748 child = bestns[0].d_name.isPartOf(fwdomain) ? bestns[0].d_name : fwdomain;
1749 LOG(prefix << qname << ": Step1 Final Ancestor (using forwarding info) is " << child << endl);
1750 }
1751 else {
1752 child = bestns[0].d_name;
1753 }
1754 }
1755 for (; i <= qnamelen; i++) {
1756 // Step 2
1757 unsigned int labels = child.countLabels();
1758 unsigned int targetlen = qmStepLen(labels, qnamelen, i);
1759
1760 while (labels < targetlen) {
1761 child.prependRawLabel(qname.getRawLabel(qnamelen - labels - 1));
1762 labels++;
1763 }
1764 // rfc9156 section-2.3, append labels if they start with an underscore
1765 while (labels < qnamelen) {
1766 auto prependLabel = qname.getRawLabel(qnamelen - labels - 1);
1767 if (prependLabel.at(0) != '_') {
1768 break;
1769 }
1770 child.prependRawLabel(prependLabel);
1771 labels++;
1772 }
1773
1774 LOG(prefix << qname << ": Step2 New child " << child << endl);
1775
1776 // Step 3 resolve
1777 if (child == qname) {
1778 LOG(prefix << qname << ": Step3 Going to do final resolve" << endl);
1779 res = doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
1780 LOG(prefix << qname << ": Step3 Final resolve: " << resToString(res) << "/" << ret.size() << endl);
1781 return res;
1782 }
1783
1784 // If we have seen this child during resolution already; we tried to QM it already or otherwise broken.
1785 // fall back to no-QM
1786 bool qmLoopDetected = false;
1787 for (const auto& visitedNS : beenthere) {
1788 if (visitedNS.qname == child) {
1789 qmLoopDetected = true;
1790 break;
1791 }
1792 }
1793 if (qmLoopDetected) {
1794 LOG(prefix << qname << ": Step4 loop detected as visited this child name already, fallback to no QM" << endl);
1795 res = doResolveNoQNameMinimization(qname, qtype, ret, depth, beenthere, context);
1796 LOG(prefix << qname << ": Step4 Final resolve: " << resToString(res) << "/" << ret.size() << endl);
1797 return res;
1798 }
1799
1800 // Step 4
1801 LOG(prefix << qname << ": Step4 Resolve A for child " << child << endl);
1802 bool oldFollowCNAME = d_followCNAME;
1803 d_followCNAME = false;
1804 retq.resize(0);
1805 StopAtDelegation stopAtDelegation = Stop;
1806 res = doResolveNoQNameMinimization(child, QType::A, retq, depth, beenthere, context, nullptr, &stopAtDelegation);
1807 d_followCNAME = oldFollowCNAME;
1808 LOG(prefix << qname << ": Step4 Resolve " << child << "|A result is " << RCode::to_s(res) << "/" << retq.size() << "/" << stopAtDelegation << endl);
1809 if (stopAtDelegation == Stopped) {
1810 LOG(prefix << qname << ": Delegation seen, continue at step 1" << endl);
1811 break;
1812 }
1813
1814 if (res != RCode::NoError) {
1815 // Case 5: unexpected answer
1816 LOG(prefix << qname << ": Step5: other rcode, last effort final resolve" << endl);
1817 setQNameMinimization(false);
1818 setQMFallbackMode(true);
1819
1820 auto oldEDE = context.extendedError;
1821 res = doResolveNoQNameMinimization(qname, qtype, ret, depth + 1, beenthere, context);
1822
1823 if (res == RCode::NoError) {
1824 t_Counters.at(rec::Counter::qnameminfallbacksuccess)++;
1825 }
1826 else {
1827 // as doResolveNoQNameMinimization clears the EDE, we put it back here, it is relevant but might not be set by the last effort attempt
1828 if (!context.extendedError) {
1829 context.extendedError = std::move(oldEDE);
1830 }
1831 }
1832
1833 LOG(prefix << qname << ": Step5 End resolve: " << resToString(res) << "/" << ret.size() << endl);
1834 return res;
1835 }
1836 }
1837 }
1838
1839 // Should not be reached
1840 LOG(prefix << qname << ": Max iterations reached, return ServFail" << endl);
1841 return RCode::ServFail;
1842 }
1843
1844 unsigned int SyncRes::getAdjustedRecursionBound() const
1845 {
1846 auto bound = s_maxdepth; // 40 is default value of s_maxdepth
1847 if (getQMFallbackMode()) {
1848 // We might have hit a depth level check, but we still want to allow some recursion levels in the fallback
1849 // no-qname-minimization case. This has the effect that a qname minimization fallback case might reach 150% of
1850 // maxdepth, taking care to not repeatedly increase the bound.
1851 bound += s_maxdepth / 2;
1852 }
1853 return bound;
1854 }
1855
1856 /*! This function will check the cache and go out to the internet if the answer is not in cache
1857 *
1858 * \param qname The name we need an answer for
1859 * \param qtype
1860 * \param ret The vector of DNSRecords we need to fill with the answers
1861 * \param depth The recursion depth we are in
1862 * \param beenthere
1863 * \param fromCache tells the caller the result came from the cache, may be nullptr
1864 * \param stopAtDelegation if non-nullptr and pointed-to value is Stop requests the callee to stop at a delegation, if so pointed-to value is set to Stopped
1865 * \return DNS RCODE or -1 (Error)
1866 */
1867 int SyncRes::doResolveNoQNameMinimization(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, set<GetBestNSAnswer>& beenthere, Context& context, bool* fromCache, StopAtDelegation* stopAtDelegation) // NOLINT(readability-function-cognitive-complexity)
1868 {
1869 context.extendedError.reset();
1870 auto prefix = getPrefix(depth);
1871
1872 LOG(prefix << qname << ": Wants " << (d_doDNSSEC ? "" : "NO ") << "DNSSEC processing, " << (d_requireAuthData ? "" : "NO ") << "auth data required by query for " << qtype << endl);
1873
1874 d_maxdepth = std::max(d_maxdepth, depth);
1875 if (s_maxdepth > 0) {
1876 auto bound = getAdjustedRecursionBound();
1877 // Use a stricter bound if throttling
1878 if (depth > bound || (d_outqueries > 10 && d_throttledqueries > 5 && depth > bound * 2 / 3)) {
1879 string msg = "More than " + std::to_string(bound) + " (adjusted max-recursion-depth) levels of recursion needed while resolving " + qname.toLogString();
1880 LOG(prefix << qname << ": " << msg << endl);
1881 throw ImmediateServFailException(std::move(msg));
1882 }
1883 }
1884
1885 int res = 0;
1886
1887 const int iterations = !d_refresh && MemRecursorCache::s_maxServedStaleExtensions > 0 ? 2 : 1;
1888 for (int loop = 0; loop < iterations; loop++) {
1889
1890 d_serveStale = loop == 1;
1891 if (d_serveStale) {
1892 LOG(prefix << qname << ": Restart, with serve-stale enabled" << endl);
1893 }
1894 // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS.
1895 if (!d_updatingRootNS || qtype.getCode() != QType::NS || !qname.isRoot()) {
1896 DNSName authname(qname);
1897 const auto iter = getBestAuthZone(&authname);
1898
1899 if (d_cacheonly) {
1900 if (iter != t_sstorage.domainmap->end()) {
1901 if (iter->second.isAuth()) {
1902 LOG(prefix << qname << ": Cache only lookup for '" << qname << "|" << qtype << "', in auth zone" << endl);
1903 ret.clear();
1904 d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, prefix, res);
1905 if (fromCache != nullptr) {
1906 *fromCache = d_wasOutOfBand;
1907 }
1908 return res;
1909 }
1910 }
1911 }
1912
1913 bool wasForwardedOrAuthZone = false;
1914 bool wasAuthZone = false;
1915 bool wasForwardRecurse = false;
1916
1917 if (iter != t_sstorage.domainmap->end()) {
1918 wasForwardedOrAuthZone = true;
1919
1920 if (iter->second.isAuth()) {
1921 wasAuthZone = true;
1922 }
1923 else if (iter->second.shouldRecurse()) {
1924 wasForwardRecurse = true;
1925 }
1926 }
1927
1928 /* When we are looking for a DS, we want to the non-CNAME cache check first
1929 because we can actually have a DS (from the parent zone) AND a CNAME (from
1930 the child zone), and what we really want is the DS */
1931 if (qtype != QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse, loop == 1)) { // will reroute us if needed
1932 d_wasOutOfBand = wasAuthZone;
1933 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1934 // are in QM Step0) we might have a CNAME but not the corresponding target.
1935 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1936 // we will get the records from the cache, resulting in a small overhead.
1937 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1938 // RPZ rules will not be evaluated anymore (we already matched).
1939 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1940
1941 if (fromCache != nullptr && (!d_cacheonly || stoppedByPolicyHit)) {
1942 *fromCache = true;
1943 }
1944 /* Apply Post filtering policies */
1945
1946 if (d_wantsRPZ && !stoppedByPolicyHit) {
1947 auto luaLocal = g_luaconfs.getLocal();
1948 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1949 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1950 bool done = false;
1951 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1952 if (done && fromCache != nullptr) {
1953 *fromCache = true;
1954 }
1955 }
1956 }
1957 return res;
1958 }
1959
1960 if (doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, prefix, res, context)) {
1961 // we done
1962 d_wasOutOfBand = wasAuthZone;
1963 if (fromCache != nullptr) {
1964 *fromCache = true;
1965 }
1966
1967 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
1968 auto luaLocal = g_luaconfs.getLocal();
1969 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1970 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1971 bool done = false;
1972 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
1973 }
1974 }
1975
1976 return res;
1977 }
1978
1979 /* if we have not found a cached DS (or denial of), now is the time to look for a CNAME */
1980 if (qtype == QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, prefix, res, context, wasAuthZone, wasForwardRecurse, loop == 1)) { // will reroute us if needed
1981 d_wasOutOfBand = wasAuthZone;
1982 // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we
1983 // are in QM Step0) we might have a CNAME but not the corresponding target.
1984 // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since
1985 // we will get the records from the cache, resulting in a small overhead.
1986 // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since
1987 // RPZ rules will not be evaluated anymore (we already matched).
1988 const bool stoppedByPolicyHit = d_appliedPolicy.wasHit();
1989
1990 if (fromCache != nullptr && (!d_cacheonly || stoppedByPolicyHit)) {
1991 *fromCache = true;
1992 }
1993 /* Apply Post filtering policies */
1994
1995 if (d_wantsRPZ && !stoppedByPolicyHit) {
1996 auto luaLocal = g_luaconfs.getLocal();
1997 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
1998 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
1999 bool done = false;
2000 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
2001 if (done && fromCache != nullptr) {
2002 *fromCache = true;
2003 }
2004 }
2005 }
2006
2007 return res;
2008 }
2009 }
2010
2011 if (d_cacheonly) {
2012 return 0;
2013 }
2014
2015 // When trying to serve-stale, we also only look at the cache. Don't look at d_serveStale, it
2016 // might be changed by recursive calls (this should be fixed in a better way!).
2017 if (loop == 1) {
2018 return res;
2019 }
2020
2021 LOG(prefix << qname << ": No cache hit for '" << qname << "|" << qtype << "', trying to find an appropriate NS record" << endl);
2022
2023 DNSName subdomain(qname);
2024 if (qtype == QType::DS) {
2025 subdomain.chopOff();
2026 }
2027
2028 NsSet nsset;
2029 bool flawedNSSet = false;
2030
2031 // the two retries allow getBestNSNamesFromCache&co to reprime the root
2032 // hints, in case they ever go missing
2033 for (int tries = 0; tries < 2 && nsset.empty(); ++tries) {
2034 subdomain = getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, prefix, beenthere); // pass beenthere to both occasions
2035 }
2036
2037 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, nullptr);
2038
2039 if (res == -1 && s_save_parent_ns_set) {
2040 // It did not work out, lets check if we have a saved parent NS set
2041 map<DNSName, vector<ComboAddress>> fallBack;
2042 {
2043 auto lock = s_savedParentNSSet.lock();
2044 auto domainData = lock->find(subdomain);
2045 if (domainData != lock->end() && !domainData->d_nsAddresses.empty()) {
2046 nsset.clear();
2047 // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt
2048 // Take a copy to be able to release the lock, NsSet is actually a map, go figure
2049 for (const auto& nsAddress : domainData->d_nsAddresses) {
2050 nsset.emplace(nsAddress.first, pair(std::vector<ComboAddress>(), false));
2051 fallBack.emplace(nsAddress.first, nsAddress.second);
2052 }
2053 }
2054 }
2055 if (!fallBack.empty()) {
2056 LOG(prefix << qname << ": Failure, but we have a saved parent NS set, trying that one" << endl);
2057 res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, prefix, beenthere, context, stopAtDelegation, &fallBack);
2058 if (res == 0) {
2059 // It did work out
2060 s_savedParentNSSet.lock()->inc(subdomain);
2061 }
2062 }
2063 }
2064 /* Apply Post filtering policies */
2065 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
2066 auto luaLocal = g_luaconfs.getLocal();
2067 if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) {
2068 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
2069 bool done = false;
2070 handlePolicyHit(prefix, qname, qtype, ret, done, res, depth);
2071 }
2072 }
2073
2074 if (res == 0) {
2075 return 0;
2076 }
2077
2078 LOG(prefix << qname << ": Failed (res=" << res << ")" << endl);
2079 if (res >= 0) {
2080 break;
2081 }
2082 }
2083 return res < 0 ? RCode::ServFail : res;
2084 }
2085
#if 0
// for testing purposes
/* Comparator meant to sort IPv6 addresses ahead of IPv4 ones.
   It compares the address family of the two arguments with a strict '>', so that it is a
   valid ordering for the standard sort algorithms (the previous version compared 'a' with
   itself and therefore always returned true).
   NOTE(review): relies on AF_INET6 being numerically larger than AF_INET, which holds on
   Linux and the BSDs - confirm for any other target before enabling. */
static bool ipv6First(const ComboAddress& a, const ComboAddress& b)
{
  return a.sin4.sin_family > b.sin4.sin_family;
}
#endif
2093
2094 struct speedOrderCA
2095 {
2096 speedOrderCA(std::map<ComboAddress, float>& speeds) :
2097 d_speeds(speeds) {}
2098 bool operator()(const ComboAddress& lhs, const ComboAddress& rhs) const
2099 {
2100 return d_speeds[lhs] < d_speeds[rhs];
2101 }
2102 std::map<ComboAddress, float>& d_speeds; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members): nothing wrong afaiks
2103 };
2104
2105 void SyncRes::selectNSOnSpeed(const DNSName& qname, const string& prefix, vector<ComboAddress>& ret)
2106 {
2107 /* we need to remove from the nsSpeeds collection the existing IPs
2108 for this nameserver that are no longer in the set, even if there
2109 is only one or none at all in the current set.
2110 */
2111 map<ComboAddress, float> speeds;
2112 {
2113 auto lock = s_nsSpeeds.lock();
2114 const auto& collection = lock->find_or_enter(qname, d_now);
2115 float factor = collection.getFactor(d_now);
2116 for (const auto& val : ret) {
2117 speeds[val] = collection.d_collection[val].get(factor);
2118 }
2119 collection.purge(speeds);
2120 }
2121
2122 if (ret.size() > 1) {
2123 shuffle(ret.begin(), ret.end(), pdns::dns_random_engine());
2124 speedOrderCA speedOrder(speeds);
2125 stable_sort(ret.begin(), ret.end(), speedOrder);
2126 }
2127
2128 if (doLog()) {
2129 LOG(prefix << qname << ": Nameserver " << qname << " IPs: ");
2130 bool first = true;
2131 for (const auto& addr : ret) {
2132 if (first) {
2133 first = false;
2134 }
2135 else {
2136 LOG(", ");
2137 }
2138 LOG((addr.toString()) << "(" << fmtfloat(speeds[addr] / 1000.0) << "ms)");
2139 }
2140 LOG(endl);
2141 }
2142 }
2143
2144 template <typename T>
2145 static bool collectAddresses(const vector<DNSRecord>& cset, vector<ComboAddress>& ret)
2146 {
2147 bool pushed = false;
2148 for (const auto& record : cset) {
2149 if (auto rec = getRR<T>(record)) {
2150 ret.push_back(rec->getCA(53));
2151 pushed = true;
2152 }
2153 }
2154 return pushed;
2155 }
2156
2157 /** This function explicitly goes out for A or AAAA addresses
2158 */
2159 vector<ComboAddress> SyncRes::getAddrs(const DNSName& qname, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, bool cacheOnly, unsigned int& addressQueriesForNS)
2160 {
2161 typedef vector<DNSRecord> res_t;
2162 typedef vector<ComboAddress> ret_t;
2163 ret_t ret;
2164
2165 bool oldCacheOnly = setCacheOnly(cacheOnly);
2166 bool oldRequireAuthData = d_requireAuthData;
2167 bool oldValidationRequested = d_DNSSECValidationRequested;
2168 bool oldFollowCNAME = d_followCNAME;
2169 bool seenV6 = false;
2170 const unsigned int startqueries = d_outqueries;
2171 d_requireAuthData = false;
2172 d_DNSSECValidationRequested = false;
2173 d_followCNAME = false;
2174
2175 MemRecursorCache::Flags flags = MemRecursorCache::None;
2176 if (d_serveStale) {
2177 flags |= MemRecursorCache::ServeStale;
2178 }
2179 try {
2180 // First look for both A and AAAA in the cache
2181 res_t cset;
2182 if (s_doIPv4 && g_recCache->get(d_now.tv_sec, qname, QType::A, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2183 collectAddresses<ARecordContent>(cset, ret);
2184 }
2185 if (s_doIPv6 && g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2186 if (collectAddresses<AAAARecordContent>(cset, ret)) {
2187 seenV6 = true;
2188 }
2189 }
2190 if (ret.empty()) {
2191 // Neither A nor AAAA in the cache...
2192 Context newContext1;
2193 cset.clear();
2194 // Go out to get A's
2195 if (s_doIPv4 && doResolveNoQNameMinimization(qname, QType::A, cset, depth + 1, beenthere, newContext1) == 0) { // this consults cache, OR goes out
2196 collectAddresses<ARecordContent>(cset, ret);
2197 }
2198 if (s_doIPv6) { // s_doIPv6 **IMPLIES** pdns::isQueryLocalAddressFamilyEnabled(AF_INET6) returned true
2199 if (ret.empty()) {
2200 // We only go out immediately to find IPv6 records if we did not find any IPv4 ones.
2201 Context newContext2;
2202 if (doResolveNoQNameMinimization(qname, QType::AAAA, cset, depth + 1, beenthere, newContext2) == 0) { // this consults cache, OR goes out
2203 if (collectAddresses<AAAARecordContent>(cset, ret)) {
2204 seenV6 = true;
2205 }
2206 }
2207 }
2208 else {
2209 // We have some IPv4 records, consult the cache, we might have encountered some IPv6 glue
2210 cset.clear();
2211 if (g_recCache->get(d_now.tv_sec, qname, QType::AAAA, flags, &cset, d_cacheRemote, d_routingTag) > 0) {
2212 if (collectAddresses<AAAARecordContent>(cset, ret)) {
2213 seenV6 = true;
2214 }
2215 }
2216 }
2217 }
2218 }
2219 if (s_doIPv6 && !seenV6 && !cacheOnly) {
2220 // No IPv6 records in cache, check negcache and submit async task if negache does not have the data
2221 // so that the next time the cache or the negcache will have data
2222 pushResolveIfNotInNegCache(qname, QType::AAAA, d_now);
2223 }
2224 }
2225 catch (const PolicyHitException&) {
2226 // We ignore a policy hit while trying to retrieve the addresses
2227 // of a NS and keep processing the current query
2228 }
2229
2230 if (ret.empty() && d_outqueries > startqueries) {
2231 // We did 1 or more outgoing queries to resolve this NS name but returned empty handed
2232 addressQueriesForNS++;
2233 }
2234 d_requireAuthData = oldRequireAuthData;
2235 d_DNSSECValidationRequested = oldValidationRequested;
2236 setCacheOnly(oldCacheOnly);
2237 d_followCNAME = oldFollowCNAME;
2238
2239 if (s_max_busy_dot_probes > 0 && s_dot_to_port_853) {
2240 for (auto& add : ret) {
2241 if (shouldDoDoT(add, d_now.tv_sec)) {
2242 add.setPort(853);
2243 }
2244 }
2245 }
2246 selectNSOnSpeed(qname, prefix, ret);
2247 return ret;
2248 }
2249
2250 void SyncRes::getBestNSFromCache(const DNSName& qname, const QType qtype, vector<DNSRecord>& bestns, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, const boost::optional<DNSName>& cutOffDomain) // NOLINT(readability-function-cognitive-complexity)
2251 {
2252 DNSName subdomain(qname);
2253 bestns.clear();
2254 bool brokeloop = false;
2255 MemRecursorCache::Flags flags = MemRecursorCache::None;
2256 if (d_serveStale) {
2257 flags |= MemRecursorCache::ServeStale;
2258 }
2259 do {
2260 if (cutOffDomain && (subdomain == *cutOffDomain || !subdomain.isPartOf(*cutOffDomain))) {
2261 break;
2262 }
2263 brokeloop = false;
2264 LOG(prefix << qname << ": Checking if we have NS in cache for '" << subdomain << "'" << endl);
2265 vector<DNSRecord> nsVector;
2266 *flawedNSSet = false;
2267
2268 if (bool isAuth = false; g_recCache->get(d_now.tv_sec, subdomain, QType::NS, flags, &nsVector, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, nullptr, &isAuth) > 0) {
2269 if (s_maxnsperresolve > 0 && nsVector.size() > s_maxnsperresolve) {
2270 vector<DNSRecord> selected;
2271 selected.reserve(s_maxnsperresolve);
2272 std::sample(nsVector.cbegin(), nsVector.cend(), std::back_inserter(selected), s_maxnsperresolve, pdns::dns_random_engine());
2273 nsVector = std::move(selected);
2274 }
2275 bestns.reserve(nsVector.size());
2276
2277 vector<DNSName> missing;
2278 for (const auto& nsRecord : nsVector) {
2279 if (nsRecord.d_ttl > (unsigned int)d_now.tv_sec) {
2280 vector<DNSRecord> aset;
2281 QType nsqt{QType::ADDR};
2282 if (s_doIPv4 && !s_doIPv6) {
2283 nsqt = QType::A;
2284 }
2285 else if (!s_doIPv4 && s_doIPv6) {
2286 nsqt = QType::AAAA;
2287 }
2288
2289 auto nrr = getRR<NSRecordContent>(nsRecord);
2290 if (nrr && (!nrr->getNS().isPartOf(subdomain) || g_recCache->get(d_now.tv_sec, nrr->getNS(), nsqt, flags, doLog() ? &aset : nullptr, d_cacheRemote, d_routingTag) > 0)) {
2291 bestns.push_back(nsRecord);
2292 LOG(prefix << qname << ": NS (with ip, or non-glue) in cache for '" << subdomain << "' -> '" << nrr->getNS() << "'");
2293 LOG(", within bailiwick: " << nrr->getNS().isPartOf(subdomain));
2294 if (!aset.empty()) {
2295 LOG(", in cache, ttl=" << (unsigned int)(((time_t)aset.begin()->d_ttl - d_now.tv_sec)) << endl);
2296 }
2297 else {
2298 LOG(", not in cache / did not look at cache" << endl);
2299 }
2300 }
2301 else if (nrr != nullptr) {
2302 *flawedNSSet = true;
2303 LOG(prefix << qname << ": NS in cache for '" << subdomain << "', but needs glue (" << nrr->getNS() << ") which we miss or is expired" << endl);
2304 missing.emplace_back(nrr->getNS());
2305 }
2306 }
2307 }
2308 if (*flawedNSSet && bestns.empty() && isAuth) {
2309 // The authoritative (child) NS records did not produce any usable addresses, wipe them, so
2310 // these useless records do not prevent parent records to be inserted into the cache
2311 LOG(prefix << qname << ": Wiping flawed authoritative NS records for " << subdomain << endl);
2312 g_recCache->doWipeCache(subdomain, false, QType::NS);
2313 }
2314 if (!missing.empty() && missing.size() < nsVector.size()) {
2315 // We miss glue, but we have a chance to resolve it, since we do have address(es) for at least one NS
2316 for (const auto& name : missing) {
2317 if (s_doIPv4 && pushResolveIfNotInNegCache(name, QType::A, d_now)) {
2318 LOG(prefix << qname << ": A glue for " << subdomain << " NS " << name << " missing, pushed task to resolve" << endl);
2319 }
2320 if (s_doIPv6 && pushResolveIfNotInNegCache(name, QType::AAAA, d_now)) {
2321 LOG(prefix << qname << ": AAAA glue for " << subdomain << " NS " << name << " missing, pushed task to resolve" << endl);
2322 }
2323 }
2324 }
2325
2326 if (!bestns.empty()) {
2327 GetBestNSAnswer answer;
2328 answer.qname = qname;
2329 answer.qtype = qtype.getCode();
2330 for (const auto& bestNSRecord : bestns) {
2331 if (auto nsContent = getRR<NSRecordContent>(bestNSRecord)) {
2332 answer.bestns.emplace(bestNSRecord.d_name, nsContent->getNS());
2333 }
2334 }
2335
2336 auto insertionPair = beenthere.insert(std::move(answer));
2337 if (!insertionPair.second) {
2338 brokeloop = true;
2339 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' but part of LOOP (already seen " << insertionPair.first->qname << ")! Trying less specific NS" << endl);
2340 ;
2341 if (doLog()) {
2342 for (auto j = beenthere.begin(); j != beenthere.end(); ++j) {
2343 bool neo = (j == insertionPair.first);
2344 LOG(prefix << qname << ": Beenthere" << (neo ? "*" : "") << ": " << j->qname << "|" << DNSRecordContent::NumberToType(j->qtype) << " (" << (unsigned int)j->bestns.size() << ")" << endl);
2345 }
2346 }
2347 bestns.clear();
2348 }
2349 else {
2350 LOG(prefix << qname << ": We have NS in cache for '" << subdomain << "' (flawedNSSet=" << *flawedNSSet << ")" << endl);
2351 return;
2352 }
2353 }
2354 }
2355 LOG(prefix << qname << ": No valid/useful NS in cache for '" << subdomain << "'" << endl);
2356
2357 if (subdomain.isRoot() && !brokeloop) {
2358 // We lost the root NS records
2359 primeHints();
2360 LOG(prefix << qname << ": Reprimed the root" << endl);
2361 /* let's prevent an infinite loop */
2362 if (!d_updatingRootNS) {
2363 auto log = g_slog->withName("housekeeping");
2364 getRootNS(d_now, d_asyncResolve, depth, log);
2365 }
2366 }
2367 } while (subdomain.chopOff());
2368 }
2369
2370 SyncRes::domainmap_t::const_iterator SyncRes::getBestAuthZone(DNSName* qname)
2371 {
2372 if (t_sstorage.domainmap->empty()) {
2373 return t_sstorage.domainmap->end();
2374 }
2375
2376 SyncRes::domainmap_t::const_iterator ret;
2377 do {
2378 ret = t_sstorage.domainmap->find(*qname);
2379 if (ret != t_sstorage.domainmap->end()) {
2380 break;
2381 }
2382 } while (qname->chopOff());
2383 return ret;
2384 }
2385
2386 /** doesn't actually do the work, leaves that to getBestNSFromCache */
2387 DNSName SyncRes::getBestNSNamesFromCache(const DNSName& qname, const QType qtype, NsSet& nsset, bool* flawedNSSet, unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere)
2388 {
2389 DNSName authOrForwDomain(qname);
2390
2391 auto iter = getBestAuthZone(&authOrForwDomain);
2392 // We have an auth, forwarder of forwarder-recurse
2393 if (iter != t_sstorage.domainmap->end()) {
2394 if (iter->second.isAuth()) {
2395 // this gets picked up in doResolveAt, the empty DNSName, combined with the
2396 // empty vector means 'we are auth for this zone'
2397 nsset.insert({DNSName(), {{}, false}});
2398 return authOrForwDomain;
2399 }
2400 if (iter->second.shouldRecurse()) {
2401 // Again, picked up in doResolveAt. An empty DNSName, combined with a
2402 // non-empty vector of ComboAddresses means 'this is a forwarded domain'
2403 // This is actually picked up in retrieveAddressesForNS called from doResolveAt.
2404 nsset.insert({DNSName(), {iter->second.d_servers, true}});
2405 return authOrForwDomain;
2406 }
2407 }
2408
2409 // We might have a (non-recursive) forwarder, but maybe the cache already contains
2410 // a better NS
2411 vector<DNSRecord> bestns;
2412 DNSName nsFromCacheDomain(g_rootdnsname);
2413 getBestNSFromCache(qname, qtype, bestns, flawedNSSet, depth, prefix, beenthere);
2414
2415 // Pick up the auth domain
2416 for (const auto& nsRecord : bestns) {
2417 const auto nsContent = getRR<NSRecordContent>(nsRecord);
2418 if (nsContent) {
2419 nsFromCacheDomain = nsRecord.d_name;
2420 break;
2421 }
2422 }
2423
2424 if (iter != t_sstorage.domainmap->end()) {
2425 if (doLog()) {
2426 LOG(prefix << qname << " authOrForwDomain: " << authOrForwDomain << " nsFromCacheDomain: " << nsFromCacheDomain << " isPartof: " << authOrForwDomain.isPartOf(nsFromCacheDomain) << endl);
2427 }
2428
2429 // If the forwarder is better or equal to what's found in the cache, use forwarder. Note that name.isPartOf(name).
2430 // So queries that get NS for authOrForwDomain itself go to the forwarder
2431 if (authOrForwDomain.isPartOf(nsFromCacheDomain)) {
2432 if (doLog()) {
2433 LOG(prefix << qname << ": Using forwarder as NS" << endl);
2434 }
2435 nsset.insert({DNSName(), {iter->second.d_servers, false}});
2436 return authOrForwDomain;
2437 }
2438 if (doLog()) {
2439 LOG(prefix << qname << ": Using NS from cache" << endl);
2440 }
2441 }
2442 for (const auto& bestn : bestns) {
2443 // The actual resolver code will not even look at the ComboAddress or bool
2444 const auto nsContent = getRR<NSRecordContent>(bestn);
2445 if (nsContent) {
2446 nsset.insert({nsContent->getNS(), {{}, false}});
2447 }
2448 }
2449 return nsFromCacheDomain;
2450 }
2451
2452 void SyncRes::updateValidationStatusInCache(const DNSName& qname, const QType qtype, bool aaFlag, vState newState) const
2453 {
2454 if (qtype == QType::ANY || qtype == QType::ADDR) {
2455 // not doing that
2456 return;
2457 }
2458
2459 if (vStateIsBogus(newState)) {
2460 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qtype, d_cacheRemote, d_routingTag, aaFlag, newState, s_maxbogusttl + d_now.tv_sec);
2461 }
2462 else {
2463 g_recCache->updateValidationStatus(d_now.tv_sec, qname, qtype, d_cacheRemote, d_routingTag, aaFlag, newState, boost::none);
2464 }
2465 }
2466
2467 static pair<bool, unsigned int> scanForCNAMELoop(const DNSName& name, const vector<DNSRecord>& records)
2468 {
2469 unsigned int numCNames = 0;
2470 for (const auto& record : records) {
2471 if (record.d_type == QType::CNAME && record.d_place == DNSResourceRecord::ANSWER) {
2472 ++numCNames;
2473 if (name == record.d_name) {
2474 return {true, numCNames};
2475 }
2476 }
2477 }
2478 return {false, numCNames};
2479 }
2480
2481 bool SyncRes::doCNAMECacheCheck(const DNSName& qname, const QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context, bool wasAuthZone, bool wasForwardRecurse, bool checkForDups) // NOLINT(readability-function-cognitive-complexity)
2482 {
2483 vector<DNSRecord> cset;
2484 vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
2485 vector<std::shared_ptr<DNSRecord>> authorityRecs;
2486 bool wasAuth = false;
2487 uint32_t capTTL = std::numeric_limits<uint32_t>::max();
2488 DNSName foundName;
2489 DNSName authZone;
2490 QType foundQT = QType::ENT;
2491
2492 /* we don't require auth data for forward-recurse lookups */
2493 MemRecursorCache::Flags flags = MemRecursorCache::None;
2494 if (!wasForwardRecurse && d_requireAuthData) {
2495 flags |= MemRecursorCache::RequireAuth;
2496 }
2497 if (d_refresh) {
2498 flags |= MemRecursorCache::Refresh;
2499 }
2500 if (d_serveStale) {
2501 flags |= MemRecursorCache::ServeStale;
2502 }
2503 if (g_recCache->get(d_now.tv_sec, qname, QType::CNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2504 foundName = qname;
2505 foundQT = QType::CNAME;
2506 }
2507
2508 if (foundName.empty() && qname != g_rootdnsname) {
2509 // look for a DNAME cache hit
2510 auto labels = qname.getRawLabels();
2511 DNSName dnameName(g_rootdnsname);
2512
2513 do {
2514 dnameName.prependRawLabel(labels.back());
2515 labels.pop_back();
2516 if (dnameName == qname && qtype != QType::DNAME) { // The client does not want a DNAME, but we've reached the QNAME already. So there is no match
2517 break;
2518 }
2519 if (g_recCache->get(d_now.tv_sec, dnameName, QType::DNAME, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &context.state, &wasAuth, &authZone, &d_fromAuthIP) > 0) {
2520 foundName = dnameName;
2521 foundQT = QType::DNAME;
2522 break;
2523 }
2524 } while (!labels.empty());
2525 }
2526
2527 if (foundName.empty()) {
2528 return false;
2529 }
2530
2531 if (qtype == QType::DS && authZone == qname) {
2532 /* CNAME at APEX of the child zone, we can't use that to prove that
2533 there is no DS */
2534 LOG(prefix << qname << ": Found a " << foundQT.toString() << " cache hit of '" << qname << "' from " << authZone << ", but such a record at the apex of the child zone does not prove that there is no DS in the parent zone" << endl);
2535 return false;
2536 }
2537
2538 for (auto const& record : cset) {
2539 if (record.d_class != QClass::IN) {
2540 continue;
2541 }
2542
2543 if (record.d_ttl > (unsigned int)d_now.tv_sec) {
2544
2545 if (!wasAuthZone && shouldValidate() && (wasAuth || wasForwardRecurse) && context.state == vState::Indeterminate && d_requireAuthData) {
2546 /* This means we couldn't figure out the state when this entry was cached */
2547
2548 vState recordState = getValidationStatus(foundName, !signatures.empty(), qtype == QType::DS, depth, prefix);
2549 if (recordState == vState::Secure) {
2550 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, validating.." << endl);
2551 context.state = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, foundName, foundQT, cset, signatures);
2552 if (context.state != vState::Indeterminate) {
2553 LOG(prefix << qname << ": Got vState::Indeterminate state from the " << foundQT.toString() << " cache, new validation result is " << context.state << endl);
2554 if (vStateIsBogus(context.state)) {
2555 capTTL = s_maxbogusttl;
2556 }
2557 updateValidationStatusInCache(foundName, foundQT, wasAuth, context.state);
2558 }
2559 }
2560 }
2561
2562 LOG(prefix << qname << ": Found cache " << foundQT.toString() << " hit for '" << foundName << "|" << foundQT.toString() << "' to '" << record.getContent()->getZoneRepresentation() << "', validation state is " << context.state << endl);
2563
2564 DNSRecord dnsRecord = record;
2565 auto alreadyPresent = false;
2566
2567 if (checkForDups) {
2568 // This can happen on the 2nd iteration of the servestale loop, where the first iteration
2569 // added a C/DNAME record, but the target resolve failed
2570 for (const auto& dnsrec : ret) {
2571 if (dnsrec.d_type == foundQT && dnsrec.d_name == record.d_name) {
2572 alreadyPresent = true;
2573 break;
2574 }
2575 }
2576 }
2577 dnsRecord.d_ttl -= d_now.tv_sec;
2578 dnsRecord.d_ttl = std::min(dnsRecord.d_ttl, capTTL);
2579 const uint32_t ttl = dnsRecord.d_ttl;
2580 if (!alreadyPresent) {
2581 ret.reserve(ret.size() + 2 + signatures.size() + authorityRecs.size());
2582 ret.push_back(dnsRecord);
2583
2584 for (const auto& signature : signatures) {
2585 DNSRecord sigdr;
2586 sigdr.d_type = QType::RRSIG;
2587 sigdr.d_name = foundName;
2588 sigdr.d_ttl = ttl;
2589 sigdr.setContent(signature);
2590 sigdr.d_place = DNSResourceRecord::ANSWER;
2591 sigdr.d_class = QClass::IN;
2592 ret.push_back(sigdr);
2593 }
2594
2595 for (const auto& rec : authorityRecs) {
2596 DNSRecord authDR(*rec);
2597 authDR.d_ttl = ttl;
2598 ret.push_back(authDR);
2599 }
2600 }
2601
2602 DNSName newTarget;
2603 if (foundQT == QType::DNAME) {
2604 if (qtype == QType::DNAME && qname == foundName) { // client wanted the DNAME, no need to synthesize a CNAME
2605 res = RCode::NoError;
2606 return true;
2607 }
2608 // Synthesize a CNAME
2609 auto dnameRR = getRR<DNAMERecordContent>(record);
2610 if (dnameRR == nullptr) {
2611 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|DNAME cache entry");
2612 }
2613 const auto& dnameSuffix = dnameRR->getTarget();
2614 DNSName targetPrefix = qname.makeRelative(foundName);
2615 try {
2616 dnsRecord.d_type = QType::CNAME;
2617 dnsRecord.d_name = targetPrefix + foundName;
2618 newTarget = targetPrefix + dnameSuffix;
2619 dnsRecord.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newTarget)));
2620 ret.push_back(dnsRecord);
2621 }
2622 catch (const std::exception& e) {
2623 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
2624 // But this is consistent with processRecords
2625 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + foundName.toLogString() + "', DNAME target: '" + dnameSuffix.toLogString() + "', substituted name: '" + targetPrefix.toLogString() + "." + dnameSuffix.toLogString() + "' : " + e.what());
2626 }
2627
2628 LOG(prefix << qname << ": Synthesized " << dnsRecord.d_name << "|CNAME " << newTarget << endl);
2629 }
2630
2631 if (qtype == QType::CNAME) { // perhaps they really wanted a CNAME!
2632 res = RCode::NoError;
2633 return true;
2634 }
2635
2636 if (qtype == QType::DS || qtype == QType::DNSKEY) {
2637 res = RCode::NoError;
2638 return true;
2639 }
2640
2641 // We have a DNAME _or_ CNAME cache hit and the client wants something else than those two.
2642 // Let's find the answer!
2643 if (foundQT == QType::CNAME) {
2644 const auto cnameContent = getRR<CNAMERecordContent>(record);
2645 if (cnameContent == nullptr) {
2646 throw ImmediateServFailException("Unable to get record content for " + foundName.toLogString() + "|CNAME cache entry");
2647 }
2648 newTarget = cnameContent->getTarget();
2649 }
2650
2651 if (qname == newTarget) {
2652 string msg = "Got a CNAME referral (from cache) to self";
2653 LOG(prefix << qname << ": " << msg << endl);
2654 throw ImmediateServFailException(std::move(msg));
2655 }
2656
2657 if (newTarget.isPartOf(qname)) {
2658 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
2659 string msg = "Got a CNAME referral (from cache) to child, disabling QM";
2660 LOG(prefix << qname << ": " << msg << endl);
2661 setQNameMinimization(false);
2662 }
2663
2664 if (!d_followCNAME) {
2665 res = RCode::NoError;
2666 return true;
2667 }
2668
2669 // Check to see if we already have seen the new target as a previous target or that we have a very long CNAME chain
2670 const auto [CNAMELoop, numCNAMEs] = scanForCNAMELoop(newTarget, ret);
2671 if (CNAMELoop) {
2672 string msg = "got a CNAME referral (from cache) that causes a loop";
2673 LOG(prefix << qname << ": Status=" << msg << endl);
2674 throw ImmediateServFailException(std::move(msg));
2675 }
2676 if (numCNAMEs > s_max_CNAMES_followed) {
2677 string msg = "max number of CNAMEs exceeded";
2678 LOG(prefix << qname << ": Status=" << msg << endl);
2679 throw ImmediateServFailException(std::move(msg));
2680 }
2681
2682 set<GetBestNSAnswer> beenthere;
2683 Context cnameContext;
2684 // Be aware that going out on the network might be disabled (cache-only), for example because we are in QM Step0,
2685 // so you can't trust that a real lookup will have been made.
2686 res = doResolve(newTarget, qtype, ret, depth + 1, beenthere, cnameContext);
2687 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << context.state << " with the state from the DNAME/CNAME quest: " << cnameContext.state << endl);
2688 updateValidationState(qname, context.state, cnameContext.state, prefix);
2689
2690 return true;
2691 }
2692 }
2693 throw ImmediateServFailException("Could not determine whether or not there was a CNAME or DNAME in cache for '" + qname.toLogString() + "'");
2694 }
2695
2696 namespace
2697 {
2698 struct CacheEntry
2699 {
2700 vector<DNSRecord> records;
2701 vector<shared_ptr<const RRSIGRecordContent>> signatures;
2702 time_t d_ttl_time{0};
2703 uint32_t signaturesTTL{std::numeric_limits<uint32_t>::max()};
2704 };
2705 struct CacheKey
2706 {
2707 DNSName name;
2708 QType type;
2709 DNSResourceRecord::Place place;
2710 bool operator<(const CacheKey& rhs) const
2711 {
2712 return std::tie(type, place, name) < std::tie(rhs.type, rhs.place, rhs.name);
2713 }
2714 };
2715 using tcache_t = map<CacheKey, CacheEntry>;
2716 }
2717
2718 static void reapRecordsFromNegCacheEntryForValidation(tcache_t& tcache, const vector<DNSRecord>& records)
2719 {
2720 for (const auto& rec : records) {
2721 if (rec.d_type == QType::RRSIG) {
2722 auto rrsig = getRR<RRSIGRecordContent>(rec);
2723 if (rrsig) {
2724 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
2725 }
2726 }
2727 else {
2728 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(rec);
2729 }
2730 }
2731 }
2732
2733 static bool negativeCacheEntryHasSOA(const NegCache::NegCacheEntry& negEntry)
2734 {
2735 return !negEntry.authoritySOA.records.empty();
2736 }
2737
2738 static void reapRecordsForValidation(std::map<QType, CacheEntry>& entries, const vector<DNSRecord>& records)
2739 {
2740 for (const auto& rec : records) {
2741 entries[rec.d_type].records.push_back(rec);
2742 }
2743 }
2744
2745 static void reapSignaturesForValidation(std::map<QType, CacheEntry>& entries, const vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
2746 {
2747 for (const auto& sig : signatures) {
2748 entries[sig->d_type].signatures.push_back(sig);
2749 }
2750 }
2751
2752 /*!
2753 * Convenience function to push the records from records into ret with a new TTL
2754 *
2755 * \param records DNSRecords that need to go into ret
2756 * \param ttl The new TTL for these records
2757 * \param ret The vector of DNSRecords that should contain the records with the modified TTL
2758 */
2759 static void addTTLModifiedRecords(vector<DNSRecord>& records, const uint32_t ttl, vector<DNSRecord>& ret)
2760 {
2761 for (auto& rec : records) {
2762 rec.d_ttl = ttl;
2763 ret.push_back(std::move(rec));
2764 }
2765 }
2766
2767 void SyncRes::computeNegCacheValidationStatus(const NegCache::NegCacheEntry& negEntry, const DNSName& qname, const QType qtype, const int res, vState& state, unsigned int depth, const string& prefix)
2768 {
2769 tcache_t tcache;
2770 reapRecordsFromNegCacheEntryForValidation(tcache, negEntry.authoritySOA.records);
2771 reapRecordsFromNegCacheEntryForValidation(tcache, negEntry.authoritySOA.signatures);
2772 reapRecordsFromNegCacheEntryForValidation(tcache, negEntry.DNSSECRecords.records);
2773 reapRecordsFromNegCacheEntryForValidation(tcache, negEntry.DNSSECRecords.signatures);
2774
2775 for (const auto& entry : tcache) {
2776 // this happens when we did store signatures, but passed on the records themselves
2777 if (entry.second.records.empty()) {
2778 continue;
2779 }
2780
2781 const DNSName& owner = entry.first.name;
2782
2783 vState recordState = getValidationStatus(owner, !entry.second.signatures.empty(), qtype == QType::DS, depth, prefix);
2784 if (state == vState::Indeterminate) {
2785 state = recordState;
2786 }
2787
2788 if (recordState == vState::Secure) {
2789 recordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, owner, QType(entry.first.type), entry.second.records, entry.second.signatures);
2790 }
2791
2792 if (recordState != vState::Indeterminate && recordState != state) {
2793 updateValidationState(qname, state, recordState, prefix);
2794 if (state != vState::Secure) {
2795 break;
2796 }
2797 }
2798 }
2799
2800 if (state == vState::Secure) {
2801 vState neValidationState = negEntry.d_validationState;
2802 dState expectedState = res == RCode::NXDomain ? dState::NXDOMAIN : dState::NXQTYPE;
2803 dState denialState = getDenialValidationState(negEntry, expectedState, false, prefix);
2804 updateDenialValidationState(qname, neValidationState, negEntry.d_name, state, denialState, expectedState, qtype == QType::DS, depth, prefix);
2805 }
2806 if (state != vState::Indeterminate) {
2807 /* validation succeeded, let's update the cache entry so we don't have to validate again */
2808 boost::optional<time_t> capTTD = boost::none;
2809 if (vStateIsBogus(state)) {
2810 capTTD = d_now.tv_sec + s_maxbogusttl;
2811 }
2812 g_negCache->updateValidationStatus(negEntry.d_name, negEntry.d_qtype, state, capTTD);
2813 }
2814 }
2815
/* Tries to answer qname|qtype from cached data only. Three sources are consulted, in order:
 *   1. the negative cache (root-nx-trust, an entry for qname|qtype, or, when NXDOMAIN hardening
 *      is enabled, an NXDOMAIN entry for one of the ancestors of qname);
 *   2. the record cache;
 *   3. the aggressive NSEC cache, when there is one and we are not forwarding / in an auth zone.
 *
 * On a hit the function returns true after appending the cached records to `ret`, setting `res`
 * and storing the validation state in `context.state` (and possibly `context.extendedError`).
 * It returns false when none of the caches could provide an answer.
 *
 * NOTE: when the record cache only had (partly) expired data, the non-expired records,
 * signatures and authority records have already been appended to `ret` by the time the
 * function moves on to the aggressive NSEC cache.
 */
bool SyncRes::doCacheCheck(const DNSName& qname, const DNSName& authname, bool wasForwardedOrAuthZone, bool wasAuthZone, bool wasForwardRecurse, QType qtype, vector<DNSRecord>& ret, unsigned int depth, const string& prefix, int& res, Context& context) // NOLINT(readability-function-cognitive-complexity)
{
  bool giveNegative = false;

  // sqname and sqt are used to contain 'higher' names if we have them (e.g. powerdns.com|SOA when we find a negative entry for doesnotexist.powerdns.com|A)
  // NOTE(review): within this function they are only initialised from qname/qtype and never modified afterwards
  DNSName sqname(qname);
  QType sqt(qtype);
  uint32_t sttl = 0;
  // cout<<"Lookup for '"<<qname<<"|"<<qtype.toString()<<"' -> "<<getLastLabel(qname)<<endl;
  vState cachedState{};
  NegCache::NegCacheEntry negEntry;

  /* Step 1a: root-nx-trust, an NXDOMAIN obtained from the root covers the entire name */
  if (s_rootNXTrust && g_negCache->getRootNXTrust(qname, d_now, negEntry, d_serveStale, d_refresh) && negEntry.d_auth.isRoot() && (!wasForwardedOrAuthZone || authname.isRoot())) { // when forwarding, the root may only neg-cache if it was forwarded to.
    sttl = negEntry.d_ttd - d_now.tv_sec;
    LOG(prefix << qname << ": Entire name '" << qname << "', is negatively cached via '" << negEntry.d_auth << "' & '" << negEntry.d_name << "' for another " << sttl << " seconds" << endl);
    res = RCode::NXDomain;
    giveNegative = true;
    cachedState = negEntry.d_validationState;
    if (s_addExtendedResolutionDNSErrors) {
      context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized by root-nx-trust"};
    }
  }
  /* Step 1b: a negative cache entry for this name (and possibly type) */
  else if (g_negCache->get(qname, qtype, d_now, negEntry, false, d_serveStale, d_refresh)) {
    /* If we are looking for a DS, discard NXD if auth == qname
       and ask for a specific denial instead */
    if (qtype != QType::DS || negEntry.d_qtype.getCode() != 0 || negEntry.d_auth != qname || g_negCache->get(qname, qtype, d_now, negEntry, true, d_serveStale, d_refresh)) {
      /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
         and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
         query. */
      if (qtype == QType::DS && negEntry.d_qtype.getCode() != 0 && !d_externalDSQuery.empty() && qname == d_externalDSQuery && !negativeCacheEntryHasSOA(negEntry)) {
        giveNegative = false;
      }
      else {
        // start out as NXDOMAIN, downgraded to NODATA (NoError) below when the entry is for a specific type
        res = RCode::NXDomain;
        sttl = negEntry.d_ttd - d_now.tv_sec;
        giveNegative = true;
        cachedState = negEntry.d_validationState;
        if (negEntry.d_qtype.getCode() != 0) {
          LOG(prefix << qname << "|" << qtype << ": Is negatively cached via '" << negEntry.d_auth << "' for another " << sttl << " seconds" << endl);
          res = RCode::NoError;
          if (s_addExtendedResolutionDNSErrors) {
            context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result from negative cache"};
          }
        }
        else {
          LOG(prefix << qname << ": Entire name '" << qname << "' is negatively cached via '" << negEntry.d_auth << "' for another " << sttl << " seconds" << endl);
          if (s_addExtendedResolutionDNSErrors) {
            context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result from negative cache for entire name"};
          }
        }
      }
    }
  }
  /* Step 1c: NXDOMAIN hardening, look for a negatively cached ancestor of qname,
     starting right below the root and adding one label of qname at a time */
  else if (s_hardenNXD != HardenNXD::No && !qname.isRoot() && !wasForwardedOrAuthZone) {
    auto labels = qname.getRawLabels();
    DNSName negCacheName(g_rootdnsname);
    negCacheName.prependRawLabel(labels.back());
    labels.pop_back();
    // the loop ends before negCacheName becomes qname itself: the full name was already checked above
    while (!labels.empty()) {
      if (g_negCache->get(negCacheName, QType::ENT, d_now, negEntry, true, d_serveStale, d_refresh)) {
        if (negEntry.d_validationState == vState::Indeterminate && validationEnabled()) {
          // LOG(prefix << negCacheName << " negatively cached and vState::Indeterminate, trying to validate NXDOMAIN" << endl);
          // ...
          // And get the updated ne struct
          // t_sstorage.negcache.get(negCacheName, QType(0), d_now, ne, true);
        }
        // with HardenNXD::Yes any non-Bogus entry is good enough, otherwise the entry has to be Secure
        if ((s_hardenNXD == HardenNXD::Yes && !vStateIsBogus(negEntry.d_validationState)) || negEntry.d_validationState == vState::Secure) {
          res = RCode::NXDomain;
          sttl = negEntry.d_ttd - d_now.tv_sec;
          giveNegative = true;
          cachedState = negEntry.d_validationState;
          LOG(prefix << qname << ": Name '" << negCacheName << "' and below, is negatively cached via '" << negEntry.d_auth << "' for another " << sttl << " seconds" << endl);
          if (s_addExtendedResolutionDNSErrors) {
            context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized by nothing-below-nxdomain (RFC8020)"};
          }
          break;
        }
      }
      negCacheName.prependRawLabel(labels.back());
      labels.pop_back();
    }
  }

  if (giveNegative) {

    context.state = cachedState;

    // the entry was cached without a validation verdict: validate it now if we have to
    if (!wasAuthZone && shouldValidate() && context.state == vState::Indeterminate) {
      LOG(prefix << qname << ": Got vState::Indeterminate state for records retrieved from the negative cache, validating.." << endl);
      computeNegCacheValidationStatus(negEntry, qname, qtype, res, context.state, depth, prefix);

      // a freshly established Bogus state limits the TTL we hand out
      if (context.state != cachedState && vStateIsBogus(context.state)) {
        sttl = std::min(sttl, s_maxbogusttl);
      }
    }

    // Transplant SOA to the returned packet
    // (addTTLModifiedRecords moves the records out of negEntry, which is a local copy)
    addTTLModifiedRecords(negEntry.authoritySOA.records, sttl, ret);
    if (d_doDNSSEC) {
      addTTLModifiedRecords(negEntry.authoritySOA.signatures, sttl, ret);
      addTTLModifiedRecords(negEntry.DNSSECRecords.records, sttl, ret);
      addTTLModifiedRecords(negEntry.DNSSECRecords.signatures, sttl, ret);
    }

    LOG(prefix << qname << ": Updating validation state with negative cache content for " << qname << " to " << context.state << endl);
    return true;
  }

  /* Step 2: the record cache */
  vector<DNSRecord> cset;
  bool found = false;
  bool expired = false;
  vector<std::shared_ptr<const RRSIGRecordContent>> signatures;
  vector<std::shared_ptr<DNSRecord>> authorityRecs;
  uint32_t ttl = 0;
  uint32_t capTTL = std::numeric_limits<uint32_t>::max();
  bool wasCachedAuth{};
  MemRecursorCache::Flags flags = MemRecursorCache::None;
  if (!wasForwardRecurse && d_requireAuthData) {
    flags |= MemRecursorCache::RequireAuth;
  }
  if (d_serveStale) {
    flags |= MemRecursorCache::ServeStale;
  }
  if (d_refresh) {
    flags |= MemRecursorCache::Refresh;
  }
  // signatures and authority records are only requested from the cache when DNSSEC processing is on
  if (g_recCache->get(d_now.tv_sec, sqname, sqt, flags, &cset, d_cacheRemote, d_routingTag, d_doDNSSEC ? &signatures : nullptr, d_doDNSSEC ? &authorityRecs : nullptr, &d_wasVariable, &cachedState, &wasCachedAuth, nullptr, &d_fromAuthIP) > 0) {

    LOG(prefix << sqname << ": Found cache hit for " << sqt.toString() << ": ");

    if (!wasAuthZone && shouldValidate() && (wasCachedAuth || wasForwardRecurse) && cachedState == vState::Indeterminate && d_requireAuthData) {

      /* This means we couldn't figure out the state when this entry was cached */
      vState recordState = getValidationStatus(qname, !signatures.empty(), qtype == QType::DS, depth, prefix);

      if (recordState == vState::Secure) {
        LOG(prefix << sqname << ": Got vState::Indeterminate state from the cache, validating.." << endl);
        // a DNSKEY set signed by its own owner goes through validateDNSKeys
        if (sqt == QType::DNSKEY && sqname == getSigner(signatures)) {
          cachedState = validateDNSKeys(sqname, cset, signatures, depth, prefix);
        }
        else {
          if (sqt == QType::ANY) {
            // ANY returns several RRsets at once: validate them per type and combine the results
            std::map<QType, CacheEntry> types;
            reapRecordsForValidation(types, cset);
            reapSignaturesForValidation(types, signatures);

            for (const auto& type : types) {
              vState cachedRecordState{};
              if (type.first == QType::DNSKEY && sqname == getSigner(type.second.signatures)) {
                cachedRecordState = validateDNSKeys(sqname, type.second.records, type.second.signatures, depth, prefix);
              }
              else {
                cachedRecordState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, type.first, type.second.records, type.second.signatures);
              }
              updateDNSSECValidationState(cachedState, cachedRecordState);
            }
          }
          else {
            cachedState = SyncRes::validateRecordsWithSigs(depth, prefix, qname, qtype, sqname, sqt, cset, signatures);
          }
        }
      }
      else {
        cachedState = recordState;
      }

      if (cachedState != vState::Indeterminate) {
        LOG(prefix << qname << ": Got vState::Indeterminate state from the cache, validation result is " << cachedState << endl);
        if (vStateIsBogus(cachedState)) {
          capTTL = s_maxbogusttl;
        }
        // store the verdict in the cache, except for the ANY and ADDR query types
        if (sqt != QType::ANY && sqt != QType::ADDR) {
          updateValidationStatusInCache(sqname, sqt, wasCachedAuth, cachedState);
        }
      }
    }

    for (auto j = cset.cbegin(); j != cset.cend(); ++j) {

      LOG(j->getContent()->getZoneRepresentation());

      if (j->d_class != QClass::IN) {
        continue;
      }

      // d_ttl of a cached record is compared against the current time, i.e. it is an absolute expiry time here
      if (j->d_ttl > (unsigned int)d_now.tv_sec) {
        DNSRecord dnsRecord = *j;
        dnsRecord.d_ttl -= d_now.tv_sec;
        dnsRecord.d_ttl = std::min(dnsRecord.d_ttl, capTTL);
        // the TTL of the last non-expired record is reused for the signatures and authority records below
        ttl = dnsRecord.d_ttl;
        ret.push_back(dnsRecord);
        LOG("[ttl=" << dnsRecord.d_ttl << "] ");
        found = true;
      }
      else {
        LOG("[expired] ");
        expired = true;
      }
    }

    ret.reserve(ret.size() + signatures.size() + authorityRecs.size());

    // turn the cached signatures back into RRSIG records in the answer section
    for (const auto& signature : signatures) {
      DNSRecord dnsRecord;
      dnsRecord.d_type = QType::RRSIG;
      dnsRecord.d_name = sqname;
      dnsRecord.d_ttl = ttl;
      dnsRecord.setContent(signature);
      dnsRecord.d_place = DNSResourceRecord::ANSWER;
      dnsRecord.d_class = QClass::IN;
      ret.push_back(dnsRecord);
    }

    for (const auto& rec : authorityRecs) {
      DNSRecord dnsRecord(*rec);
      dnsRecord.d_ttl = ttl;
      ret.push_back(dnsRecord);
    }

    LOG(endl);
    // only a set without any expired record counts as a cache hit
    if (found && !expired) {
      // giveNegative is always false here: the negative answer path returned above
      if (!giveNegative) {
        res = 0;
      }
      LOG(prefix << qname << ": Updating validation state with cache content for " << qname << " to " << cachedState << endl);
      context.state = cachedState;
      return true;
    }
    LOG(prefix << qname << ": Cache had only stale entries" << endl);
  }

  /* let's check if we have a NSEC covering that record */
  if (g_aggressiveNSECCache && !wasForwardedOrAuthZone) {
    if (g_aggressiveNSECCache->getDenial(d_now.tv_sec, qname, qtype, ret, res, d_cacheRemote, d_routingTag, d_doDNSSEC, d_validationContext, LogObject(prefix))) {
      context.state = vState::Secure;
      if (s_addExtendedResolutionDNSErrors) {
        context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::Synthesized), "Result synthesized from aggressive NSEC cache (RFC8198)"};
      }
      return true;
    }
  }

  return false;
}
3060
3061 bool SyncRes::moreSpecificThan(const DNSName& lhs, const DNSName& rhs)
3062 {
3063 return (lhs.isPartOf(rhs) && lhs.countLabels() > rhs.countLabels());
3064 }
3065
3066 struct speedOrder
3067 {
3068 bool operator()(const std::pair<DNSName, float>& lhs, const std::pair<DNSName, float>& rhs) const
3069 {
3070 return lhs.second < rhs.second;
3071 }
3072 };
3073
3074 std::vector<std::pair<DNSName, float>> SyncRes::shuffleInSpeedOrder(const DNSName& qname, NsSet& tnameservers, const string& prefix)
3075 {
3076 std::vector<std::pair<DNSName, float>> rnameservers;
3077 rnameservers.reserve(tnameservers.size());
3078 for (const auto& tns : tnameservers) {
3079 float speed = s_nsSpeeds.lock()->fastest(tns.first, d_now);
3080 rnameservers.emplace_back(tns.first, speed);
3081 if (tns.first.empty()) { // this was an authoritative OOB zone, don't pollute the nsSpeeds with that
3082 return rnameservers;
3083 }
3084 }
3085
3086 shuffle(rnameservers.begin(), rnameservers.end(), pdns::dns_random_engine());
3087 speedOrder speedCompare;
3088 stable_sort(rnameservers.begin(), rnameservers.end(), speedCompare);
3089
3090 if (doLog()) {
3091 LOG(prefix << qname << ": Nameservers: ");
3092 for (auto i = rnameservers.begin(); i != rnameservers.end(); ++i) {
3093 if (i != rnameservers.begin()) {
3094 LOG(", ");
3095 if (((i - rnameservers.begin()) % 3) == 0) {
3096 LOG(endl
3097 << prefix << " ");
3098 }
3099 }
3100 LOG(i->first.toLogString() << "(" << fmtfloat(i->second / 1000.0) << "ms)");
3101 }
3102 LOG(endl);
3103 }
3104 return rnameservers;
3105 }
3106
3107 vector<ComboAddress> SyncRes::shuffleForwardSpeed(const DNSName& qname, const vector<ComboAddress>& rnameservers, const string& prefix, const bool wasRd)
3108 {
3109 vector<ComboAddress> nameservers = rnameservers;
3110 map<ComboAddress, float> speeds;
3111
3112 for (const auto& val : nameservers) {
3113 DNSName nsName = DNSName(val.toStringWithPort());
3114 float speed = s_nsSpeeds.lock()->fastest(nsName, d_now);
3115 speeds[val] = speed;
3116 }
3117 shuffle(nameservers.begin(), nameservers.end(), pdns::dns_random_engine());
3118 speedOrderCA speedCompare(speeds);
3119 stable_sort(nameservers.begin(), nameservers.end(), speedCompare);
3120
3121 if (doLog()) {
3122 LOG(prefix << qname << ": Nameservers: ");
3123 for (auto i = nameservers.cbegin(); i != nameservers.cend(); ++i) {
3124 if (i != nameservers.cbegin()) {
3125 LOG(", ");
3126 if (((i - nameservers.cbegin()) % 3) == 0) {
3127 LOG(endl
3128 << prefix << " ");
3129 }
3130 }
3131 LOG((wasRd ? string("+") : string("-")) << i->toStringWithPort() << "(" << fmtfloat(speeds[*i] / 1000.0) << "ms)");
3132 }
3133 LOG(endl);
3134 }
3135 return nameservers;
3136 }
3137
3138 static uint32_t getRRSIGTTL(const time_t now, const std::shared_ptr<const RRSIGRecordContent>& rrsig)
3139 {
3140 uint32_t res = 0;
3141 if (now < rrsig->d_sigexpire) {
3142 // coverity[store_truncates_time_t]
3143 res = static_cast<uint32_t>(rrsig->d_sigexpire) - now;
3144 }
3145 return res;
3146 }
3147
3148 static const set<QType> nsecTypes = {QType::NSEC, QType::NSEC3};
3149
3150 /* Fills the authoritySOA and DNSSECRecords fields from ne with those found in the records
3151 *
3152 * \param records The records to parse for the authority SOA and NSEC(3) records
3153 * \param ne The NegCacheEntry to be filled out (will not be cleared, only appended to
3154 */
3155 static void harvestNXRecords(const vector<DNSRecord>& records, NegCache::NegCacheEntry& negEntry, const time_t now, uint32_t* lowestTTL)
3156 {
3157 for (const auto& rec : records) {
3158 if (rec.d_place != DNSResourceRecord::AUTHORITY) {
3159 // RFC 4035 section 3.1.3. indicates that NSEC records MUST be placed in
3160 // the AUTHORITY section. Section 3.1.1 indicates that that RRSIGs for
3161 // records MUST be in the same section as the records they cover.
3162 // Hence, we ignore all records outside of the AUTHORITY section.
3163 continue;
3164 }
3165
3166 if (rec.d_type == QType::RRSIG) {
3167 auto rrsig = getRR<RRSIGRecordContent>(rec);
3168 if (rrsig) {
3169 if (rrsig->d_type == QType::SOA) {
3170 negEntry.authoritySOA.signatures.push_back(rec);
3171 if (lowestTTL != nullptr && isRRSIGNotExpired(now, *rrsig)) {
3172 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3173 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3174 }
3175 }
3176 if (nsecTypes.count(rrsig->d_type) != 0) {
3177 negEntry.DNSSECRecords.signatures.push_back(rec);
3178 if (lowestTTL != nullptr && isRRSIGNotExpired(now, *rrsig)) {
3179 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3180 *lowestTTL = min(*lowestTTL, getRRSIGTTL(now, rrsig));
3181 }
3182 }
3183 }
3184 continue;
3185 }
3186 if (rec.d_type == QType::SOA) {
3187 negEntry.authoritySOA.records.push_back(rec);
3188 if (lowestTTL != nullptr) {
3189 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3190 }
3191 continue;
3192 }
3193 if (nsecTypes.count(rec.d_type) != 0) {
3194 negEntry.DNSSECRecords.records.push_back(rec);
3195 if (lowestTTL != nullptr) {
3196 *lowestTTL = min(*lowestTTL, rec.d_ttl);
3197 }
3198 continue;
3199 }
3200 }
3201 }
3202
3203 static cspmap_t harvestCSPFromNE(const NegCache::NegCacheEntry& negEntry)
3204 {
3205 cspmap_t cspmap;
3206 for (const auto& rec : negEntry.DNSSECRecords.signatures) {
3207 if (rec.d_type == QType::RRSIG) {
3208 auto rrc = getRR<RRSIGRecordContent>(rec);
3209 if (rrc) {
3210 cspmap[{rec.d_name, rrc->d_type}].signatures.push_back(rrc);
3211 }
3212 }
3213 }
3214 for (const auto& rec : negEntry.DNSSECRecords.records) {
3215 cspmap[{rec.d_name, rec.d_type}].records.insert(rec.getContent());
3216 }
3217 return cspmap;
3218 }
3219
3220 // TODO remove after processRecords is fixed!
3221 // Adds the RRSIG for the SOA and the NSEC(3) + RRSIGs to ret
3222 static void addNXNSECS(vector<DNSRecord>& ret, const vector<DNSRecord>& records)
3223 {
3224 NegCache::NegCacheEntry negEntry;
3225 harvestNXRecords(records, negEntry, 0, nullptr);
3226 ret.insert(ret.end(), negEntry.authoritySOA.signatures.begin(), negEntry.authoritySOA.signatures.end());
3227 ret.insert(ret.end(), negEntry.DNSSECRecords.records.begin(), negEntry.DNSSECRecords.records.end());
3228 ret.insert(ret.end(), negEntry.DNSSECRecords.signatures.begin(), negEntry.DNSSECRecords.signatures.end());
3229 }
3230
3231 static bool rpzHitShouldReplaceContent(const DNSName& qname, const QType qtype, const std::vector<DNSRecord>& records)
3232 {
3233 if (qtype == QType::CNAME) {
3234 return true;
3235 }
3236
3237 for (const auto& record : records) { // NOLINT(readability-use-anyofallof): don't agree
3238 if (record.d_type == QType::CNAME) {
3239 if (auto content = getRR<CNAMERecordContent>(record)) {
3240 if (qname == content->getTarget()) {
3241 /* we have a CNAME whose target matches the entry we are about to
3242 generate, so it will complete the current records, not replace
3243 them
3244 */
3245 return false;
3246 }
3247 }
3248 }
3249 }
3250
3251 return true;
3252 }
3253
3254 static void removeConflictingRecord(std::vector<DNSRecord>& records, const DNSName& name, const QType dtype)
3255 {
3256 for (auto it = records.begin(); it != records.end();) {
3257 bool remove = false;
3258
3259 if (it->d_class == QClass::IN && (it->d_type == QType::CNAME || dtype == QType::CNAME || it->d_type == dtype) && it->d_name == name) {
3260 remove = true;
3261 }
3262 else if (it->d_class == QClass::IN && it->d_type == QType::RRSIG && it->d_name == name) {
3263 if (auto rrc = getRR<RRSIGRecordContent>(*it)) {
3264 if (rrc->d_type == QType::CNAME || rrc->d_type == dtype) {
3265 /* also remove any RRSIG that could conflict */
3266 remove = true;
3267 }
3268 }
3269 }
3270
3271 if (remove) {
3272 it = records.erase(it);
3273 }
3274 else {
3275 ++it;
3276 }
3277 }
3278 }
3279
3280 void SyncRes::handlePolicyHit(const std::string& prefix, const DNSName& qname, const QType qtype, std::vector<DNSRecord>& ret, bool& done, int& rcode, unsigned int depth)
3281 {
3282 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
3283 /* reset to no match */
3284 d_appliedPolicy = DNSFilterEngine::Policy();
3285 return;
3286 }
3287
3288 /* don't account truncate actions for TCP queries, since they are not applied */
3289 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !d_queryReceivedOverTCP) {
3290 ++t_Counters.at(rec::PolicyHistogram::policy).at(d_appliedPolicy.d_kind);
3291 ++t_Counters.at(rec::PolicyNameHits::policyName).counts[d_appliedPolicy.getName()];
3292 }
3293
3294 if (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
3295 LOG(prefix << qname << "|" << qtype << ':' << d_appliedPolicy.getLogString() << endl);
3296 }
3297
3298 switch (d_appliedPolicy.d_kind) {
3299
3300 case DNSFilterEngine::PolicyKind::NoAction:
3301 return;
3302
3303 case DNSFilterEngine::PolicyKind::Drop:
3304 ++t_Counters.at(rec::Counter::policyDrops);
3305 throw ImmediateQueryDropException();
3306
3307 case DNSFilterEngine::PolicyKind::NXDOMAIN:
3308 ret.clear();
3309 d_appliedPolicy.addSOAtoRPZResult(ret);
3310 rcode = RCode::NXDomain;
3311 done = true;
3312 return;
3313
3314 case DNSFilterEngine::PolicyKind::NODATA:
3315 ret.clear();
3316 d_appliedPolicy.addSOAtoRPZResult(ret);
3317 rcode = RCode::NoError;
3318 done = true;
3319 return;
3320
3321 case DNSFilterEngine::PolicyKind::Truncate:
3322 if (!d_queryReceivedOverTCP) {
3323 ret.clear();
3324 rcode = RCode::NoError;
3325 // Exception handling code in pdns_recursor clears ret as well, so no use to
3326 // fill it here.
3327 throw SendTruncatedAnswerException();
3328 }
3329 return;
3330
3331 case DNSFilterEngine::PolicyKind::Custom: {
3332 if (rpzHitShouldReplaceContent(qname, qtype, ret)) {
3333 ret.clear();
3334 }
3335
3336 rcode = RCode::NoError;
3337 done = true;
3338 auto spoofed = d_appliedPolicy.getCustomRecords(qname, qtype.getCode());
3339 for (auto& dnsRecord : spoofed) {
3340 removeConflictingRecord(ret, dnsRecord.d_name, dnsRecord.d_type);
3341 }
3342
3343 for (auto& dnsRecord : spoofed) {
3344 ret.push_back(dnsRecord);
3345
3346 if (dnsRecord.d_name == qname && dnsRecord.d_type == QType::CNAME && qtype != QType::CNAME) {
3347 if (auto content = getRR<CNAMERecordContent>(dnsRecord)) {
3348 vState newTargetState = vState::Indeterminate;
3349 handleNewTarget(prefix, qname, content->getTarget(), qtype.getCode(), ret, rcode, depth, {}, newTargetState);
3350 }
3351 }
3352 }
3353 d_appliedPolicy.addSOAtoRPZResult(ret);
3354 }
3355 }
3356 }
3357
3358 bool SyncRes::nameserversBlockedByRPZ(const DNSFilterEngine& dfe, const NsSet& nameservers)
3359 {
3360 /* we skip RPZ processing if:
3361 - it was disabled (d_wantsRPZ is false) ;
3362 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3363 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3364 process any further RPZ rules. Except that we need to process rules of higher priority..
3365 */
3366 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3367 for (auto const& nameserver : nameservers) {
3368 bool match = dfe.getProcessingPolicy(nameserver.first, d_discardedPolicies, d_appliedPolicy);
3369 if (match) {
3370 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3371 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3372 LOG(", however nameserver " << nameserver.first << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3373 return true;
3374 }
3375 }
3376
3377 // Traverse all IP addresses for this NS to see if they have an RPN NSIP policy
3378 for (auto const& address : nameserver.second.first) {
3379 match = dfe.getProcessingPolicy(address, d_discardedPolicies, d_appliedPolicy);
3380 if (match) {
3381 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3382 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
3383 LOG(", however nameserver " << nameserver.first << " IP address " << address.toString() << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
3384 return true;
3385 }
3386 }
3387 }
3388 }
3389 }
3390 return false;
3391 }
3392
3393 bool SyncRes::nameserverIPBlockedByRPZ(const DNSFilterEngine& dfe, const ComboAddress& remoteIP)
3394 {
3395 /* we skip RPZ processing if:
3396 - it was disabled (d_wantsRPZ is false) ;
3397 - we already got a RPZ hit (d_appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) since
3398 the only way we can get back here is that it was a 'pass-thru' (NoAction) meaning that we should not
3399 process any further RPZ rules. Except that we need to process rules of higher priority..
3400 */
3401 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
3402 bool match = dfe.getProcessingPolicy(remoteIP, d_discardedPolicies, d_appliedPolicy);
3403 if (match) {
3404 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
3405 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
3406 LOG(" (blocked by RPZ policy '" + d_appliedPolicy.getName() + "')");
3407 return true;
3408 }
3409 }
3410 }
3411 return false;
3412 }
3413
3414 vector<ComboAddress> SyncRes::retrieveAddressesForNS(const std::string& prefix, const DNSName& qname, std::vector<std::pair<DNSName, float>>::const_iterator& tns, const unsigned int depth, set<GetBestNSAnswer>& beenthere, const vector<std::pair<DNSName, float>>& rnameservers, NsSet& nameservers, bool& sendRDQuery, bool& pierceDontQuery, bool& /* flawedNSSet */, bool cacheOnly, unsigned int& nretrieveAddressesForNS)
3415 {
3416 vector<ComboAddress> result;
3417
3418 size_t nonresolvingfails = 0;
3419 if (!tns->first.empty()) {
3420 if (s_nonresolvingnsmaxfails > 0) {
3421 nonresolvingfails = s_nonresolving.lock()->value(tns->first);
3422 if (nonresolvingfails >= s_nonresolvingnsmaxfails) {
3423 LOG(prefix << qname << ": NS " << tns->first << " in non-resolving map, skipping" << endl);
3424 return result;
3425 }
3426 }
3427
3428 LOG(prefix << qname << ": Trying to resolve NS '" << tns->first << "' (" << 1 + tns - rnameservers.begin() << "/" << (unsigned int)rnameservers.size() << ")" << endl);
3429 const unsigned int oldOutQueries = d_outqueries;
3430 try {
3431 result = getAddrs(tns->first, depth, prefix, beenthere, cacheOnly, nretrieveAddressesForNS);
3432 }
3433 // Other exceptions should likely not throttle...
3434 catch (const ImmediateServFailException& ex) {
3435 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3436 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3437 if (!dontThrottleNames->check(tns->first)) {
3438 s_nonresolving.lock()->incr(tns->first, d_now);
3439 }
3440 }
3441 throw ex;
3442 }
3443 if (s_nonresolvingnsmaxfails > 0 && d_outqueries > oldOutQueries) {
3444 if (result.empty()) {
3445 auto dontThrottleNames = g_dontThrottleNames.getLocal();
3446 if (!dontThrottleNames->check(tns->first)) {
3447 s_nonresolving.lock()->incr(tns->first, d_now);
3448 }
3449 }
3450 else if (nonresolvingfails > 0) {
3451 // Succeeding resolve, clear memory of recent failures
3452 s_nonresolving.lock()->clear(tns->first);
3453 }
3454 }
3455 pierceDontQuery = false;
3456 }
3457 else {
3458 LOG(prefix << qname << ": Domain has hardcoded nameserver");
3459
3460 if (nameservers[tns->first].first.size() > 1) {
3461 LOG("s");
3462 }
3463 LOG(endl);
3464
3465 sendRDQuery = nameservers[tns->first].second;
3466 result = shuffleForwardSpeed(qname, nameservers[tns->first].first, prefix, sendRDQuery);
3467 pierceDontQuery = true;
3468 }
3469 return result;
3470 }
3471
3472 void SyncRes::checkMaxQperQ(const DNSName& qname) const
3473 {
3474 if (d_outqueries + d_throttledqueries > s_maxqperq) {
3475 throw ImmediateServFailException("more than " + std::to_string(s_maxqperq) + " (max-qperq) queries sent or throttled while resolving " + qname.toLogString());
3476 }
3477 }
3478
3479 bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress& remoteIP, const DNSName& qname, const QType qtype, bool pierceDontQuery)
3480 {
3481 if (isThrottled(d_now.tv_sec, remoteIP)) {
3482 LOG(prefix << qname << ": Server throttled " << endl);
3483 t_Counters.at(rec::Counter::throttledqueries)++;
3484 d_throttledqueries++;
3485 return true;
3486 }
3487 if (isThrottled(d_now.tv_sec, remoteIP, qname, qtype)) {
3488 LOG(prefix << qname << ": Query throttled " << remoteIP.toString() << ", " << qname << "; " << qtype << endl);
3489 t_Counters.at(rec::Counter::throttledqueries)++;
3490 d_throttledqueries++;
3491 return true;
3492 }
3493 if (!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
3494 // We could have retrieved an NS from the cache in a forwarding domain
3495 // Even in the case of !pierceDontQuery we still want to allow that NS
3496 DNSName forwardCandidate(qname);
3497 auto iter = getBestAuthZone(&forwardCandidate);
3498 if (iter == t_sstorage.domainmap->end()) {
3499 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3500 t_Counters.at(rec::Counter::dontqueries)++;
3501 return true;
3502 }
3503 // The name (from the cache) is forwarded, but is it forwarded to an IP in known forwarders?
3504 const auto& ips = iter->second.d_servers;
3505 if (std::find(ips.cbegin(), ips.cend(), remoteIP) == ips.cend()) {
3506 LOG(prefix << qname << ": Not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
3507 t_Counters.at(rec::Counter::dontqueries)++;
3508 return true;
3509 }
3510 LOG(prefix << qname << ": Sending query to " << remoteIP.toString() << ", blocked by 'dont-query' but a forwarding/auth case" << endl);
3511 }
3512 return false;
3513 }
3514
3515 bool SyncRes::validationEnabled()
3516 {
3517 return g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate;
3518 }
3519
3520 uint32_t SyncRes::computeLowestTTD(const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, uint32_t signaturesTTL, const std::vector<std::shared_ptr<DNSRecord>>& authorityRecs) const
3521 {
3522 uint32_t lowestTTD = std::numeric_limits<uint32_t>::max();
3523 for (const auto& record : records) {
3524 lowestTTD = min(lowestTTD, record.d_ttl);
3525 }
3526
3527 /* even if it was not requested for that request (Process, and neither AD nor DO set),
3528 it might be requested at a later time so we need to be careful with the TTL. */
3529 if (validationEnabled() && !signatures.empty()) {
3530 /* if we are validating, we don't want to cache records after their signatures expire. */
3531 /* records TTL are now TTD, let's add 'now' to the signatures lowest TTL */
3532 lowestTTD = min(lowestTTD, static_cast<uint32_t>(signaturesTTL + d_now.tv_sec));
3533
3534 for (const auto& sig : signatures) {
3535 if (isRRSIGNotExpired(d_now.tv_sec, *sig)) {
3536 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3537 lowestTTD = min(lowestTTD, static_cast<uint32_t>(sig->d_sigexpire));
3538 }
3539 }
3540 }
3541
3542 for (const auto& entry : authorityRecs) {
3543 /* be careful, this is still a TTL here */
3544 lowestTTD = min(lowestTTD, static_cast<uint32_t>(entry->d_ttl + d_now.tv_sec));
3545
3546 if (entry->d_type == QType::RRSIG && validationEnabled()) {
3547 auto rrsig = getRR<RRSIGRecordContent>(*entry);
3548 if (rrsig) {
3549 if (isRRSIGNotExpired(d_now.tv_sec, *rrsig)) {
3550 // we don't decrement d_sigexpire by 'now' because we actually want a TTD, not a TTL */
3551 lowestTTD = min(lowestTTD, static_cast<uint32_t>(rrsig->d_sigexpire));
3552 }
3553 }
3554 }
3555 }
3556
3557 return lowestTTD;
3558 }
3559
3560 void SyncRes::updateValidationState(const DNSName& qname, vState& state, const vState stateUpdate, const string& prefix)
3561 {
3562 LOG(prefix << qname << ": Validation state was " << state << ", state update is " << stateUpdate);
3563 updateDNSSECValidationState(state, stateUpdate);
3564 LOG(", validation state is now " << state << endl);
3565 }
3566
3567 vState SyncRes::getTA(const DNSName& zone, dsmap_t& dsMap, const string& prefix)
3568 {
3569 auto luaLocal = g_luaconfs.getLocal();
3570
3571 if (luaLocal->dsAnchors.empty()) {
3572 LOG(prefix << zone << ": No trust anchors configured, everything is Insecure" << endl);
3573 /* We have no TA, everything is insecure */
3574 return vState::Insecure;
3575 }
3576
3577 std::string reason;
3578 if (haveNegativeTrustAnchor(luaLocal->negAnchors, zone, reason)) {
3579 LOG(prefix << zone << ": Got NTA" << endl);
3580 return vState::NTA;
3581 }
3582
3583 if (getTrustAnchor(luaLocal->dsAnchors, zone, dsMap)) {
3584 if (!zone.isRoot()) {
3585 LOG(prefix << zone << ": Got TA" << endl);
3586 }
3587 return vState::TA;
3588 }
3589
3590 if (zone.isRoot()) {
3591 /* No TA for the root */
3592 return vState::Insecure;
3593 }
3594
3595 return vState::Indeterminate;
3596 }
3597
3598 size_t SyncRes::countSupportedDS(const dsmap_t& dsmap, const string& prefix)
3599 {
3600 size_t count = 0;
3601
3602 for (const auto& dsRecordContent : dsmap) {
3603 if (isSupportedDS(dsRecordContent, LogObject(prefix))) {
3604 count++;
3605 }
3606 }
3607
3608 return count;
3609 }
3610
3611 void SyncRes::initZoneCutsFromTA(const DNSName& from, const string& prefix)
3612 {
3613 DNSName zone(from);
3614 do {
3615 dsmap_t dsMap;
3616 vState result = getTA(zone, dsMap, prefix);
3617 if (result != vState::Indeterminate) {
3618 if (result == vState::TA) {
3619 if (countSupportedDS(dsMap, prefix) == 0) {
3620 dsMap.clear();
3621 result = vState::Insecure;
3622 }
3623 else {
3624 result = vState::Secure;
3625 }
3626 }
3627 else if (result == vState::NTA) {
3628 result = vState::Insecure;
3629 }
3630
3631 d_cutStates[zone] = result;
3632 }
3633 } while (zone.chopOff());
3634 }
3635
3636 vState SyncRes::getDSRecords(const DNSName& zone, dsmap_t& dsMap, bool onlyTA, unsigned int depth, const string& prefix, bool bogusOnNXD, bool* foundCut)
3637 {
3638 vState result = getTA(zone, dsMap, prefix);
3639
3640 if (result != vState::Indeterminate || onlyTA) {
3641 if (foundCut != nullptr) {
3642 *foundCut = (result != vState::Indeterminate);
3643 }
3644
3645 if (result == vState::TA) {
3646 if (countSupportedDS(dsMap, prefix) == 0) {
3647 dsMap.clear();
3648 result = vState::Insecure;
3649 }
3650 else {
3651 result = vState::Secure;
3652 }
3653 }
3654 else if (result == vState::NTA) {
3655 result = vState::Insecure;
3656 }
3657
3658 return result;
3659 }
3660
3661 std::set<GetBestNSAnswer> beenthere;
3662 std::vector<DNSRecord> dsrecords;
3663
3664 Context context;
3665
3666 const bool oldCacheOnly = setCacheOnly(false);
3667 const bool oldQM = setQNameMinimization(!getQMFallbackMode());
3668 int rcode = doResolve(zone, QType::DS, dsrecords, depth + 1, beenthere, context);
3669 setCacheOnly(oldCacheOnly);
3670 setQNameMinimization(oldQM);
3671
3672 if (rcode == RCode::ServFail) {
3673 throw ImmediateServFailException("Server Failure while retrieving DS records for " + zone.toLogString());
3674 }
3675
3676 if (rcode != RCode::NoError && (rcode != RCode::NXDomain || bogusOnNXD)) {
3677 LOG(prefix << zone << ": Returning Bogus state from " << static_cast<const char*>(__func__) << "(" << zone << ")" << endl);
3678 return vState::BogusUnableToGetDSs;
3679 }
3680
3681 uint8_t bestDigestType = 0;
3682
3683 bool gotCNAME = false;
3684 for (const auto& record : dsrecords) {
3685 if (record.d_type == QType::DS) {
3686 const auto dscontent = getRR<DSRecordContent>(record);
3687 if (dscontent && isSupportedDS(*dscontent, LogObject(prefix))) {
3688 // Make GOST a lower prio than SHA256
3689 if (dscontent->d_digesttype == DNSSECKeeper::DIGEST_GOST && bestDigestType == DNSSECKeeper::DIGEST_SHA256) {
3690 continue;
3691 }
3692 if (dscontent->d_digesttype > bestDigestType || (bestDigestType == DNSSECKeeper::DIGEST_GOST && dscontent->d_digesttype == DNSSECKeeper::DIGEST_SHA256)) {
3693 bestDigestType = dscontent->d_digesttype;
3694 }
3695 dsMap.insert(*dscontent);
3696 }
3697 }
3698 else if (record.d_type == QType::CNAME && record.d_name == zone) {
3699 gotCNAME = true;
3700 }
3701 }
3702
3703 /* RFC 4509 section 3: "Validator implementations SHOULD ignore DS RRs containing SHA-1
3704 * digests if DS RRs with SHA-256 digests are present in the DS RRset."
3705 * We interpret that as: do not use SHA-1 if SHA-256 or SHA-384 is available
3706 */
3707 for (auto dsrec = dsMap.begin(); dsrec != dsMap.end();) {
3708 if (dsrec->d_digesttype == DNSSECKeeper::DIGEST_SHA1 && dsrec->d_digesttype != bestDigestType) {
3709 dsrec = dsMap.erase(dsrec);
3710 }
3711 else {
3712 ++dsrec;
3713 }
3714 }
3715
3716 if (rcode == RCode::NoError) {
3717 if (dsMap.empty()) {
3718 /* we have no DS, it's either:
3719 - a delegation to a non-DNSSEC signed zone
3720 - no delegation, we stay in the same zone
3721 */
3722 if (gotCNAME || denialProvesNoDelegation(zone, dsrecords, d_validationContext)) {
3723 /* we are still inside the same zone */
3724
3725 if (foundCut != nullptr) {
3726 *foundCut = false;
3727 }
3728 return context.state;
3729 }
3730
3731 d_cutStates[zone] = context.state == vState::Secure ? vState::Insecure : context.state;
3732 /* delegation with no DS, might be Secure -> Insecure */
3733 if (foundCut != nullptr) {
3734 *foundCut = true;
3735 }
3736
3737 /* a delegation with no DS is either:
3738 - a signed zone (Secure) to an unsigned one (Insecure)
3739 - an unsigned zone to another unsigned one (Insecure stays Insecure, Bogus stays Bogus)
3740 */
3741 return context.state == vState::Secure ? vState::Insecure : context.state;
3742 }
3743 /* we have a DS */
3744 d_cutStates[zone] = context.state;
3745 if (foundCut != nullptr) {
3746 *foundCut = true;
3747 }
3748 }
3749
3750 return context.state;
3751 }
3752
3753 vState SyncRes::getValidationStatus(const DNSName& name, bool wouldBeValid, bool typeIsDS, unsigned int depth, const string& prefix)
3754 {
3755 vState result = vState::Indeterminate;
3756
3757 if (!shouldValidate()) {
3758 return result;
3759 }
3760
3761 DNSName subdomain(name);
3762 if (typeIsDS) {
3763 subdomain.chopOff();
3764 }
3765
3766 {
3767 const auto& iter = d_cutStates.find(subdomain);
3768 if (iter != d_cutStates.cend()) {
3769 LOG(prefix << name << ": Got status " << iter->second << " for name " << subdomain << endl);
3770 return iter->second;
3771 }
3772 }
3773
3774 /* look for the best match we have */
3775 DNSName best(subdomain);
3776 while (best.chopOff()) {
3777 const auto& iter = d_cutStates.find(best);
3778 if (iter != d_cutStates.cend()) {
3779 result = iter->second;
3780 if (vStateIsBogus(result) || result == vState::Insecure) {
3781 LOG(prefix << name << ": Got status " << result << " for name " << best << endl);
3782 return result;
3783 }
3784 break;
3785 }
3786 }
3787
3788 /* by now we have the best match, it's likely Secure (otherwise we would not be there)
3789 but we don't know if we missed a cut (or several).
3790 We could see if we have DS (or denial of) in cache but let's not worry for now,
3791 we will if we don't have a signature, or if the signer doesn't match what we expect */
3792 if (!wouldBeValid && best != subdomain) {
3793 /* no signatures or Bogus, we likely missed a cut, let's try to find it */
3794 LOG(prefix << name << ": No or invalid signature/proof for " << name << ", we likely missed a cut between " << best << " and " << subdomain << ", looking for it" << endl);
3795 DNSName dsName(best);
3796 std::vector<string> labelsToAdd = subdomain.makeRelative(dsName).getRawLabels();
3797
3798 while (!labelsToAdd.empty()) {
3799
3800 dsName.prependRawLabel(labelsToAdd.back());
3801 labelsToAdd.pop_back();
3802 LOG(prefix << name << ": - Looking for a DS at " << dsName << endl);
3803
3804 bool foundCut = false;
3805 dsmap_t results;
3806 vState dsState = getDSRecords(dsName, results, false, depth, prefix, false, &foundCut);
3807
3808 if (foundCut) {
3809 LOG(prefix << name << ": - Found cut at " << dsName << endl);
3810 LOG(prefix << name << ": New state for " << dsName << " is " << dsState << endl);
3811 d_cutStates[dsName] = dsState;
3812
3813 if (dsState != vState::Secure) {
3814 return dsState;
3815 }
3816 }
3817 }
3818
3819 /* we did not miss a cut, good luck */
3820 return result;
3821 }
3822
3823 #if 0
3824 /* we don't need this, we actually do the right thing later */
3825 DNSName signer = getSigner(signatures);
3826
3827 if (!signer.empty() && name.isPartOf(signer)) {
3828 if (signer == best) {
3829 return result;
3830 }
3831 /* the zone cut is not the one we expected,
3832 this is fine because we will retrieve the needed DNSKEYs and DSs
3833 later, and even go Insecure if we missed a cut to Insecure (no DS)
3834 and the signatures do not validate (we should not go Bogus in that
3835 case) */
3836 }
3837 /* something is not right, but let's not worry about that for now.. */
3838 #endif
3839
3840 return result;
3841 }
3842
3843 vState SyncRes::validateDNSKeys(const DNSName& zone, const std::vector<DNSRecord>& dnskeys, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures, unsigned int depth, const string& prefix)
3844 {
3845 dsmap_t dsMap;
3846 if (signatures.empty()) {
3847 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but no signature, going Bogus!" << endl);
3848 return vState::BogusNoRRSIG;
3849 }
3850
3851 DNSName signer = getSigner(signatures);
3852
3853 if (!signer.empty() && zone.isPartOf(signer)) {
3854 vState state = getDSRecords(signer, dsMap, false, depth, prefix);
3855
3856 if (state != vState::Secure) {
3857 return state;
3858 }
3859 }
3860 else {
3861 LOG(prefix << zone << ": We have " << std::to_string(dnskeys.size()) << " DNSKEYs but the zone (" << zone << ") is not part of the signer (" << signer << "), check that we did not miss a zone cut" << endl);
3862 /* try again to get the missed cuts, harder this time */
3863 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3864 if (zState == vState::Secure) {
3865 /* too bad */
3866 LOG(prefix << zone << ": After checking the zone cuts again, we still have " << std::to_string(dnskeys.size()) << " DNSKEYs and the zone (" << zone << ") is still not part of the signer (" << signer << "), going Bogus!" << endl);
3867 return vState::BogusNoValidRRSIG;
3868 }
3869 return zState;
3870 }
3871
3872 skeyset_t tentativeKeys;
3873 sortedRecords_t toSign;
3874
3875 for (const auto& dnskey : dnskeys) {
3876 if (dnskey.d_type == QType::DNSKEY) {
3877 auto content = getRR<DNSKEYRecordContent>(dnskey);
3878 if (content) {
3879 tentativeKeys.insert(content);
3880 toSign.insert(content);
3881 }
3882 }
3883 }
3884
3885 LOG(prefix << zone << ": Trying to validate " << std::to_string(tentativeKeys.size()) << " DNSKEYs with " << std::to_string(dsMap.size()) << " DS" << endl);
3886 skeyset_t validatedKeys;
3887 auto state = validateDNSKeysAgainstDS(d_now.tv_sec, zone, dsMap, tentativeKeys, toSign, signatures, validatedKeys, LogObject(prefix), d_validationContext);
3888
3889 if (s_maxvalidationsperq != 0 && d_validationContext.d_validationsCounter > s_maxvalidationsperq) {
3890 throw ImmediateServFailException("Server Failure while validating DNSKEYs, too many signature validations for this query");
3891 }
3892
3893 LOG(prefix << zone << ": We now have " << std::to_string(validatedKeys.size()) << " DNSKEYs" << endl);
3894
3895 /* if we found at least one valid RRSIG covering the set,
3896 all tentative keys are validated keys. Otherwise it means
3897 we haven't found at least one DNSKEY and a matching RRSIG
3898 covering this set, this looks Bogus. */
3899 if (validatedKeys.size() != tentativeKeys.size()) {
3900 LOG(prefix << zone << ": Let's check whether we missed a zone cut before returning a Bogus state from " << static_cast<const char*>(__func__) << "(" << zone << ")" << endl);
3901 /* try again to get the missed cuts, harder this time */
3902 auto zState = getValidationStatus(zone, false, false, depth, prefix);
3903 if (zState == vState::Secure) {
3904 /* too bad */
3905 LOG(prefix << zone << ": After checking the zone cuts we are still in a Secure zone, returning Bogus state from " << static_cast<const char*>(__func__) << "(" << zone << ")" << endl);
3906 return state;
3907 }
3908 return zState;
3909 }
3910
3911 return state;
3912 }
3913
3914 vState SyncRes::getDNSKeys(const DNSName& signer, skeyset_t& keys, bool& servFailOccurred, unsigned int depth, const string& prefix)
3915 {
3916 std::vector<DNSRecord> records;
3917 std::set<GetBestNSAnswer> beenthere;
3918 LOG(prefix << signer << ": Retrieving DNSKEYs" << endl);
3919
3920 Context context;
3921
3922 const bool oldCacheOnly = setCacheOnly(false);
3923 int rcode = doResolve(signer, QType::DNSKEY, records, depth + 1, beenthere, context);
3924 setCacheOnly(oldCacheOnly);
3925
3926 if (rcode == RCode::ServFail) {
3927 servFailOccurred = true;
3928 return vState::BogusUnableToGetDNSKEYs;
3929 }
3930
3931 if (rcode == RCode::NoError) {
3932 if (context.state == vState::Secure) {
3933 for (const auto& key : records) {
3934 if (key.d_type == QType::DNSKEY) {
3935 auto content = getRR<DNSKEYRecordContent>(key);
3936 if (content) {
3937 keys.insert(content);
3938 }
3939 }
3940 }
3941 }
3942 LOG(prefix << signer << ": Retrieved " << keys.size() << " DNSKeys, state is " << context.state << endl);
3943 return context.state;
3944 }
3945
3946 if (context.state == vState::Insecure) {
3947 return context.state;
3948 }
3949
3950 LOG(prefix << signer << ": Returning Bogus state from " << static_cast<const char*>(__func__) << "(" << signer << ")" << endl);
3951 return vState::BogusUnableToGetDNSKEYs;
3952 }
3953
3954 vState SyncRes::validateRecordsWithSigs(unsigned int depth, const string& prefix, const DNSName& qname, const QType qtype, const DNSName& name, const QType type, const std::vector<DNSRecord>& records, const std::vector<std::shared_ptr<const RRSIGRecordContent>>& signatures)
3955 {
3956 skeyset_t keys;
3957 if (signatures.empty()) {
3958 LOG(prefix << qname << ": Bogus!" << endl);
3959 return vState::BogusNoRRSIG;
3960 }
3961
3962 const DNSName signer = getSigner(signatures);
3963 bool dsFailed = false;
3964 if (!signer.empty() && name.isPartOf(signer)) {
3965 vState state = vState::Secure;
3966
3967 if ((qtype == QType::DNSKEY || qtype == QType::DS) && signer == qname) {
3968 /* we are already retrieving those keys, sorry */
3969 if (type == QType::DS && signer == name && !signer.isRoot()) {
3970 /* Unless we are getting the DS of the root zone, we should never see a
3971 DS (or a denial of a DS) signed by the DS itself, since we should be
3972 requesting it from the parent zone. Something is very wrong */
3973 LOG(prefix << qname << ": The DS for " << qname << " is signed by itself" << endl);
3974 state = vState::BogusSelfSignedDS;
3975 dsFailed = true;
3976 }
3977 else if (qtype == QType::DS && signer == qname && !signer.isRoot()) {
3978 if (type == QType::SOA || type == QType::NSEC || type == QType::NSEC3) {
3979 /* if we are trying to validate the DS or more likely NSEC(3)s proving that it does not exist, we have a problem.
3980 In that case let's go Bogus (we will check later if we missed a cut)
3981 */
3982 state = vState::BogusSelfSignedDS;
3983 dsFailed = true;
3984 }
3985 else if (type == QType::CNAME) {
3986 state = vState::BogusUnableToGetDSs;
3987 dsFailed = true;
3988 }
3989 }
3990 else if (qtype == QType::DNSKEY && signer == qname) {
3991 /* that actually does happen when a server returns NS records in authority
3992 along with the DNSKEY, leading us to trying to validate the RRSIGs for
3993 the NS with the DNSKEY that we are about to process. */
3994 if ((name == signer && type == QType::NSEC) || type == QType::NSEC3) {
3995 /* if we are trying to validate the DNSKEY (should not happen here),
3996 or more likely NSEC(3)s proving that it does not exist, we have a problem.
3997 In that case let's see if the DS does exist, and if it does let's go Bogus
3998 */
3999 dsmap_t results;
4000 vState dsState = getDSRecords(signer, results, false, depth, prefix, true);
4001 if (vStateIsBogus(dsState) || dsState == vState::Insecure) {
4002 state = dsState;
4003 if (vStateIsBogus(dsState)) {
4004 dsFailed = true;
4005 }
4006 }
4007 else {
4008 LOG(prefix << qname << ": Unable to get the DS for " << signer << endl);
4009 state = vState::BogusUnableToGetDNSKEYs;
4010 dsFailed = true;
4011 }
4012 }
4013 else {
4014 /* return immediately since looking at the cuts is not going to change the
4015 fact that we are looking at a signature done with the key we are trying to
4016 obtain */
4017 LOG(prefix << qname << ": We are looking at a signature done with the key we are trying to obtain " << signer << endl);
4018 return vState::Indeterminate;
4019 }
4020 }
4021 }
4022 bool servFailOccurred = false;
4023 if (state == vState::Secure) {
4024 state = getDNSKeys(signer, keys, servFailOccurred, depth, prefix);
4025 }
4026
4027 if (state != vState::Secure) {
4028 if (!vStateIsBogus(state)) {
4029 return state;
4030 }
4031 /* try again to get the missed cuts, harder this time */
4032 LOG(prefix << signer << ": Checking whether we missed a zone cut for " << signer << " before returning a Bogus state for " << name << "|" << type.toString() << endl);
4033 auto zState = getValidationStatus(signer, false, dsFailed, depth, prefix);
4034 if (zState == vState::Secure) {
4035 if (state == vState::BogusUnableToGetDNSKEYs && servFailOccurred) {
4036 throw ImmediateServFailException("Server Failure while retrieving DNSKEY records for " + signer.toLogString());
4037 }
4038 /* too bad */
4039 LOG(prefix << signer << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
4040 return state;
4041 }
4042 return zState;
4043 }
4044 }
4045
4046 sortedRecords_t recordcontents;
4047 for (const auto& record : records) {
4048 recordcontents.insert(record.getContent());
4049 }
4050
4051 LOG(prefix << name << ": Going to validate " << recordcontents.size() << " record contents with " << signatures.size() << " sigs and " << keys.size() << " keys for " << name << "|" << type.toString() << endl);
4052 vState state = validateWithKeySet(d_now.tv_sec, name, recordcontents, signatures, keys, LogObject(prefix), d_validationContext, false);
4053 if (s_maxvalidationsperq != 0 && d_validationContext.d_validationsCounter > s_maxvalidationsperq) {
4054 throw ImmediateServFailException("Server Failure while validating records, too many signature validations for this query");
4055 }
4056
4057 if (state == vState::Secure) {
4058 LOG(prefix << name << ": Secure!" << endl);
4059 return vState::Secure;
4060 }
4061
4062 LOG(prefix << vStateToString(state) << "!" << endl);
4063 /* try again to get the missed cuts, harder this time */
4064 auto zState = getValidationStatus(name, false, type == QType::DS, depth, prefix);
4065 LOG(prefix << name << ": Checking whether we missed a zone cut before returning a Bogus state" << endl);
4066 if (zState == vState::Secure) {
4067 /* too bad */
4068 LOG(prefix << name << ": We are still in a Secure zone, returning " << vStateToString(state) << endl);
4069 return state;
4070 }
4071 return zState;
4072 }
4073
4074 /* This function will check whether the answer should have the AA bit set, and will set if it should be set and isn't.
4075 This is unfortunately needed to deal with very crappy so-called DNS servers */
4076 void SyncRes::fixupAnswer(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
4077 {
4078 const bool wasForwardRecurse = wasForwarded && rdQuery;
4079
4080 if (wasForwardRecurse || lwr.d_aabit) {
4081 /* easy */
4082 return;
4083 }
4084
4085 for (const auto& rec : lwr.d_records) {
4086
4087 if (rec.d_type == QType::OPT) {
4088 continue;
4089 }
4090
4091 if (rec.d_class != QClass::IN) {
4092 continue;
4093 }
4094
4095 if (rec.d_type == QType::ANY) {
4096 continue;
4097 }
4098
4099 if (rec.d_place == DNSResourceRecord::ANSWER && (rec.d_type == qtype || rec.d_type == QType::CNAME || qtype == QType::ANY) && rec.d_name == qname && rec.d_name.isPartOf(auth)) {
4100 /* This is clearly an answer to the question we were asking, from an authoritative server that is allowed to send it.
4101 We are going to assume this server is broken and does not know it should set the AA bit, even though it is DNS 101 */
4102 LOG(prefix << qname << ": Received a record for " << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << " in the answer section from " << auth << ", without the AA bit set. Assuming this server is clueless and setting the AA bit." << endl);
4103 lwr.d_aabit = true;
4104 return;
4105 }
4106
4107 if (rec.d_place != DNSResourceRecord::ANSWER) {
4108 /* we have scanned all the records in the answer section, if any, we are done */
4109 return;
4110 }
4111 }
4112 }
4113
4114 static void allowAdditionalEntry(std::unordered_set<DNSName>& allowedAdditionals, const DNSRecord& rec)
4115 {
4116 switch (rec.d_type) {
4117 case QType::MX:
4118 if (auto mxContent = getRR<MXRecordContent>(rec)) {
4119 allowedAdditionals.insert(mxContent->d_mxname);
4120 }
4121 break;
4122 case QType::NS:
4123 if (auto nsContent = getRR<NSRecordContent>(rec)) {
4124 allowedAdditionals.insert(nsContent->getNS());
4125 }
4126 break;
4127 case QType::SRV:
4128 if (auto srvContent = getRR<SRVRecordContent>(rec)) {
4129 allowedAdditionals.insert(srvContent->d_target);
4130 }
4131 break;
4132 case QType::SVCB: /* fall-through */
4133 case QType::HTTPS:
4134 if (auto svcbContent = getRR<SVCBBaseRecordContent>(rec)) {
4135 if (svcbContent->getPriority() > 0) {
4136 DNSName target = svcbContent->getTarget();
4137 if (target.isRoot()) {
4138 target = rec.d_name;
4139 }
4140 allowedAdditionals.insert(target);
4141 }
4142 else {
4143 // FIXME: Alias mode not implemented yet
4144 }
4145 }
4146 break;
4147 case QType::NAPTR:
4148 if (auto naptrContent = getRR<NAPTRRecordContent>(rec)) {
4149 auto flags = naptrContent->getFlags();
4150 toLowerInPlace(flags);
4151 if (flags.find('a') != string::npos || flags.find('s') != string::npos) {
4152 allowedAdditionals.insert(naptrContent->getReplacement());
4153 }
4154 }
4155 break;
4156 default:
4157 break;
4158 }
4159 }
4160
4161 void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery)
4162 {
4163 const bool wasForwardRecurse = wasForwarded && rdQuery;
4164 /* list of names for which we will allow A and AAAA records in the additional section
4165 to remain */
4166 std::unordered_set<DNSName> allowedAdditionals = {qname};
4167 bool haveAnswers = false;
4168 bool isNXDomain = false;
4169 bool isNXQType = false;
4170
4171 for (auto rec = lwr.d_records.begin(); rec != lwr.d_records.end();) {
4172
4173 if (rec->d_type == QType::OPT) {
4174 ++rec;
4175 continue;
4176 }
4177
4178 if (rec->d_class != QClass::IN) {
4179 LOG(prefix << qname << ": Removing non internet-classed data received from " << auth << endl);
4180 rec = lwr.d_records.erase(rec);
4181 continue;
4182 }
4183
4184 if (rec->d_type == QType::ANY) {
4185 LOG(prefix << qname << ": Removing 'ANY'-typed data received from " << auth << endl);
4186 rec = lwr.d_records.erase(rec);
4187 continue;
4188 }
4189
4190 if (!rec->d_name.isPartOf(auth)) {
4191 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4192 rec = lwr.d_records.erase(rec);
4193 continue;
4194 }
4195
4196 /* dealing with the records in answer */
4197 if (!(lwr.d_aabit || wasForwardRecurse) && rec->d_place == DNSResourceRecord::ANSWER) {
4198 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4199 are sending such responses */
4200 if (rec->d_type != QType::CNAME || qname != rec->d_name) {
4201 LOG(prefix << qname << ": Removing record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the answer section without the AA bit set received from " << auth << endl);
4202 rec = lwr.d_records.erase(rec);
4203 continue;
4204 }
4205 }
4206
4207 if (rec->d_type == QType::DNAME && (rec->d_place != DNSResourceRecord::ANSWER || !qname.isPartOf(rec->d_name))) {
4208 LOG(prefix << qname << ": Removing invalid DNAME record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section received from " << auth << endl);
4209 rec = lwr.d_records.erase(rec);
4210 continue;
4211 }
4212
4213 if (rec->d_place == DNSResourceRecord::ANSWER && (qtype != QType::ANY && rec->d_type != qtype.getCode() && s_redirectionQTypes.count(rec->d_type) == 0 && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG)) {
4214 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ANSWER section received from " << auth << endl);
4215 rec = lwr.d_records.erase(rec);
4216 continue;
4217 }
4218
4219 if (rec->d_place == DNSResourceRecord::ANSWER && !haveAnswers) {
4220 haveAnswers = true;
4221 }
4222
4223 if (rec->d_place == DNSResourceRecord::ANSWER) {
4224 allowAdditionalEntry(allowedAdditionals, *rec);
4225 }
4226
4227 /* dealing with the records in authority */
4228 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type != QType::NS && rec->d_type != QType::DS && rec->d_type != QType::SOA && rec->d_type != QType::RRSIG && rec->d_type != QType::NSEC && rec->d_type != QType::NSEC3) {
4229 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4230 rec = lwr.d_records.erase(rec);
4231 continue;
4232 }
4233
4234 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::SOA) {
4235 if (!qname.isPartOf(rec->d_name)) {
4236 LOG(prefix << qname << ": Removing irrelevant SOA record '" << rec->d_name << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4237 rec = lwr.d_records.erase(rec);
4238 continue;
4239 }
4240
4241 if (!(lwr.d_aabit || wasForwardRecurse)) {
4242 LOG(prefix << qname << ": Removing irrelevant record (AA not set) '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the AUTHORITY section received from " << auth << endl);
4243 rec = lwr.d_records.erase(rec);
4244 continue;
4245 }
4246
4247 if (!haveAnswers) {
4248 if (lwr.d_rcode == RCode::NXDomain) {
4249 isNXDomain = true;
4250 }
4251 else if (lwr.d_rcode == RCode::NoError) {
4252 isNXQType = true;
4253 }
4254 }
4255 }
4256
4257 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && (isNXDomain || isNXQType)) {
4258 /*
4259 * We don't want to pick up NS records in AUTHORITY and their ADDITIONAL sections of NXDomain answers
4260 * because they are somewhat easy to insert into a large, fragmented UDP response
4261 * for an off-path attacker by injecting spoofed UDP fragments. So do not add these to allowedAdditionals.
4262 */
4263 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a " << (isNXDomain ? "NXD" : "NXQTYPE") << " response received from " << auth << endl);
4264 rec = lwr.d_records.erase(rec);
4265 continue;
4266 }
4267
4268 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS && !d_updatingRootNS && rec->d_name == g_rootdnsname) {
4269 /*
4270 * We don't want to pick up root NS records in AUTHORITY and their associated ADDITIONAL sections of random queries.
4271 * So don't add them to allowedAdditionals.
4272 */
4273 LOG(prefix << qname << ": Removing NS record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the " << (int)rec->d_place << " section of a response received from " << auth << endl);
4274 rec = lwr.d_records.erase(rec);
4275 continue;
4276 }
4277
4278 if (rec->d_place == DNSResourceRecord::AUTHORITY && rec->d_type == QType::NS) {
4279 allowAdditionalEntry(allowedAdditionals, *rec);
4280 }
4281
4282 /* dealing with the records in additional */
4283 if (rec->d_place == DNSResourceRecord::ADDITIONAL && rec->d_type != QType::A && rec->d_type != QType::AAAA && rec->d_type != QType::RRSIG) {
4284 LOG(prefix << qname << ": Removing irrelevant record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4285 rec = lwr.d_records.erase(rec);
4286 continue;
4287 }
4288
4289 if (rec->d_place == DNSResourceRecord::ADDITIONAL && allowedAdditionals.count(rec->d_name) == 0) {
4290 LOG(prefix << qname << ": Removing irrelevant additional record '" << rec->d_name << "|" << DNSRecordContent::NumberToType(rec->d_type) << "|" << rec->getContent()->getZoneRepresentation() << "' in the ADDITIONAL section received from " << auth << endl);
4291 rec = lwr.d_records.erase(rec);
4292 continue;
4293 }
4294
4295 ++rec;
4296 }
4297 }
4298
4299 void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector<DNSRecord>& newRecords, unsigned int depth, const string& prefix)
4300 {
4301 vector<DNSRecord> existing;
4302 bool wasAuth = false;
4303 auto ttl = g_recCache->get(d_now.tv_sec, domain, QType::NS, MemRecursorCache::None, &existing, d_cacheRemote, d_routingTag, nullptr, nullptr, nullptr, nullptr, &wasAuth);
4304
4305 if (ttl <= 0 || wasAuth) {
4306 return;
4307 }
4308 {
4309 auto lock = s_savedParentNSSet.lock();
4310 if (lock->find(domain) != lock->end()) {
4311 // no relevant data, or we already stored the parent data
4312 return;
4313 }
4314 }
4315
4316 set<DNSName> authSet;
4317 for (const auto& dnsRecord : newRecords) {
4318 auto content = getRR<NSRecordContent>(dnsRecord);
4319 authSet.insert(content->getNS());
4320 }
4321 // The glue IPs could also differ, but we're not checking that yet, we're only looking for parent NS records not
4322 // in the child set
4323 bool shouldSave = false;
4324 for (const auto& dnsRecord : existing) {
4325 auto content = getRR<NSRecordContent>(dnsRecord);
4326 if (authSet.count(content->getNS()) == 0) {
4327 LOG(prefix << domain << ": At least one parent-side NS was not in the child-side NS set, remembering parent NS set and cached IPs" << endl);
4328 shouldSave = true;
4329 break;
4330 }
4331 }
4332
4333 if (shouldSave) {
4334 map<DNSName, vector<ComboAddress>> entries;
4335 for (const auto& dnsRecord : existing) {
4336 auto content = getRR<NSRecordContent>(dnsRecord);
4337 const DNSName& name = content->getNS();
4338 set<GetBestNSAnswer> beenthereIgnored;
4339 unsigned int nretrieveAddressesForNSIgnored{};
4340 auto addresses = getAddrs(name, depth, prefix, beenthereIgnored, true, nretrieveAddressesForNSIgnored);
4341 entries.emplace(name, addresses);
4342 }
4343 s_savedParentNSSet.lock()->emplace(domain, std::move(entries), d_now.tv_sec + ttl);
4344 }
4345 }
4346
4347 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask>& ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery, const ComboAddress& remoteIP) // NOLINT(readability-function-cognitive-complexity)
4348 {
4349 bool wasForwardRecurse = wasForwarded && rdQuery;
4350 tcache_t tcache;
4351
4352 fixupAnswer(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4353 sanitizeRecords(prefix, lwr, qname, qtype, auth, wasForwarded, rdQuery);
4354
4355 std::vector<std::shared_ptr<DNSRecord>> authorityRecs;
4356 const unsigned int labelCount = qname.countLabels();
4357 bool isCNAMEAnswer = false;
4358 bool isDNAMEAnswer = false;
4359 DNSName seenAuth;
4360
4361 for (auto& rec : lwr.d_records) {
4362 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4363 continue;
4364 }
4365
4366 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4367
4368 if (!isCNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::CNAME && (!(qtype == QType::CNAME)) && rec.d_name == qname && !isDNAMEAnswer) {
4369 isCNAMEAnswer = true;
4370 }
4371 if (!isDNAMEAnswer && rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == QType::DNAME && qtype != QType::DNAME && qname.isPartOf(rec.d_name)) {
4372 isDNAMEAnswer = true;
4373 isCNAMEAnswer = false;
4374 }
4375
4376 if (rec.d_type == QType::SOA && rec.d_place == DNSResourceRecord::AUTHORITY && qname.isPartOf(rec.d_name)) {
4377 seenAuth = rec.d_name;
4378 }
4379
4380 if (rec.d_type == QType::RRSIG) {
4381 auto rrsig = getRR<RRSIGRecordContent>(rec);
4382 if (rrsig) {
4383 /* As illustrated in rfc4035's Appendix B.6, the RRSIG label
4384 count can be lower than the name's label count if it was
4385 synthesized from the wildcard. Note that the difference might
4386 be > 1. */
4387 if (rec.d_name == qname && isWildcardExpanded(labelCount, *rrsig)) {
4388 gatherWildcardProof = true;
4389 if (!isWildcardExpandedOntoItself(rec.d_name, labelCount, *rrsig)) {
4390 /* if we have a wildcard expanded onto itself, we don't need to prove
4391 that the exact name doesn't exist because it actually does.
4392 We still want to gather the corresponding NSEC/NSEC3 records
4393 to pass them to our client in case it wants to validate by itself.
4394 */
4395 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard, we need a wildcard proof" << endl);
4396 needWildcardProof = true;
4397 }
4398 else {
4399 LOG(prefix << qname << ": RRSIG indicates the name was synthesized from a wildcard expanded onto itself, we need to gather wildcard proof" << endl);
4400 }
4401 wildcardLabelsCount = rrsig->d_labels;
4402 }
4403
4404 // cerr<<"Got an RRSIG for "<<DNSRecordContent::NumberToType(rrsig->d_type)<<" with name '"<<rec.d_name<<"' and place "<<rec.d_place<<endl;
4405 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signatures.push_back(rrsig);
4406 tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL = std::min(tcache[{rec.d_name, rrsig->d_type, rec.d_place}].signaturesTTL, rec.d_ttl);
4407 }
4408 }
4409 }
4410
4411 /* if we have a positive answer synthesized from a wildcard,
4412 we need to store the corresponding NSEC/NSEC3 records proving
4413 that the exact name did not exist in the negative cache */
4414 if (gatherWildcardProof) {
4415 for (const auto& rec : lwr.d_records) {
4416 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4417 continue;
4418 }
4419
4420 if (nsecTypes.count(rec.d_type) != 0) {
4421 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4422 }
4423 else if (rec.d_type == QType::RRSIG) {
4424 auto rrsig = getRR<RRSIGRecordContent>(rec);
4425 if (rrsig && nsecTypes.count(rrsig->d_type) != 0) {
4426 authorityRecs.push_back(std::make_shared<DNSRecord>(rec));
4427 }
4428 }
4429 }
4430 }
4431
4432 // reap all answers from this packet that are acceptable
4433 for (auto& rec : lwr.d_records) {
4434 if (rec.d_type == QType::OPT) {
4435 LOG(prefix << qname << ": OPT answer '" << rec.d_name << "' from '" << auth << "' nameservers" << endl);
4436 continue;
4437 }
4438
4439 LOG(prefix << qname << ": Accept answer '" << rec.d_name << "|" << DNSRecordContent::NumberToType(rec.d_type) << "|" << rec.getContent()->getZoneRepresentation() << "' from '" << auth << "' nameservers? ttl=" << rec.d_ttl << ", place=" << (int)rec.d_place << " ");
4440
4441 // We called sanitizeRecords before, so all ANY, non-IN and non-aa/non-forwardrecurse answer records are already removed
4442
4443 if (rec.d_name.isPartOf(auth)) {
4444 if (rec.d_type == QType::RRSIG) {
4445 LOG("RRSIG - separate" << endl);
4446 }
4447 else if (rec.d_type == QType::DS && rec.d_name == auth) {
4448 LOG("NO - DS provided by child zone" << endl);
4449 }
4450 else {
4451 bool haveLogged = false;
4452 if (isDNAMEAnswer && rec.d_type == QType::CNAME) {
4453 LOG("NO - we already have a DNAME answer for this domain" << endl);
4454 continue;
4455 }
4456 if (!t_sstorage.domainmap->empty()) {
4457 // Check if we are authoritative for a zone in this answer
4458 DNSName tmp_qname(rec.d_name);
4459 // We may be auth for domain example.com, but the DS record needs to come from the parent (.com) nameserver
4460 if (rec.d_type == QType::DS) {
4461 tmp_qname.chopOff();
4462 }
4463 auto auth_domain_iter = getBestAuthZone(&tmp_qname);
4464 if (auth_domain_iter != t_sstorage.domainmap->end() && auth.countLabels() <= auth_domain_iter->first.countLabels()) {
4465 if (auth_domain_iter->first != auth) {
4466 LOG("NO! - we are authoritative for the zone " << auth_domain_iter->first << endl);
4467 continue;
4468 }
4469 LOG("YES! - This answer was ");
4470 if (!wasForwarded) {
4471 LOG("retrieved from the local auth store.");
4472 }
4473 else {
4474 LOG("received from a server we forward to.");
4475 }
4476 haveLogged = true;
4477 LOG(endl);
4478 }
4479 }
4480 if (!haveLogged) {
4481 LOG("YES!" << endl);
4482 }
4483
4484 rec.d_ttl = min(s_maxcachettl, rec.d_ttl);
4485
4486 DNSRecord dnsRecord(rec);
4487 tcache[{rec.d_name, rec.d_type, rec.d_place}].d_ttl_time = d_now.tv_sec;
4488 dnsRecord.d_ttl += d_now.tv_sec;
4489 dnsRecord.d_place = DNSResourceRecord::ANSWER;
4490 tcache[{rec.d_name, rec.d_type, rec.d_place}].records.push_back(dnsRecord);
4491 }
4492 }
4493 else
4494 LOG("NO!" << endl);
4495 }
4496
4497 // supplant
4498 for (auto& entry : tcache) {
4499 if ((entry.second.records.size() + entry.second.signatures.size() + authorityRecs.size()) > 1) { // need to group the ttl to be the minimum of the RRSET (RFC 2181, 5.2)
4500 uint32_t lowestTTD = computeLowestTTD(entry.second.records, entry.second.signatures, entry.second.signaturesTTL, authorityRecs);
4501
4502 for (auto& record : entry.second.records) {
4503 record.d_ttl = lowestTTD; // boom
4504 }
4505 }
4506 }
4507
4508 for (auto tCacheEntry = tcache.begin(); tCacheEntry != tcache.end(); ++tCacheEntry) {
4509
4510 if (tCacheEntry->second.records.empty()) { // this happens when we did store signatures, but passed on the records themselves
4511 continue;
4512 }
4513
4514 /* Even if the AA bit is set, additional data cannot be considered
4515 as authoritative. This is especially important during validation
4516 because keeping records in the additional section is allowed even
4517 if the corresponding RRSIGs are not included, without setting the TC
4518 bit, as stated in rfc4035's section 3.1.1. Including RRSIG RRs in a Response:
4519 "When placing a signed RRset in the Additional section, the name
4520 server MUST also place its RRSIG RRs in the Additional section.
4521 If space does not permit inclusion of both the RRset and its
4522 associated RRSIG RRs, the name server MAY retain the RRset while
4523 dropping the RRSIG RRs. If this happens, the name server MUST NOT
4524 set the TC bit solely because these RRSIG RRs didn't fit."
4525 */
4526 bool isAA = lwr.d_aabit && tCacheEntry->first.place != DNSResourceRecord::ADDITIONAL;
4527 /* if we forwarded the query to a recursor, we can expect the answer to be signed,
4528 even if the answer is not AA. Of course that's not only true inside a Secure
4529 zone, but we check that below. */
4530 bool expectSignature = tCacheEntry->first.place == DNSResourceRecord::ANSWER || ((lwr.d_aabit || wasForwardRecurse) && tCacheEntry->first.place != DNSResourceRecord::ADDITIONAL);
4531 /* in a non authoritative answer, we only care about the DS record (or lack of) */
4532 if (!isAA && (tCacheEntry->first.type == QType::DS || tCacheEntry->first.type == QType::NSEC || tCacheEntry->first.type == QType::NSEC3) && tCacheEntry->first.place == DNSResourceRecord::AUTHORITY) {
4533 expectSignature = true;
4534 }
4535
4536 if (isCNAMEAnswer && (tCacheEntry->first.place != DNSResourceRecord::ANSWER || tCacheEntry->first.type != QType::CNAME || tCacheEntry->first.name != qname)) {
4537 /*
4538 rfc2181 states:
4539 Note that the answer section of an authoritative answer normally
4540 contains only authoritative data. However when the name sought is an
4541 alias (see section 10.1.1) only the record describing that alias is
4542 necessarily authoritative. Clients should assume that other records
4543 may have come from the server's cache. Where authoritative answers
4544 are required, the client should query again, using the canonical name
4545 associated with the alias.
4546 */
4547 isAA = false;
4548 expectSignature = false;
4549 }
4550 if (isDNAMEAnswer && (tCacheEntry->first.place != DNSResourceRecord::ANSWER || tCacheEntry->first.type != QType::DNAME || !qname.isPartOf(tCacheEntry->first.name))) {
4551 /* see above */
4552 isAA = false;
4553 expectSignature = false;
4554 }
4555
4556 if ((isCNAMEAnswer || isDNAMEAnswer) && tCacheEntry->first.place == DNSResourceRecord::AUTHORITY && tCacheEntry->first.type == QType::NS && auth == tCacheEntry->first.name) {
4557 /* These NS can't be authoritative since we have a CNAME/DNAME answer for which (see above) only the
4558 record describing that alias is necessarily authoritative.
4559 But if we allow the current auth, which might be serving the child zone, to raise the TTL
4560 of non-authoritative NS in the cache, they might be able to keep a "ghost" zone alive forever,
4561 even after the delegation is gone from the parent.
4562 So let's just do nothing with them, we can fetch them directly if we need them.
4563 */
4564 LOG(prefix << qname << ": Skipping authority NS from '" << auth << "' nameservers in CNAME/DNAME answer " << tCacheEntry->first.name << "|" << DNSRecordContent::NumberToType(tCacheEntry->first.type) << endl);
4565 continue;
4566 }
4567
4568 /*
4569 * RFC 6672 section 5.3.1
4570 * In any response, a signed DNAME RR indicates a non-terminal
4571 * redirection of the query. There might or might not be a server-
4572 * synthesized CNAME in the answer section; if there is, the CNAME will
4573 * never be signed. For a DNSSEC validator, verification of the DNAME
4574 * RR and then that the CNAME was properly synthesized is sufficient
4575 * proof.
4576 *
4577 * We do the synthesis check in processRecords, here we make sure we
4578 * don't validate the CNAME.
4579 */
4580 if (isDNAMEAnswer && tCacheEntry->first.type == QType::CNAME) {
4581 expectSignature = false;
4582 }
4583
4584 vState recordState = vState::Indeterminate;
4585
4586 if (expectSignature && shouldValidate()) {
4587 vState initialState = getValidationStatus(tCacheEntry->first.name, !tCacheEntry->second.signatures.empty(), tCacheEntry->first.type == QType::DS, depth, prefix);
4588 LOG(prefix << qname << ": Got initial zone status " << initialState << " for record " << tCacheEntry->first.name << "|" << DNSRecordContent::NumberToType(tCacheEntry->first.type) << endl);
4589
4590 if (initialState == vState::Secure) {
4591 if (tCacheEntry->first.type == QType::DNSKEY && tCacheEntry->first.place == DNSResourceRecord::ANSWER && tCacheEntry->first.name == getSigner(tCacheEntry->second.signatures)) {
4592 LOG(prefix << qname << ": Validating DNSKEY for " << tCacheEntry->first.name << endl);
4593 recordState = validateDNSKeys(tCacheEntry->first.name, tCacheEntry->second.records, tCacheEntry->second.signatures, depth, prefix);
4594 }
4595 else {
4596 LOG(prefix << qname << ": Validating non-additional " << QType(tCacheEntry->first.type).toString() << " record for " << tCacheEntry->first.name << endl);
4597 recordState = validateRecordsWithSigs(depth, prefix, qname, qtype, tCacheEntry->first.name, QType(tCacheEntry->first.type), tCacheEntry->second.records, tCacheEntry->second.signatures);
4598 }
4599 }
4600 else {
4601 recordState = initialState;
4602 LOG(prefix << qname << ": Skipping validation because the current state is " << recordState << endl);
4603 }
4604
4605 LOG(prefix << qname << ": Validation result is " << recordState << ", current state is " << state << endl);
4606 if (state != recordState) {
4607 updateValidationState(qname, state, recordState, prefix);
4608 }
4609 }
4610
4611 if (vStateIsBogus(recordState)) {
4612 /* this is a TTD by now, be careful */
4613 for (auto& record : tCacheEntry->second.records) {
4614 auto newval = std::min(record.d_ttl, static_cast<uint32_t>(s_maxbogusttl + d_now.tv_sec));
4615 record.d_ttl = newval;
4616 }
4617 tCacheEntry->second.d_ttl_time = d_now.tv_sec;
4618 }
4619
4620 /* We don't need to store NSEC3 records in the positive cache because:
4621 - we don't allow direct NSEC3 queries
4622 - denial of existence proofs in wildcard expanded positive responses are stored in authorityRecs
4623 - denial of existence proofs for negative responses are stored in the negative cache
4624 We also don't want to cache non-authoritative data except for:
4625 - records coming from non forward-recurse servers (those will never be AA)
4626 - DS (special case)
4627 - NS, A and AAAA (used for infra queries)
4628 */
4629 if (tCacheEntry->first.type != QType::NSEC3 && (tCacheEntry->first.type == QType::DS || tCacheEntry->first.type == QType::NS || tCacheEntry->first.type == QType::A || tCacheEntry->first.type == QType::AAAA || isAA || wasForwardRecurse)) {
4630
4631 bool doCache = true;
4632 if (tCacheEntry->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4633 const bool isv4 = ednsmask->isIPv4();
4634 if ((isv4 && s_ecsipv4nevercache) || (!isv4 && s_ecsipv6nevercache)) {
4635 doCache = false;
4636 }
4637 // If ednsmask is relevant, we do not want to cache if the scope prefix length is large and TTL is small
4638 if (doCache && s_ecscachelimitttl > 0) {
4639 bool manyMaskBits = (isv4 && ednsmask->getBits() > s_ecsipv4cachelimit) || (!isv4 && ednsmask->getBits() > s_ecsipv6cachelimit);
4640
4641 if (manyMaskBits) {
4642 uint32_t minttl = UINT32_MAX;
4643 for (const auto& iter : tCacheEntry->second.records) {
4644 if (iter.d_ttl < minttl) {
4645 minttl = iter.d_ttl;
4646 }
4647 }
4648 bool ttlIsSmall = minttl < s_ecscachelimitttl + d_now.tv_sec;
4649 if (ttlIsSmall) {
4650 // Case: many bits and ttlIsSmall
4651 doCache = false;
4652 }
4653 }
4654 }
4655 }
4656
4657 d_fromAuthIP = remoteIP;
4658
4659 if (doCache) {
4660 // Check if we are going to replace a non-auth (parent) NS recordset
4661 if (isAA && tCacheEntry->first.type == QType::NS && s_save_parent_ns_set) {
4662 rememberParentSetIfNeeded(tCacheEntry->first.name, tCacheEntry->second.records, depth, prefix);
4663 }
4664 g_recCache->replace(d_now.tv_sec, tCacheEntry->first.name, tCacheEntry->first.type, tCacheEntry->second.records, tCacheEntry->second.signatures, authorityRecs, tCacheEntry->first.type == QType::DS ? true : isAA, auth, tCacheEntry->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, d_routingTag, recordState, remoteIP, d_refresh, tCacheEntry->second.d_ttl_time);
4665
4666 // Delete potential negcache entry. When a record recovers with serve-stale the negcache entry can cause the wrong entry to
4667 // be served, as negcache entries are checked before record cache entries
4668 if (NegCache::s_maxServedStaleExtensions > 0) {
4669 g_negCache->wipeTyped(tCacheEntry->first.name, tCacheEntry->first.type);
4670 }
4671
4672 if (g_aggressiveNSECCache && needWildcardProof && recordState == vState::Secure && tCacheEntry->first.place == DNSResourceRecord::ANSWER && tCacheEntry->first.name == qname && !tCacheEntry->second.signatures.empty() && !d_routingTag && !ednsmask) {
4673 /* we have an answer synthesized from a wildcard and aggressive NSEC is enabled, we need to store the
4674 wildcard in its non-expanded form in the cache to be able to synthesize wildcard answers later */
4675 const auto& rrsig = tCacheEntry->second.signatures.at(0);
4676
4677 if (isWildcardExpanded(labelCount, *rrsig) && !isWildcardExpandedOntoItself(tCacheEntry->first.name, labelCount, *rrsig)) {
4678 DNSName realOwner = getNSECOwnerName(tCacheEntry->first.name, tCacheEntry->second.signatures);
4679
4680 std::vector<DNSRecord> content;
4681 content.reserve(tCacheEntry->second.records.size());
4682 for (const auto& record : tCacheEntry->second.records) {
4683 DNSRecord nonExpandedRecord(record);
4684 nonExpandedRecord.d_name = realOwner;
4685 content.push_back(std::move(nonExpandedRecord));
4686 }
4687
4688 g_recCache->replace(d_now.tv_sec, realOwner, QType(tCacheEntry->first.type), content, tCacheEntry->second.signatures, /* no additional records in that case */ {}, tCacheEntry->first.type == QType::DS ? true : isAA, auth, boost::none, boost::none, recordState, remoteIP, d_refresh, tCacheEntry->second.d_ttl_time);
4689 }
4690 }
4691 }
4692 }
4693
4694 if (seenAuth.empty() && !tCacheEntry->second.signatures.empty()) {
4695 seenAuth = getSigner(tCacheEntry->second.signatures);
4696 }
4697
4698 if (g_aggressiveNSECCache && (tCacheEntry->first.type == QType::NSEC || tCacheEntry->first.type == QType::NSEC3) && recordState == vState::Secure && !seenAuth.empty()) {
4699 // Good candidate for NSEC{,3} caching
4700 g_aggressiveNSECCache->insertNSEC(seenAuth, tCacheEntry->first.name, tCacheEntry->second.records.at(0), tCacheEntry->second.signatures, tCacheEntry->first.type == QType::NSEC3);
4701 }
4702
4703 if (tCacheEntry->first.place == DNSResourceRecord::ANSWER && ednsmask) {
4704 d_wasVariable = true;
4705 }
4706 }
4707
4708 return RCode::NoError;
4709 }
4710
4711 void SyncRes::updateDenialValidationState(const DNSName& qname, vState& neValidationState, const DNSName& neName, vState& state, const dState denialState, const dState expectedState, bool isDS, unsigned int depth, const string& prefix)
4712 {
4713 if (denialState == expectedState) {
4714 neValidationState = vState::Secure;
4715 }
4716 else {
4717 if (denialState == dState::OPTOUT) {
4718 LOG(prefix << qname << ": OPT-out denial found for " << neName << endl);
4719 /* rfc5155 states:
4720 "The AD bit, as defined by [RFC4035], MUST NOT be set when returning a
4721 response containing a closest (provable) encloser proof in which the
4722 NSEC3 RR that covers the "next closer" name has the Opt-Out bit set.
4723
4724 This rule is based on what this closest encloser proof actually
4725 proves: names that would be covered by the Opt-Out NSEC3 RR may or
4726 may not exist as insecure delegations. As such, not all the data in
4727 responses containing such closest encloser proofs will have been
4728 cryptographically verified, so the AD bit cannot be set."
4729
4730 At best the Opt-Out NSEC3 RR proves that there is no signed DS (so no
4731 secure delegation).
4732 */
4733 neValidationState = vState::Insecure;
4734 }
4735 else if (denialState == dState::INSECURE) {
4736 LOG(prefix << qname << ": Insecure denial found for " << neName << ", returning Insecure" << endl);
4737 neValidationState = vState::Insecure;
4738 }
4739 else {
4740 LOG(prefix << qname << ": Invalid denial found for " << neName << ", res=" << denialState << ", expectedState=" << expectedState << ", checking whether we have missed a zone cut before returning a Bogus state" << endl);
4741 /* try again to get the missed cuts, harder this time */
4742 auto zState = getValidationStatus(neName, false, isDS, depth, prefix);
4743 if (zState != vState::Secure) {
4744 neValidationState = zState;
4745 }
4746 else {
4747 LOG(prefix << qname << ": Still in a secure zone with an invalid denial for " << neName << ", returning " << vStateToString(vState::BogusInvalidDenial) << endl);
4748 neValidationState = vState::BogusInvalidDenial;
4749 }
4750 }
4751 }
4752 updateValidationState(qname, state, neValidationState, prefix);
4753 }
4754
4755 dState SyncRes::getDenialValidationState(const NegCache::NegCacheEntry& negEntry, const dState expectedState, bool referralToUnsigned, const string& prefix)
4756 {
4757 cspmap_t csp = harvestCSPFromNE(negEntry);
4758 return getDenial(csp, negEntry.d_name, negEntry.d_qtype.getCode(), referralToUnsigned, expectedState == dState::NXQTYPE, d_validationContext, LogObject(prefix));
4759 }
4760
4761 bool SyncRes::processRecords(const std::string& prefix, const DNSName& qname, const QType qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherWildcardProof, const unsigned int wildcardLabelsCount, int& rcode, bool& negIndicHasSignatures, unsigned int depth) // // NOLINT(readability-function-cognitive-complexity)
4762 {
4763 bool done = false;
4764 DNSName dnameTarget;
4765 DNSName dnameOwner;
4766 uint32_t dnameTTL = 0;
4767 bool referralOnDS = false;
4768
4769 for (auto& rec : lwr.d_records) {
4770 if (rec.d_type == QType::OPT || rec.d_class != QClass::IN) {
4771 continue;
4772 }
4773
4774 if (rec.d_place == DNSResourceRecord::ANSWER && !(lwr.d_aabit || sendRDQuery)) {
4775 /* for now we allow a CNAME for the exact qname in ANSWER with AA=0, because Amazon DNS servers
4776 are sending such responses */
4777 if (rec.d_type != QType::CNAME || rec.d_name != qname) {
4778 continue;
4779 }
4780 }
4781 const bool negCacheIndication = rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NXDomain && qname.isPartOf(rec.d_name) && rec.d_name.isPartOf(auth);
4782
4783 bool putInNegCache = true;
4784 if (negCacheIndication && qtype == QType::DS && isForwardOrAuth(qname)) {
4785 // #10189, a NXDOMAIN to a DS query for a forwarded or auth domain should not NXDOMAIN the whole domain
4786 putInNegCache = false;
4787 }
4788
4789 if (negCacheIndication) {
4790 LOG(prefix << qname << ": Got negative caching indication for name '" << qname << "' (accept=" << rec.d_name.isPartOf(auth) << "), newtarget='" << newtarget << "'" << endl);
4791
4792 rec.d_ttl = min(rec.d_ttl, s_maxnegttl);
4793 // only add a SOA if we're not going anywhere after this
4794 if (newtarget.empty()) {
4795 ret.push_back(rec);
4796 }
4797
4798 NegCache::NegCacheEntry negEntry;
4799
4800 uint32_t lowestTTL = rec.d_ttl;
4801 /* if we get an NXDomain answer with a CNAME, the name
4802 does exist but the target does not */
4803 negEntry.d_name = newtarget.empty() ? qname : newtarget;
4804 negEntry.d_qtype = QType::ENT; // this encodes 'whole record'
4805 negEntry.d_auth = rec.d_name;
4806 harvestNXRecords(lwr.d_records, negEntry, d_now.tv_sec, &lowestTTL);
4807
4808 if (vStateIsBogus(state)) {
4809 negEntry.d_validationState = state;
4810 }
4811 else {
4812 /* here we need to get the validation status of the zone telling us that the domain does not
4813 exist, ie the owner of the SOA */
4814 auto recordState = getValidationStatus(rec.d_name, !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty(), false, depth, prefix);
4815 if (recordState == vState::Secure) {
4816 dState denialState = getDenialValidationState(negEntry, dState::NXDOMAIN, false, prefix);
4817 updateDenialValidationState(qname, negEntry.d_validationState, negEntry.d_name, state, denialState, dState::NXDOMAIN, false, depth, prefix);
4818 }
4819 else {
4820 negEntry.d_validationState = recordState;
4821 updateValidationState(qname, state, negEntry.d_validationState, prefix);
4822 }
4823 }
4824
4825 if (vStateIsBogus(negEntry.d_validationState)) {
4826 lowestTTL = min(lowestTTL, s_maxbogusttl);
4827 }
4828
4829 negEntry.d_ttd = d_now.tv_sec + lowestTTL;
4830 negEntry.d_orig_ttl = lowestTTL;
4831 /* if we get an NXDomain answer with a CNAME, let's not cache the
4832 target, even the server was authoritative for it,
4833 and do an additional query for the CNAME target.
4834 We have a regression test making sure we do exactly that.
4835 */
4836 if (newtarget.empty() && putInNegCache) {
4837 g_negCache->add(negEntry);
4838 // doCNAMECacheCheck() checks record cache and does not look into negcache. That means that an old record might be found if
4839 // serve-stale is active. Avoid that by explicitly zapping that CNAME record.
4840 if (qtype == QType::CNAME && MemRecursorCache::s_maxServedStaleExtensions > 0) {
4841 g_recCache->doWipeCache(qname, false, qtype);
4842 }
4843 if (s_rootNXTrust && negEntry.d_auth.isRoot() && auth.isRoot() && lwr.d_aabit) {
4844 negEntry.d_name = negEntry.d_name.getLastLabel();
4845 g_negCache->add(negEntry);
4846 }
4847 }
4848
4849 negIndicHasSignatures = !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty();
4850 negindic = true;
4851 }
4852 else if (rec.d_place == DNSResourceRecord::ANSWER && s_redirectionQTypes.count(rec.d_type) > 0 && // CNAME or DNAME answer
4853 s_redirectionQTypes.count(qtype.getCode()) == 0) { // But not in response to a CNAME or DNAME query
4854 if (rec.d_type == QType::CNAME && rec.d_name == qname) {
4855 if (!dnameOwner.empty()) { // We synthesize ourselves
4856 continue;
4857 }
4858 ret.push_back(rec);
4859 if (auto content = getRR<CNAMERecordContent>(rec)) {
4860 newtarget = DNSName(content->getTarget());
4861 }
4862 }
4863 else if (rec.d_type == QType::DNAME && qname.isPartOf(rec.d_name)) { // DNAME
4864 ret.push_back(rec);
4865 if (auto content = getRR<DNAMERecordContent>(rec)) {
4866 dnameOwner = rec.d_name;
4867 dnameTarget = content->getTarget();
4868 dnameTTL = rec.d_ttl;
4869 if (!newtarget.empty()) { // We had a CNAME before, remove it from ret so we don't cache it
4870 ret.erase(std::remove_if(
4871 ret.begin(),
4872 ret.end(),
4873 [&qname](DNSRecord& dnsrecord) {
4874 return (dnsrecord.d_place == DNSResourceRecord::ANSWER && dnsrecord.d_type == QType::CNAME && dnsrecord.d_name == qname);
4875 }),
4876 ret.end());
4877 }
4878 try {
4879 newtarget = qname.makeRelative(dnameOwner) + dnameTarget;
4880 }
4881 catch (const std::exception& e) {
4882 // We should probably catch an std::range_error here and set the rcode to YXDOMAIN (RFC 6672, section 2.2)
4883 // But there is no way to set the RCODE from this function
4884 throw ImmediateServFailException("Unable to perform DNAME substitution(DNAME owner: '" + dnameOwner.toLogString() + "', DNAME target: '" + dnameTarget.toLogString() + "', substituted name: '" + qname.makeRelative(dnameOwner).toLogString() + "." + dnameTarget.toLogString() + "' : " + e.what());
4885 }
4886 }
4887 }
4888 }
4889 /* if we have a positive answer synthesized from a wildcard, we need to
4890 return the corresponding NSEC/NSEC3 records from the AUTHORITY section
4891 proving that the exact name did not exist.
4892 Except if this is a NODATA answer because then we will gather the NXNSEC records later */
4893 else if (gatherWildcardProof && !negindic && (rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::AUTHORITY) {
4894 ret.push_back(rec); // enjoy your DNSSEC
4895 }
4896 // for ANY answers we *must* have an authoritative answer, unless we are forwarding recursively
4897 else if (rec.d_place == DNSResourceRecord::ANSWER && rec.d_name == qname && (rec.d_type == qtype.getCode() || ((lwr.d_aabit || sendRDQuery) && qtype == QType::ANY))) {
4898 LOG(prefix << qname << ": Answer is in: resolved to '" << rec.getContent()->getZoneRepresentation() << "|" << DNSRecordContent::NumberToType(rec.d_type) << "'" << endl);
4899
4900 done = true;
4901 rcode = RCode::NoError;
4902
4903 if (needWildcardProof) {
4904 /* positive answer synthesized from a wildcard */
4905 NegCache::NegCacheEntry negEntry;
4906 negEntry.d_name = qname;
4907 negEntry.d_qtype = QType::ENT; // this encodes 'whole record'
4908 uint32_t lowestTTL = rec.d_ttl;
4909 harvestNXRecords(lwr.d_records, negEntry, d_now.tv_sec, &lowestTTL);
4910
4911 if (vStateIsBogus(state)) {
4912 negEntry.d_validationState = state;
4913 }
4914 else {
4915 auto recordState = getValidationStatus(qname, !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty(), false, depth, prefix);
4916
4917 if (recordState == vState::Secure) {
4918 /* We have a positive answer synthesized from a wildcard, we need to check that we have
4919 proof that the exact name doesn't exist so the wildcard can be used,
4920 as described in section 5.3.4 of RFC 4035 and 5.3 of RFC 7129.
4921 */
4922 cspmap_t csp = harvestCSPFromNE(negEntry);
4923 dState res = getDenial(csp, qname, negEntry.d_qtype.getCode(), false, false, d_validationContext, LogObject(prefix), false, wildcardLabelsCount);
4924 if (res != dState::NXDOMAIN) {
4925 vState tmpState = vState::BogusInvalidDenial;
4926 if (res == dState::INSECURE || res == dState::OPTOUT) {
4927 /* Some part could not be validated, for example a NSEC3 record with a too large number of iterations,
4928 this is not enough to warrant a Bogus, but go Insecure. */
4929 tmpState = vState::Insecure;
4930 LOG(prefix << qname << ": Unable to validate denial in wildcard expanded positive response found for " << qname << ", returning Insecure, res=" << res << endl);
4931 }
4932 else {
4933 LOG(prefix << qname << ": Invalid denial in wildcard expanded positive response found for " << qname << ", returning Bogus, res=" << res << endl);
4934 rec.d_ttl = std::min(rec.d_ttl, s_maxbogusttl);
4935 }
4936
4937 updateValidationState(qname, state, tmpState, prefix);
4938 /* we already stored the record with a different validation status, let's fix it */
4939 updateValidationStatusInCache(qname, qtype, lwr.d_aabit, tmpState);
4940 }
4941 }
4942 }
4943 }
4944
4945 ret.push_back(rec);
4946 }
4947 else if ((rec.d_type == QType::RRSIG || rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && rec.d_place == DNSResourceRecord::ANSWER) {
4948 if (rec.d_type != QType::RRSIG || rec.d_name == qname) {
4949 ret.push_back(rec); // enjoy your DNSSEC
4950 }
4951 else if (rec.d_type == QType::RRSIG && qname.isPartOf(rec.d_name)) {
4952 auto rrsig = getRR<RRSIGRecordContent>(rec);
4953 if (rrsig != nullptr && rrsig->d_type == QType::DNAME) {
4954 ret.push_back(rec);
4955 }
4956 }
4957 }
4958 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::NS && qname.isPartOf(rec.d_name)) {
4959 if (moreSpecificThan(rec.d_name, auth)) {
4960 newauth = rec.d_name;
4961 LOG(prefix << qname << ": Got NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4962
4963 /* check if we have a referral from the parent zone to a child zone for a DS query, which is not right */
4964 if (qtype == QType::DS && (newauth.isPartOf(qname) || qname == newauth)) {
4965 /* just got a referral from the parent zone when asking for a DS, looks like this server did not get the DNSSEC memo.. */
4966 referralOnDS = true;
4967 }
4968 else {
4969 realreferral = true;
4970 if (auto content = getRR<NSRecordContent>(rec)) {
4971 nsset.insert(content->getNS());
4972 }
4973 }
4974 }
4975 else {
4976 LOG(prefix << qname << ": Got upwards/level NS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "', had '" << auth << "'" << endl);
4977 if (auto content = getRR<NSRecordContent>(rec)) {
4978 nsset.insert(content->getNS());
4979 }
4980 }
4981 }
4982 else if (rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::DS && qname.isPartOf(rec.d_name)) {
4983 LOG(prefix << qname << ": Got DS record '" << rec.d_name << "' -> '" << rec.getContent()->getZoneRepresentation() << "'" << endl);
4984 }
4985 else if (realreferral && rec.d_place == DNSResourceRecord::AUTHORITY && (rec.d_type == QType::NSEC || rec.d_type == QType::NSEC3) && newauth.isPartOf(auth)) {
4986 /* we might have received a denial of the DS, let's check */
4987 NegCache::NegCacheEntry negEntry;
4988 uint32_t lowestTTL = rec.d_ttl;
4989 harvestNXRecords(lwr.d_records, negEntry, d_now.tv_sec, &lowestTTL);
4990
4991 if (!vStateIsBogus(state)) {
4992 auto recordState = getValidationStatus(newauth, !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty(), true, depth, prefix);
4993
4994 if (recordState == vState::Secure) {
4995 negEntry.d_auth = auth;
4996 negEntry.d_name = newauth;
4997 negEntry.d_qtype = QType::DS;
4998 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
4999
5000 dState denialState = getDenialValidationState(negEntry, dState::NXQTYPE, true, prefix);
5001
5002 if (denialState == dState::NXQTYPE || denialState == dState::OPTOUT || denialState == dState::INSECURE) {
5003 negEntry.d_ttd = lowestTTL + d_now.tv_sec;
5004 negEntry.d_orig_ttl = lowestTTL;
5005 negEntry.d_validationState = vState::Secure;
5006 if (denialState == dState::OPTOUT) {
5007 negEntry.d_validationState = vState::Insecure;
5008 }
5009 LOG(prefix << qname << ": Got negative indication of DS record for '" << newauth << "'" << endl);
5010
5011 g_negCache->add(negEntry);
5012
5013 /* Careful! If the client is asking for a DS that does not exist, we need to provide the SOA along with the NSEC(3) proof
5014 and we might not have it if we picked up the proof from a delegation, in which case we need to keep on to do the actual DS
5015 query. */
5016 if (qtype == QType::DS && qname == newauth && (d_externalDSQuery.empty() || qname != d_externalDSQuery)) {
5017 /* we are actually done! */
5018 negindic = true;
5019 negIndicHasSignatures = !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty();
5020 nsset.clear();
5021 }
5022 }
5023 }
5024 }
5025 }
5026 else if (!done && rec.d_place == DNSResourceRecord::AUTHORITY && rec.d_type == QType::SOA && lwr.d_rcode == RCode::NoError && qname.isPartOf(rec.d_name)) {
5027 LOG(prefix << qname << ": Got negative caching indication for '" << qname << "|" << qtype << "'" << endl);
5028
5029 if (!newtarget.empty()) {
5030 LOG(prefix << qname << ": Hang on! Got a redirect to '" << newtarget << "' already" << endl);
5031 }
5032 else {
5033 rec.d_ttl = min(s_maxnegttl, rec.d_ttl);
5034
5035 NegCache::NegCacheEntry negEntry;
5036 negEntry.d_auth = rec.d_name;
5037 uint32_t lowestTTL = rec.d_ttl;
5038 negEntry.d_name = qname;
5039 negEntry.d_qtype = qtype;
5040 harvestNXRecords(lwr.d_records, negEntry, d_now.tv_sec, &lowestTTL);
5041
5042 if (vStateIsBogus(state)) {
5043 negEntry.d_validationState = state;
5044 }
5045 else {
5046 auto recordState = getValidationStatus(qname, !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty(), qtype == QType::DS, depth, prefix);
5047 if (recordState == vState::Secure) {
5048 dState denialState = getDenialValidationState(negEntry, dState::NXQTYPE, false, prefix);
5049 updateDenialValidationState(qname, negEntry.d_validationState, negEntry.d_name, state, denialState, dState::NXQTYPE, qtype == QType::DS, depth, prefix);
5050 }
5051 else {
5052 negEntry.d_validationState = recordState;
5053 updateValidationState(qname, state, negEntry.d_validationState, prefix);
5054 }
5055 }
5056
5057 if (vStateIsBogus(negEntry.d_validationState)) {
5058 lowestTTL = min(lowestTTL, s_maxbogusttl);
5059 rec.d_ttl = min(rec.d_ttl, s_maxbogusttl);
5060 }
5061 negEntry.d_ttd = d_now.tv_sec + lowestTTL;
5062 negEntry.d_orig_ttl = lowestTTL;
5063 if (qtype.getCode() != 0) { // prevents us from NXDOMAIN'ing a whole domain
5064 // doCNAMECacheCheck() checks record cache and does not look into negcache. That means that an old record might be found if
5065 // serve-stale is active. Avoid that by explicitly zapping that CNAME record.
5066 if (qtype == QType::CNAME && MemRecursorCache::s_maxServedStaleExtensions > 0) {
5067 g_recCache->doWipeCache(qname, false, qtype);
5068 }
5069 g_negCache->add(negEntry);
5070 }
5071
5072 ret.push_back(rec);
5073 negindic = true;
5074 negIndicHasSignatures = !negEntry.authoritySOA.signatures.empty() || !negEntry.DNSSECRecords.signatures.empty();
5075 }
5076 }
5077 }
5078
5079 if (!dnameTarget.empty()) {
5080 // Synthesize a CNAME
5081 auto cnamerec = DNSRecord();
5082 cnamerec.d_name = qname;
5083 cnamerec.d_type = QType::CNAME;
5084 cnamerec.d_ttl = dnameTTL;
5085 cnamerec.setContent(std::make_shared<CNAMERecordContent>(CNAMERecordContent(newtarget)));
5086 ret.push_back(std::move(cnamerec));
5087 }
5088
5089 /* If we have seen a proper denial, let's forget that we also had a referral for a DS query.
5090 Otherwise we need to deal with it. */
5091 if (referralOnDS && !negindic) {
5092 LOG(prefix << qname << ": Got a referral to the child zone for a DS query without a negative indication (missing SOA in authority), treating that as a NODATA" << endl);
5093 if (!vStateIsBogus(state)) {
5094 auto recordState = getValidationStatus(qname, false, true, depth, prefix);
5095 if (recordState == vState::Secure) {
5096 /* we are in a secure zone, got a referral to the child zone on a DS query, no denial, that's wrong */
5097 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5098 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5099 }
5100 }
5101 negindic = true;
5102 negIndicHasSignatures = false;
5103 }
5104
5105 return done;
5106 }
5107
5108 static void submitTryDotTask(ComboAddress address, const DNSName& auth, const DNSName& nsname, time_t now)
5109 {
5110 if (address.getPort() == 853) {
5111 return;
5112 }
5113 address.setPort(853);
5114 auto lock = s_dotMap.lock();
5115 if (lock->d_numBusy >= SyncRes::s_max_busy_dot_probes) {
5116 return;
5117 }
5118 auto iter = lock->d_map.emplace(DoTStatus{address, auth, now + dotFailWait}).first;
5119 if (iter->d_status == DoTStatus::Busy) {
5120 return;
5121 }
5122 if (iter->d_ttd > now) {
5123 if (iter->d_status == DoTStatus::Bad) {
5124 return;
5125 }
5126 if (iter->d_status == DoTStatus::Good) {
5127 return;
5128 }
5129 // We only want to probe auths that we have seen before, auth that only come around once are not interesting
5130 if (iter->d_status == DoTStatus::Unknown && iter->d_count == 0) {
5131 return;
5132 }
5133 }
5134 lock->d_map.modify(iter, [=](DoTStatus& status) { status.d_ttd = now + dotFailWait; });
5135 bool pushed = pushTryDoTTask(auth, QType::SOA, address, std::numeric_limits<time_t>::max(), nsname);
5136 if (pushed) {
5137 iter->d_status = DoTStatus::Busy;
5138 ++lock->d_numBusy;
5139 }
5140 }
5141
5142 static bool shouldDoDoT(ComboAddress address, time_t now)
5143 {
5144 address.setPort(853);
5145 auto lock = s_dotMap.lock();
5146 auto iter = lock->d_map.find(address);
5147 if (iter == lock->d_map.end()) {
5148 return false;
5149 }
5150 iter->d_count++;
5151 return iter->d_status == DoTStatus::Good && iter->d_ttd > now;
5152 }
5153
5154 static void updateDoTStatus(ComboAddress address, DoTStatus::Status status, time_t time, bool updateBusy = false)
5155 {
5156 address.setPort(853);
5157 auto lock = s_dotMap.lock();
5158 auto iter = lock->d_map.find(address);
5159 if (iter != lock->d_map.end()) {
5160 iter->d_status = status;
5161 lock->d_map.modify(iter, [=](DoTStatus& statusToModify) { statusToModify.d_ttd = time; });
5162 if (updateBusy) {
5163 --lock->d_numBusy;
5164 }
5165 }
5166 }
5167
5168 bool SyncRes::tryDoT(const DNSName& qname, const QType qtype, const DNSName& nsName, ComboAddress address, time_t now)
5169 {
5170 auto log = g_slog->withName("taskq")->withValues("method", Logging::Loggable("tryDoT"), "name", Logging::Loggable(qname), "qtype", Logging::Loggable(QType(qtype).toString()), "ip", Logging::Loggable(address));
5171
5172 auto logHelper1 = [&log](const string& ename) {
5173 log->info(Logr::Debug, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5174 };
5175 auto logHelper2 = [&log](const string& msg, const string& ename) {
5176 log->error(Logr::Debug, msg, "Failed to probe DoT records, got an exception", "exception", Logging::Loggable(ename));
5177 };
5178 LWResult lwr;
5179 bool truncated{};
5180 bool spoofed{};
5181 boost::optional<Netmask> netmask;
5182 address.setPort(853);
5183 // We use the fact that qname equals auth
5184 bool isOK = false;
5185 try {
5186 boost::optional<EDNSExtendedError> extendedError;
5187 isOK = doResolveAtThisIP("", qname, qtype, lwr, netmask, qname, false, false, nsName, address, true, true, truncated, spoofed, extendedError, true);
5188 isOK = isOK && lwr.d_rcode == RCode::NoError && !lwr.d_records.empty();
5189 }
5190 catch (const PDNSException& e) {
5191 logHelper2(e.reason, "PDNSException");
5192 }
5193 catch (const ImmediateServFailException& e) {
5194 logHelper2(e.reason, "ImmediateServFailException");
5195 }
5196 catch (const PolicyHitException& e) {
5197 logHelper1("PolicyHitException");
5198 }
5199 catch (const std::exception& e) {
5200 logHelper2(e.what(), "std::exception");
5201 }
5202 catch (...) {
5203 logHelper1("other");
5204 }
5205 updateDoTStatus(address, isOK ? DoTStatus::Good : DoTStatus::Bad, now + (isOK ? dotSuccessWait : dotFailWait), true);
5206 return isOK;
5207 }
5208
5209 void SyncRes::ednsStats(boost::optional<Netmask>& ednsmask, const DNSName& qname, const string& prefix)
5210 {
5211 if (!ednsmask) {
5212 return;
5213 }
5214 s_ecsresponses++;
5215 LOG(prefix << qname << ": Received EDNS Client Subnet Mask " << ednsmask->toString() << " on response" << endl);
5216
5217 if (ednsmask->getBits() > 0) {
5218 if (ednsmask->isIPv4()) {
5219 ++SyncRes::s_ecsResponsesBySubnetSize4.at(ednsmask->getBits() - 1);
5220 }
5221 else {
5222 ++SyncRes::s_ecsResponsesBySubnetSize6.at(ednsmask->getBits() - 1);
5223 }
5224 }
5225 }
5226
5227 void SyncRes::updateQueryCounts(const string& prefix, const DNSName& qname, const ComboAddress& address, bool doTCP, bool doDoT)
5228 {
5229 t_Counters.at(rec::Counter::outqueries)++;
5230 d_outqueries++;
5231 checkMaxQperQ(qname);
5232 if (address.sin4.sin_family == AF_INET6) {
5233 t_Counters.at(rec::Counter::ipv6queries)++;
5234 }
5235 if (doTCP) {
5236 if (doDoT) {
5237 LOG(prefix << qname << ": Using DoT with " << address.toStringWithPort() << endl);
5238 t_Counters.at(rec::Counter::dotoutqueries)++;
5239 d_dotoutqueries++;
5240 }
5241 else {
5242 LOG(prefix << qname << ": Using TCP with " << address.toStringWithPort() << endl);
5243 t_Counters.at(rec::Counter::tcpoutqueries)++;
5244 d_tcpoutqueries++;
5245 }
5246 }
5247 }
5248
5249 bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const bool wasForwarded, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool doDoT, bool& truncated, bool& spoofed, boost::optional<EDNSExtendedError>& extendedError, bool dontThrottle) // NOLINT(readability-function-cognitive-complexity)
5250 {
5251 bool chained = false;
5252 LWResult::Result resolveret = LWResult::Result::Success;
5253
5254 if (s_maxtotusec != 0 && d_totUsec > s_maxtotusec) {
5255 if (s_addExtendedResolutionDNSErrors) {
5256 extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "Timeout waiting for answer(s)"};
5257 }
5258 throw ImmediateServFailException("Too much time waiting for " + qname.toLogString() + "|" + qtype.toString() + ", timeouts: " + std::to_string(d_timeouts) + ", throttles: " + std::to_string(d_throttledqueries) + ", queries: " + std::to_string(d_outqueries) + ", " + std::to_string(d_totUsec / 1000) + " ms");
5259 }
5260
5261 int preOutQueryRet = RCode::NoError;
5262 if (d_pdl && d_pdl->preoutquery(remoteIP, d_requestor, qname, qtype, doTCP, lwr.d_records, preOutQueryRet, d_eventTrace, timeval{0, 0})) {
5263 LOG(prefix << qname << ": Query handled by Lua" << endl);
5264 }
5265 else {
5266 ednsmask = getEDNSSubnetMask(qname, remoteIP);
5267 if (ednsmask) {
5268 LOG(prefix << qname << ": Adding EDNS Client Subnet Mask " << ednsmask->toString() << " to query" << endl);
5269 s_ecsqueries++;
5270 }
5271 updateQueryCounts(prefix, qname, remoteIP, doTCP, doDoT);
5272 resolveret = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, auth, qtype.getCode(),
5273 doTCP, sendRDQuery, &d_now, ednsmask, &lwr, &chained, nsName); // <- we go out on the wire!
5274 ednsStats(ednsmask, qname, prefix);
5275 }
5276
5277 /* preoutquery killed the query by setting dq.rcode to -3 */
5278 if (preOutQueryRet == -3) {
5279 throw ImmediateServFailException("Query killed by policy");
5280 }
5281
5282 d_totUsec += lwr.d_usec;
5283
5284 if (resolveret == LWResult::Result::Spoofed) {
5285 spoofed = true;
5286 return false;
5287 }
5288
5289 accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
5290 ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
5291
5292 if (!dontThrottle) {
5293 auto dontThrottleNames = g_dontThrottleNames.getLocal();
5294 auto dontThrottleNetmasks = g_dontThrottleNetmasks.getLocal();
5295 dontThrottle = dontThrottleNames->check(nsName) || dontThrottleNetmasks->match(remoteIP);
5296 }
5297
5298 if (resolveret != LWResult::Result::Success) {
5299 /* Error while resolving */
5300 if (resolveret == LWResult::Result::Timeout) {
5301 /* Time out */
5302
5303 LOG(prefix << qname << ": Timeout resolving after " << lwr.d_usec / 1000.0 << " ms " << (doTCP ? "over TCP" : "") << endl);
5304 d_timeouts++;
5305 t_Counters.at(rec::Counter::outgoingtimeouts)++;
5306
5307 if (remoteIP.sin4.sin_family == AF_INET) {
5308 t_Counters.at(rec::Counter::outgoing4timeouts)++;
5309 }
5310 else {
5311 t_Counters.at(rec::Counter::outgoing6timeouts)++;
5312 }
5313
5314 if (t_timeouts) {
5315 t_timeouts->push_back(remoteIP);
5316 }
5317 }
5318 else if (resolveret == LWResult::Result::OSLimitError) {
5319 /* OS resource limit reached */
5320 LOG(prefix << qname << ": Hit a local resource limit resolving" << (doTCP ? " over TCP" : "") << ", probable error: " << stringerror() << endl);
5321 t_Counters.at(rec::Counter::resourceLimits)++;
5322 }
5323 else {
5324 /* LWResult::Result::PermanentError */
5325 t_Counters.at(rec::Counter::unreachables)++;
5326 d_unreachables++;
5327 // XXX questionable use of errno
5328 LOG(prefix << qname << ": Error resolving from " << remoteIP.toString() << (doTCP ? " over TCP" : "") << ", possible error: " << stringerror() << endl);
5329 }
5330
5331 // don't account for resource limits, they are our own fault
5332 // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
5333 if (resolveret != LWResult::Result::OSLimitError && !chained && !dontThrottle) {
5334 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5335
5336 // make sure we don't throttle the root
5337 if (s_serverdownmaxfails > 0 && auth != g_rootdnsname && s_fails.lock()->incr(remoteIP, d_now) >= s_serverdownmaxfails) {
5338 LOG(prefix << qname << ": Max fails reached resolving on " << remoteIP.toString() << ". Going full throttle for " << s_serverdownthrottletime << " seconds" << endl);
5339 // mark server as down
5340 doThrottle(d_now.tv_sec, remoteIP, s_serverdownthrottletime, 10000);
5341 }
5342 else if (resolveret == LWResult::Result::PermanentError) {
5343 // unreachable, 1 minute or 100 queries
5344 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 100);
5345 }
5346 else {
5347 // timeout, 10 seconds or 5 queries
5348 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 5);
5349 }
5350 }
5351
5352 return false;
5353 }
5354
5355 if (!lwr.d_validpacket) {
5356 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a packet we could not parse over " << (doTCP ? "TCP" : "UDP") << ", trying sibling IP or NS" << endl);
5357 if (!chained && !dontThrottle) {
5358
5359 // let's make sure we prefer a different server for some time, if there is one available
5360 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5361
5362 if (doTCP) {
5363 // we can be more heavy-handed over TCP
5364 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 10);
5365 }
5366 else {
5367 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 10, 2);
5368 }
5369 }
5370 return false;
5371 }
5372 /* we got an answer */
5373 if (lwr.d_rcode != RCode::NoError && lwr.d_rcode != RCode::NXDomain) {
5374 LOG(prefix << qname << ": " << nsName << " (" << remoteIP.toString() << ") returned a " << RCode::to_s(lwr.d_rcode) << ", trying sibling IP or NS" << endl);
5375 if (!chained && !dontThrottle) {
5376 if (wasForwarded && lwr.d_rcode == RCode::ServFail) {
5377 // rather than throttling what could be the only server we have for this destination, let's make sure we try a different one if there is one available
5378 // on the other hand, we might keep hammering a server under attack if there is no other alternative, or the alternative is overwhelmed as well, but
5379 // at the very least we will detect that if our packets stop being answered
5380 s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec
5381 }
5382 else {
5383 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5384 }
5385 }
5386 return false;
5387 }
5388
5389 /* this server sent a valid answer, mark it backup up if it was down */
5390 if (s_serverdownmaxfails > 0) {
5391 s_fails.lock()->clear(remoteIP);
5392 }
5393 // Clear all throttles for this IP, both general and specific throttles for qname-qtype
5394 unThrottle(remoteIP, qname, qtype);
5395
5396 if (lwr.d_tcbit) {
5397 truncated = true;
5398
5399 if (doTCP) {
5400 LOG(prefix << qname << ": Truncated bit set, over TCP?" << endl);
5401 if (!dontThrottle) {
5402 /* let's treat that as a ServFail answer from this server */
5403 doThrottle(d_now.tv_sec, remoteIP, qname, qtype, 60, 3);
5404 }
5405 return false;
5406 }
5407 LOG(prefix << qname << ": Truncated bit set, over UDP" << endl);
5408
5409 return true;
5410 }
5411
5412 return true;
5413 }
5414
5415 void SyncRes::handleNewTarget(const std::string& prefix, const DNSName& qname, const DNSName& newtarget, const QType qtype, std::vector<DNSRecord>& ret, int& rcode, unsigned int depth, const std::vector<DNSRecord>& recordsFromAnswer, vState& state)
5416 {
5417 if (newtarget == qname) {
5418 LOG(prefix << qname << ": Status=got a CNAME referral to self, returning SERVFAIL" << endl);
5419 ret.clear();
5420 rcode = RCode::ServFail;
5421 return;
5422 }
5423 if (newtarget.isPartOf(qname)) {
5424 // a.b.c. CNAME x.a.b.c will go to great depths with QM on
5425 LOG(prefix << qname << ": Status=got a CNAME referral to child, disabling QM" << endl);
5426 setQNameMinimization(false);
5427 }
5428
5429 if (!d_followCNAME) {
5430 rcode = RCode::NoError;
5431 return;
5432 }
5433
5434 // Check to see if we already have seen the new target as a previous target or that the chain is too long
5435 const auto [CNAMELoop, numCNAMEs] = scanForCNAMELoop(newtarget, ret);
5436 if (CNAMELoop) {
5437 LOG(prefix << qname << ": Status=got a CNAME referral that causes a loop, returning SERVFAIL" << endl);
5438 ret.clear();
5439 rcode = RCode::ServFail;
5440 return;
5441 }
5442 if (numCNAMEs > s_max_CNAMES_followed) {
5443 LOG(prefix << qname << ": Status=got a CNAME referral, but chain too long, returning SERVFAIL" << endl);
5444 rcode = RCode::ServFail;
5445 return;
5446 }
5447
5448 if (qtype == QType::DS || qtype == QType::DNSKEY) {
5449 LOG(prefix << qname << ": Status=got a CNAME referral, but we are looking for a DS or DNSKEY" << endl);
5450
5451 if (d_doDNSSEC) {
5452 addNXNSECS(ret, recordsFromAnswer);
5453 }
5454
5455 rcode = RCode::NoError;
5456 return;
5457 }
5458
5459 LOG(prefix << qname << ": Status=got a CNAME referral, starting over with " << newtarget << endl);
5460
5461 set<GetBestNSAnswer> beenthere;
5462 Context cnameContext;
5463 rcode = doResolve(newtarget, qtype, ret, depth + 1, beenthere, cnameContext);
5464 LOG(prefix << qname << ": Updating validation state for response to " << qname << " from " << state << " with the state from the CNAME quest: " << cnameContext.state << endl);
5465 updateValidationState(qname, state, cnameContext.state, prefix);
5466 }
5467
5468 bool SyncRes::processAnswer(unsigned int depth, const string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask>& ednsmask, bool sendRDQuery, NsSet& nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state, const ComboAddress& remoteIP)
5469 {
5470 if (s_minimumTTL != 0) {
5471 for (auto& rec : lwr.d_records) {
5472 rec.d_ttl = max(rec.d_ttl, s_minimumTTL);
5473 }
5474 }
5475
5476 /* if the answer is ECS-specific, a minimum TTL is set for this kind of answers
5477 and it's higher than the global minimum TTL */
5478 if (ednsmask && s_minimumECSTTL > 0 && (s_minimumTTL == 0 || s_minimumECSTTL > s_minimumTTL)) {
5479 for (auto& rec : lwr.d_records) {
5480 if (rec.d_place == DNSResourceRecord::ANSWER) {
5481 rec.d_ttl = max(rec.d_ttl, s_minimumECSTTL);
5482 }
5483 }
5484 }
5485
5486 bool needWildcardProof = false;
5487 bool gatherWildcardProof = false;
5488 unsigned int wildcardLabelsCount = 0;
5489 *rcode = updateCacheFromRecords(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, sendRDQuery, remoteIP);
5490 if (*rcode != RCode::NoError) {
5491 return true;
5492 }
5493
5494 LOG(prefix << qname << ": Determining status after receiving this packet" << endl);
5495
5496 set<DNSName> nsset;
5497 bool realreferral = false;
5498 bool negindic = false;
5499 bool negIndicHasSignatures = false;
5500 DNSName newauth;
5501 DNSName newtarget;
5502
5503 bool done = processRecords(prefix, qname, qtype, auth, lwr, sendRDQuery, ret, nsset, newtarget, newauth, realreferral, negindic, state, needWildcardProof, gatherWildcardProof, wildcardLabelsCount, *rcode, negIndicHasSignatures, depth);
5504
5505 if (done) {
5506 LOG(prefix << qname << ": Status=got results, this level of recursion done" << endl);
5507 LOG(prefix << qname << ": Validation status is " << state << endl);
5508 return true;
5509 }
5510
5511 if (!newtarget.empty()) {
5512 handleNewTarget(prefix, qname, newtarget, qtype.getCode(), ret, *rcode, depth, lwr.d_records, state);
5513 return true;
5514 }
5515
5516 if (lwr.d_rcode == RCode::NXDomain) {
5517 LOG(prefix << qname << ": Status=NXDOMAIN, we are done " << (negindic ? "(have negative SOA)" : "") << endl);
5518
5519 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5520 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5521 LOG(prefix << qname << ": NXDOMAIN without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5522 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5523 }
5524 else {
5525 /* we might not have validated any record, because we did get a NXDOMAIN without any SOA
5526 from an insecure zone, for example */
5527 updateValidationState(qname, state, tempState, prefix);
5528 }
5529
5530 if (d_doDNSSEC) {
5531 addNXNSECS(ret, lwr.d_records);
5532 }
5533
5534 *rcode = RCode::NXDomain;
5535 return true;
5536 }
5537
5538 if (nsset.empty() && lwr.d_rcode == 0 && (negindic || lwr.d_aabit || sendRDQuery)) {
5539 LOG(prefix << qname << ": Status=noerror, other types may exist, but we are done " << (negindic ? "(have negative SOA) " : "") << (lwr.d_aabit ? "(have aa bit) " : "") << endl);
5540
5541 auto tempState = getValidationStatus(qname, negIndicHasSignatures, qtype == QType::DS, depth, prefix);
5542 if (tempState == vState::Secure && (lwr.d_aabit || sendRDQuery) && !negindic) {
5543 LOG(prefix << qname << ": NODATA without a negative indication (missing SOA in authority) in a DNSSEC secure zone, going Bogus" << endl);
5544 updateValidationState(qname, state, vState::BogusMissingNegativeIndication, prefix);
5545 }
5546 else {
5547 /* we might not have validated any record, because we did get a NODATA without any SOA
5548 from an insecure zone, for example */
5549 updateValidationState(qname, state, tempState, prefix);
5550 }
5551
5552 if (d_doDNSSEC) {
5553 addNXNSECS(ret, lwr.d_records);
5554 }
5555
5556 *rcode = RCode::NoError;
5557 return true;
5558 }
5559
5560 if (realreferral) {
5561 LOG(prefix << qname << ": Status=did not resolve, got " << (unsigned int)nsset.size() << " NS, ");
5562
5563 nameservers.clear();
5564 for (auto const& nameserver : nsset) {
5565 if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
5566 bool match = dfe.getProcessingPolicy(nameserver, d_discardedPolicies, d_appliedPolicy);
5567 if (match) {
5568 mergePolicyTags(d_policyTags, d_appliedPolicy.getTags());
5569 if (d_appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) { // client query needs an RPZ response
5570 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5571 /* reset to no match */
5572 d_appliedPolicy = DNSFilterEngine::Policy();
5573 }
5574 else {
5575 LOG("however " << nameserver << " was blocked by RPZ policy '" << d_appliedPolicy.getName() << "'" << endl);
5576 throw PolicyHitException();
5577 }
5578 }
5579 }
5580 }
5581 nameservers.insert({nameserver, {{}, false}});
5582 }
5583 LOG("looping to them" << endl);
5584 *gotNewServers = true;
5585 auth = std::move(newauth);
5586
5587 return false;
5588 }
5589
5590 return false;
5591 }
5592
5593 bool SyncRes::doDoTtoAuth(const DNSName& nameServer)
5594 {
5595 return g_DoTToAuthNames.getLocal()->check(nameServer);
5596 }
5597
5598 /** returns:
5599 * -1 in case of no results
5600 * rcode otherwise
5601 */
5602 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
5603 int SyncRes::doResolveAt(NsSet& nameservers, DNSName auth, bool flawedNSSet, const DNSName& qname, const QType qtype,
5604 vector<DNSRecord>& ret,
5605 unsigned int depth, const string& prefix, set<GetBestNSAnswer>& beenthere, Context& context, StopAtDelegation* stopAtDelegation,
5606 map<DNSName, vector<ComboAddress>>* fallBack)
5607 {
5608 auto luaconfsLocal = g_luaconfs.getLocal();
5609
5610 LOG(prefix << qname << ": Cache consultations done, have " << (unsigned int)nameservers.size() << " NS to contact");
5611
5612 if (nameserversBlockedByRPZ(luaconfsLocal->dfe, nameservers)) {
5613 /* RPZ hit */
5614 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5615 /* reset to no match */
5616 d_appliedPolicy = DNSFilterEngine::Policy();
5617 }
5618 else {
5619 throw PolicyHitException();
5620 }
5621 }
5622
5623 LOG(endl);
5624
5625 unsigned int addressQueriesForNS = 0;
5626 for (;;) { // we may get more specific nameservers
5627 auto rnameservers = shuffleInSpeedOrder(qname, nameservers, prefix);
5628
5629 // We allow s_maxnsaddressqperq (default 10) queries with empty responses when resolving NS names.
5630 // If a zone publishes many (more than s_maxnsaddressqperq) NS records, we allow less.
5631 // This is to "punish" zones that publish many non-resolving NS names.
5632 // We always allow 5 NS name resolving attempts with empty results.
5633 unsigned int nsLimit = s_maxnsaddressqperq;
5634 if (rnameservers.size() > nsLimit) {
5635 int newLimit = static_cast<int>(nsLimit - (rnameservers.size() - nsLimit));
5636 nsLimit = std::max(5, newLimit);
5637 }
5638
5639 for (auto tns = rnameservers.cbegin();; ++tns) {
5640 if (addressQueriesForNS >= nsLimit) {
5641 throw ImmediateServFailException(std::to_string(nsLimit) + " (adjusted max-ns-address-qperq) or more queries with empty results for NS addresses sent resolving " + qname.toLogString());
5642 }
5643 if (tns == rnameservers.cend()) {
5644 LOG(prefix << qname << ": Failed to resolve via any of the " << (unsigned int)rnameservers.size() << " offered NS at level '" << auth << "'" << endl);
5645 if (s_addExtendedResolutionDNSErrors) {
5646 context.extendedError = EDNSExtendedError{static_cast<uint16_t>(EDNSExtendedError::code::NoReachableAuthority), "delegation " + auth.toLogString()};
5647 }
5648 if (!auth.isRoot() && flawedNSSet) {
5649 LOG(prefix << qname << ": Ageing nameservers for level '" << auth << "', next query might succeed" << endl);
5650 if (g_recCache->doAgeCache(d_now.tv_sec, auth, QType::NS, 10)) {
5651 t_Counters.at(rec::Counter::nsSetInvalidations)++;
5652 }
5653 }
5654 return -1;
5655 }
5656
5657 bool cacheOnly = false;
5658 // this line needs to identify the 'self-resolving' behaviour
5659 if (qname == tns->first && (qtype.getCode() == QType::A || qtype.getCode() == QType::AAAA)) {
5660 /* we might have a glue entry in cache so let's try this NS
5661 but only if we have enough in the cache to know how to reach it */
5662 LOG(prefix << qname << ": Using NS to resolve itself, but only using what we have in cache (" << (1 + tns - rnameservers.cbegin()) << "/" << rnameservers.size() << ")" << endl);
5663 cacheOnly = true;
5664 }
5665
5666 typedef vector<ComboAddress> remoteIPs_t;
5667 remoteIPs_t remoteIPs;
5668 remoteIPs_t::iterator remoteIP;
5669 bool pierceDontQuery = false;
5670 bool sendRDQuery = false;
5671 boost::optional<Netmask> ednsmask;
5672 LWResult lwr;
5673 const bool wasForwarded = tns->first.empty() && (!nameservers[tns->first].first.empty());
5674 int rcode = RCode::NoError;
5675 bool gotNewServers = false;
5676
5677 if (tns->first.empty() && !wasForwarded) {
5678 static ComboAddress const s_oobRemote("255.255.255.255");
5679 LOG(prefix << qname << ": Domain is out-of-band" << endl);
5680 /* setting state to indeterminate since validation is disabled for local auth zone,
5681 and Insecure would be misleading. */
5682 context.state = vState::Indeterminate;
5683 d_wasOutOfBand = doOOBResolve(qname, qtype, lwr.d_records, depth, prefix, lwr.d_rcode);
5684 lwr.d_tcbit = false;
5685 lwr.d_aabit = true;
5686
5687 /* we have received an answer, are we done ? */
5688 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, false, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, s_oobRemote);
5689 if (done) {
5690 return rcode;
5691 }
5692 if (gotNewServers) {
5693 if (stopAtDelegation != nullptr && *stopAtDelegation == Stop) {
5694 *stopAtDelegation = Stopped;
5695 return rcode;
5696 }
5697 break;
5698 }
5699 }
5700 else {
5701 if (fallBack != nullptr) {
5702 if (auto iter = fallBack->find(tns->first); iter != fallBack->end()) {
5703 remoteIPs = iter->second;
5704 }
5705 }
5706 if (remoteIPs.empty()) {
5707 remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS);
5708 }
5709
5710 if (remoteIPs.empty()) {
5711 LOG(prefix << qname << ": Failed to get IP for NS " << tns->first << ", trying next if available" << endl);
5712 flawedNSSet = true;
5713 continue;
5714 }
5715 bool hitPolicy{false};
5716 LOG(prefix << qname << ": Resolved '" << auth << "' NS " << tns->first << " to: ");
5717 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5718 if (remoteIP != remoteIPs.begin()) {
5719 LOG(", ");
5720 }
5721 LOG(remoteIP->toString());
5722 if (nameserverIPBlockedByRPZ(luaconfsLocal->dfe, *remoteIP)) {
5723 hitPolicy = true;
5724 }
5725 }
5726 LOG(endl);
5727 if (hitPolicy) { // implies d_wantsRPZ
5728 /* RPZ hit */
5729 if (d_pdl && d_pdl->policyHitEventFilter(d_requestor, qname, qtype, d_queryReceivedOverTCP, d_appliedPolicy, d_policyTags, d_discardedPolicies)) {
5730 /* reset to no match */
5731 d_appliedPolicy = DNSFilterEngine::Policy();
5732 }
5733 else {
5734 throw PolicyHitException();
5735 }
5736 }
5737
5738 for (remoteIP = remoteIPs.begin(); remoteIP != remoteIPs.end(); ++remoteIP) {
5739 LOG(prefix << qname << ": Trying IP " << remoteIP->toStringWithPort() << ", asking '" << qname << "|" << qtype << "'" << endl);
5740
5741 if (throttledOrBlocked(prefix, *remoteIP, qname, qtype, pierceDontQuery)) {
5742 // As d_throttledqueries might be increased, check the max-qperq condition
5743 checkMaxQperQ(qname);
5744 continue;
5745 }
5746
5747 bool truncated = false;
5748 bool spoofed = false;
5749 bool gotAnswer = false;
5750 bool doDoT = false;
5751
5752 if (doDoTtoAuth(tns->first)) {
5753 remoteIP->setPort(853);
5754 doDoT = true;
5755 }
5756 if (SyncRes::s_dot_to_port_853 && remoteIP->getPort() == 853) {
5757 doDoT = true;
5758 }
5759 bool forceTCP = doDoT;
5760
5761 if (!doDoT && s_max_busy_dot_probes > 0) {
5762 submitTryDotTask(*remoteIP, auth, tns->first, d_now.tv_sec);
5763 }
5764 if (!forceTCP) {
5765 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5766 tns->first, *remoteIP, false, false, truncated, spoofed, context.extendedError);
5767 }
5768 if (forceTCP || (spoofed || (gotAnswer && truncated))) {
5769 /* retry, over TCP this time */
5770 gotAnswer = doResolveAtThisIP(prefix, qname, qtype, lwr, ednsmask, auth, sendRDQuery, wasForwarded,
5771 tns->first, *remoteIP, true, doDoT, truncated, spoofed, context.extendedError);
5772 }
5773
5774 if (!gotAnswer) {
5775 if (doDoT && s_max_busy_dot_probes > 0) {
5776 // This is quite pessimistic...
5777 updateDoTStatus(*remoteIP, DoTStatus::Bad, d_now.tv_sec + dotFailWait);
5778 }
5779 continue;
5780 }
5781
5782 LOG(prefix << qname << ": Got " << (unsigned int)lwr.d_records.size() << " answers from " << tns->first << " (" << remoteIP->toString() << "), rcode=" << lwr.d_rcode << " (" << RCode::to_s(lwr.d_rcode) << "), aa=" << lwr.d_aabit << ", in " << lwr.d_usec / 1000 << "ms" << endl);
5783
5784 if (doDoT && s_max_busy_dot_probes > 0) {
5785 updateDoTStatus(*remoteIP, DoTStatus::Good, d_now.tv_sec + dotSuccessWait);
5786 }
5787 /* // for you IPv6 fanatics :-)
5788 if(remoteIP->sin4.sin_family==AF_INET6)
5789 lwr.d_usec/=3;
5790 */
5791 // cout<<"ms: "<<lwr.d_usec/1000.0<<", "<<g_avgLatency/1000.0<<'\n';
5792
5793 s_nsSpeeds.lock()->find_or_enter(tns->first.empty() ? DNSName(remoteIP->toStringWithPort()) : tns->first, d_now).submit(*remoteIP, static_cast<int>(lwr.d_usec), d_now);
5794
5795 /* we have received an answer, are we done ? */
5796 bool done = processAnswer(depth, prefix, lwr, qname, qtype, auth, wasForwarded, ednsmask, sendRDQuery, nameservers, ret, luaconfsLocal->dfe, &gotNewServers, &rcode, context.state, *remoteIP);
5797 if (done) {
5798 return rcode;
5799 }
5800 if (gotNewServers) {
5801 if (stopAtDelegation != nullptr && *stopAtDelegation == Stop) {
5802 *stopAtDelegation = Stopped;
5803 return rcode;
5804 }
5805 break;
5806 }
5807 /* was lame */
5808 doThrottle(d_now.tv_sec, *remoteIP, qname, qtype, 60, 100);
5809 }
5810
5811 if (gotNewServers) {
5812 break;
5813 }
5814
5815 if (remoteIP == remoteIPs.cend()) { // we tried all IP addresses, none worked
5816 continue;
5817 }
5818 }
5819 }
5820 }
5821 return -1;
5822 }
5823
5824 void SyncRes::setQuerySource(const Netmask& netmask)
5825 {
5826 if (!netmask.empty()) {
5827 d_outgoingECSNetwork = netmask;
5828 }
5829 else {
5830 d_outgoingECSNetwork = boost::none;
5831 }
5832 }
5833
5834 void SyncRes::setQuerySource(const ComboAddress& requestor, const boost::optional<const EDNSSubnetOpts&>& incomingECS)
5835 {
5836 d_requestor = requestor;
5837
5838 if (incomingECS && incomingECS->source.getBits() > 0) {
5839 d_cacheRemote = incomingECS->source.getMaskedNetwork();
5840 uint8_t bits = std::min(incomingECS->source.getBits(), (incomingECS->source.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5841 ComboAddress trunc = incomingECS->source.getNetwork();
5842 trunc.truncate(bits);
5843 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5844 }
5845 else {
5846 d_cacheRemote = d_requestor;
5847 if (!incomingECS && s_ednslocalsubnets.match(d_requestor)) {
5848 ComboAddress trunc = d_requestor;
5849 uint8_t bits = d_requestor.isIPv4() ? 32 : 128;
5850 bits = std::min(bits, (trunc.isIPv4() ? s_ecsipv4limit : s_ecsipv6limit));
5851 trunc.truncate(bits);
5852 d_outgoingECSNetwork = boost::optional<Netmask>(Netmask(trunc, bits));
5853 }
5854 else if (s_ecsScopeZero.source.getBits() > 0) {
5855 /* RFC7871 says we MUST NOT send any ECS if the source scope is 0.
5856 But using an empty ECS in that case would mean inserting
5857 a non ECS-specific entry into the cache, preventing any further
5858 ECS-specific query to be sent.
5859 So instead we use the trick described in section 7.1.2:
5860 "The subsequent Recursive Resolver query to the Authoritative Nameserver
5861 will then either not include an ECS option or MAY optionally include
5862 its own address information, which is what the Authoritative
5863 Nameserver will almost certainly use to generate any Tailored
5864 Response in lieu of an option. This allows the answer to be handled
5865 by the same caching mechanism as other queries, with an explicit
5866 indicator of the applicable scope. Subsequent Stub Resolver queries
5867 for /0 can then be answered from this cached response.
5868 */
5869 d_outgoingECSNetwork = boost::optional<Netmask>(s_ecsScopeZero.source.getMaskedNetwork());
5870 d_cacheRemote = s_ecsScopeZero.source.getNetwork();
5871 }
5872 else {
5873 // ECS disabled because no scope-zero address could be derived.
5874 d_outgoingECSNetwork = boost::none;
5875 }
5876 }
5877 }
5878
5879 boost::optional<Netmask> SyncRes::getEDNSSubnetMask(const DNSName& name, const ComboAddress& rem)
5880 {
5881 if (d_outgoingECSNetwork && (s_ednsdomains.check(name) || s_ednsremotesubnets.match(rem))) {
5882 return d_outgoingECSNetwork;
5883 }
5884 return boost::none;
5885 }
5886
5887 void SyncRes::parseEDNSSubnetAllowlist(const std::string& alist)
5888 {
5889 vector<string> parts;
5890 stringtok(parts, alist, ",; ");
5891 for (const auto& allow : parts) {
5892 try {
5893 s_ednsremotesubnets.addMask(Netmask(allow));
5894 }
5895 catch (...) {
5896 s_ednsdomains.add(DNSName(allow));
5897 }
5898 }
5899 }
5900
5901 void SyncRes::parseEDNSSubnetAddFor(const std::string& subnetlist)
5902 {
5903 vector<string> parts;
5904 stringtok(parts, subnetlist, ",; ");
5905 for (const auto& allow : parts) {
5906 s_ednslocalsubnets.addMask(allow);
5907 }
5908 }
5909
5910 // used by PowerDNSLua - note that this neglects to add the packet count & statistics back to pdns_recursor.cc
5911 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, const shared_ptr<RecursorLua4>& pdl, Logr::log_t log)
5912 {
5913 return directResolve(qname, qtype, qclass, ret, pdl, SyncRes::s_qnameminimization, log);
5914 }
5915
5916 int directResolve(const DNSName& qname, const QType qtype, const QClass qclass, vector<DNSRecord>& ret, const shared_ptr<RecursorLua4>& pdl, bool qnamemin, Logr::log_t slog)
5917 {
5918 auto log = slog->withValues("qname", Logging::Loggable(qname), "qtype", Logging::Loggable(qtype));
5919
5920 struct timeval now
5921 {
5922 };
5923 gettimeofday(&now, nullptr);
5924
5925 SyncRes resolver(now);
5926 resolver.setQNameMinimization(qnamemin);
5927 if (pdl) {
5928 resolver.setLuaEngine(pdl);
5929 }
5930
5931 int res = -1;
5932 const std::string msg = "Exception while resolving";
5933 try {
5934 res = resolver.beginResolve(qname, qtype, qclass, ret, 0);
5935 }
5936 catch (const PDNSException& e) {
5937 SLOG(g_log << Logger::Warning << "Failed to resolve " << qname << ", got pdns exception: " << e.reason << endl,
5938 log->error(Logr::Warning, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5939 ret.clear();
5940 }
5941 catch (const ImmediateServFailException& e) {
5942 SLOG(g_log << Logger::Warning << "Failed to resolve " << qname << ", got ImmediateServFailException: " << e.reason << endl,
5943 log->error(Logr::Warning, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5944 ret.clear();
5945 }
5946 catch (const PolicyHitException& e) {
5947 SLOG(g_log << Logger::Warning << "Failed to resolve " << qname << ", got a policy hit" << endl,
5948 log->info(Logr::Warning, msg, "exception", Logging::Loggable("PolicyHitException")));
5949 ret.clear();
5950 }
5951 catch (const std::exception& e) {
5952 SLOG(g_log << Logger::Warning << "Failed to resolve " << qname << ", got STL error: " << e.what() << endl,
5953 log->error(Logr::Warning, e.what(), msg, "exception", Logging::Loggable("std::exception")));
5954 ret.clear();
5955 }
5956 catch (...) {
5957 SLOG(g_log << Logger::Warning << "Failed to resolve " << qname << ", got an exception" << endl,
5958 log->info(Logr::Warning, msg));
5959 ret.clear();
5960 }
5961
5962 return res;
5963 }
5964
5965 int SyncRes::getRootNS(struct timeval now, asyncresolve_t asyncCallback, unsigned int depth, Logr::log_t log)
5966 {
5967 if (::arg()["hint-file"] == "no-refresh") {
5968 return 0;
5969 }
5970 SyncRes resolver(now);
5971 resolver.d_prefix = "[getRootNS]";
5972 resolver.setDoEDNS0(true);
5973 resolver.setUpdatingRootNS();
5974 resolver.setDoDNSSEC(g_dnssecmode != DNSSECMode::Off);
5975 resolver.setDNSSECValidationRequested(g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate);
5976 resolver.setAsyncCallback(std::move(asyncCallback));
5977 resolver.setRefreshAlmostExpired(true);
5978
5979 const string msg = "Failed to update . records";
5980 vector<DNSRecord> ret;
5981 int res = -1;
5982 try {
5983 res = resolver.beginResolve(g_rootdnsname, QType::NS, 1, ret, depth + 1);
5984 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
5985 auto state = resolver.getValidationState();
5986 if (vStateIsBogus(state)) {
5987 throw PDNSException("Got Bogus validation result for .|NS");
5988 }
5989 }
5990 }
5991 catch (const PDNSException& e) {
5992 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5993 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")));
5994 }
5995 catch (const ImmediateServFailException& e) {
5996 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.reason << endl,
5997 log->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("ImmediateServFailException")));
5998 }
5999 catch (const PolicyHitException& e) {
6000 SLOG(g_log << Logger::Error << "Failed to update . records, got a policy hit" << endl,
6001 log->info(Logr::Error, msg, "exception", Logging::Loggable("PolicyHitException")));
6002 ret.clear();
6003 }
6004 catch (const std::exception& e) {
6005 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception: " << e.what() << endl,
6006 log->error(Logr::Error, e.what(), msg, "exception", Logging::Loggable("std::exception")));
6007 }
6008 catch (...) {
6009 SLOG(g_log << Logger::Error << "Failed to update . records, got an exception" << endl,
6010 log->info(Logr::Error, msg));
6011 }
6012
6013 if (res == 0) {
6014 SLOG(g_log << Logger::Debug << "Refreshed . records" << endl,
6015 log->info(Logr::Debug, "Refreshed . records"));
6016 }
6017 else {
6018 SLOG(g_log << Logger::Warning << "Failed to update root NS records, RCODE=" << res << endl,
6019 log->info(Logr::Warning, msg, "rcode", Logging::Loggable(res)));
6020 }
6021 return res;
6022 }
6023
6024 bool SyncRes::answerIsNOData(uint16_t requestedType, int rcode, const std::vector<DNSRecord>& records)
6025 {
6026 if (rcode != RCode::NoError) {
6027 return false;
6028 }
6029
6030 // NOLINTNEXTLINE(readability-use-anyofallof)
6031 for (const auto& rec : records) {
6032 if (rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == requestedType) {
6033 /* we have a record, of the right type, in the right section */
6034 return false;
6035 }
6036 }
6037 return true;
6038 #if 0
6039 // This code should be equivalent to the code above, clang-tidy prefers any_of()
6040 // I have doubts if that is easier to read
6041 return !std::any_of(records.begin(), records.end(), [=](const DNSRecord& rec) {
6042 return rec.d_place == DNSResourceRecord::ANSWER && rec.d_type == requestedType;
6043 });
6044 #endif
6045 }