// pdns/recursor_cache.cc — in-memory record cache for the PowerDNS recursor,
// sharded into multiple maps (variable shard count) with per-shard locks and
// lock-contention statistics.
1 #ifdef HAVE_CONFIG_H
2 #include "config.h"
3 #endif
4
5 #include <cinttypes>
6
7 #include "recursor_cache.hh"
8 #include "misc.hh"
9 #include <iostream>
10 #include "dnsrecords.hh"
11 #include "arguments.hh"
12 #include "syncres.hh"
13 #include "recursor_cache.hh"
14 #include "namespaces.hh"
15 #include "cachecleaner.hh"
16
17 MemRecursorCache::MemRecursorCache(size_t mapsCount) : d_maps(mapsCount)
18 {
19 }
20
21 MemRecursorCache::~MemRecursorCache()
22 {
23 try {
24 typedef std::unique_ptr<lock> lock_t;
25 vector<lock_t> locks;
26 for (auto& map : d_maps) {
27 locks.push_back(lock_t(new lock(map)));
28 }
29 }
30 catch(...) {
31 }
32 }
33
34 size_t MemRecursorCache::size()
35 {
36 size_t count = 0;
37 for (auto& map : d_maps) {
38 count += map.d_entriesCount;
39 }
40 return count;
41 }
42
43 pair<uint64_t,uint64_t> MemRecursorCache::stats()
44 {
45 uint64_t c = 0, a = 0;
46 for (auto& map : d_maps) {
47 const lock l(map);
48 c += map.d_contended_count;
49 a += map.d_acuired_count;
50 }
51 return pair<uint64_t,uint64_t>(c, a);
52 }
53
54 size_t MemRecursorCache::ecsIndexSize()
55 {
56 // XXX!
57 size_t count = 0;
58 for (auto& map : d_maps) {
59 const lock l(map);
60 count += map.d_ecsIndex.size();
61 }
62 return count;
63 }
64
65 // this function is too slow to poll!
66 size_t MemRecursorCache::bytes()
67 {
68 size_t ret = 0;
69 for (auto& map : d_maps) {
70 const lock l(map);
71 for (const auto& i : map.d_map) {
72 ret += sizeof(struct CacheEntry);
73 ret += i.d_qname.toString().length();
74 for (const auto& record : i.d_records) {
75 ret += sizeof(record); // XXX WRONG we don't know the stored size!
76 }
77 }
78 }
79 return ret;
80 }
81
82 int32_t MemRecursorCache::handleHit(MapCombo& map, MemRecursorCache::OrderedTagIterator_t& entry, const DNSName& qname, const ComboAddress& who, vector<DNSRecord>* res, vector<std::shared_ptr<RRSIGRecordContent>>* signatures, std::vector<std::shared_ptr<DNSRecord>>* authorityRecs, bool* variable, vState* state, bool* wasAuth)
83 {
84 // MUTEX SHOULD BE ACQUIRED
85 int32_t ttd = entry->d_ttd;
86
87 if(variable && !entry->d_netmask.empty()) {
88 *variable = true;
89 }
90
91 // cerr<<"Looking at "<<entry->d_records.size()<<" records for this name"<<endl;
92 if (res) {
93 res->reserve(res->size() + entry->d_records.size());
94
95 for(const auto& k : entry->d_records) {
96 DNSRecord dr;
97 dr.d_name = qname;
98 dr.d_type = entry->d_qtype;
99 dr.d_class = QClass::IN;
100 dr.d_content = k;
101 dr.d_ttl = static_cast<uint32_t>(entry->d_ttd);
102 dr.d_place = DNSResourceRecord::ANSWER;
103 res->push_back(std::move(dr));
104 }
105 }
106
107 if(signatures) { // if you do an ANY lookup you are hosed XXXX
108 *signatures = entry->d_signatures;
109 }
110
111 if(authorityRecs) {
112 *authorityRecs = entry->d_authorityRecs;
113 }
114
115 if (state) {
116 *state = entry->d_state;
117 }
118
119 if (wasAuth) {
120 *wasAuth = entry->d_auth;
121 }
122
123 moveCacheItemToBack<SequencedTag>(map.d_map, entry);
124
125 return ttd;
126 }
127
MemRecursorCache::cache_t::const_iterator MemRecursorCache::getEntryUsingECSIndex(MapCombo& map, time_t now, const DNSName &qname, uint16_t qtype, bool requireAuth, const ComboAddress& who)
{
  // MUTEX SHOULD BE ACQUIRED
  /* Look up (qname, qtype) through the ECS index: prefer the most specific
     netmask entry matching 'who', then fall back to the generic (empty
     netmask) entry. Stale or expired index entries are cleaned up as a side
     effect. Returns map.d_map.end() when nothing usable is found. */
  auto ecsIndexKey = tie(qname, qtype);
  auto ecsIndex = map.d_ecsIndex.find(ecsIndexKey);
  if (ecsIndex != map.d_ecsIndex.end() && !ecsIndex->isEmpty()) {
    /* we have netmask-specific entries, let's see if we match one */
    while (true) {
      const Netmask best = ecsIndex->lookupBestMatch(who);
      if (best.empty()) {
        /* we have nothing more specific for you */
        break;
      }
      auto key = boost::make_tuple(qname, qtype, best);
      auto entry = map.d_map.find(key);
      if (entry == map.d_map.end()) {
        /* ecsIndex is not up-to-date: the cache entry behind this netmask is
           gone, so drop the netmask (and the whole index entry once empty)
           and retry with the next-best match */
        ecsIndex->removeNetmask(best);
        if (ecsIndex->isEmpty()) {
          map.d_ecsIndex.erase(ecsIndex);
          break;
        }
        continue;
      }

      if (entry->d_ttd > now) {
        if (!requireAuth || entry->d_auth) {
          return entry;
        }
        /* we need auth data and the best match is not authoritative */
        return map.d_map.end();
      }
      else {
        /* this netmask-specific entry has expired: push it to the front of
           the sequenced index so pruning reaps it soon, and forget its mask */
        moveCacheItemToFront<SequencedTag>(map.d_map, entry);
        ecsIndex->removeNetmask(best);
        if (ecsIndex->isEmpty()) {
          map.d_ecsIndex.erase(ecsIndex);
          break;
        }
      }
    }
  }

  /* we have nothing specific, let's see if we have a generic one */
  auto key = boost::make_tuple(qname, qtype, Netmask());
  auto entry = map.d_map.find(key);
  if (entry != map.d_map.end()) {
    if (entry->d_ttd > now) {
      if (!requireAuth || entry->d_auth) {
        return entry;
      }
    }
    else {
      /* expired generic entry: push to the front for early pruning */
      moveCacheItemToFront<SequencedTag>(map.d_map, entry);
    }
  }

  /* nothing for you, sorry */
  return map.d_map.end();
}
189
190 std::pair<MemRecursorCache::NameOnlyHashedTagIterator_t, MemRecursorCache::NameOnlyHashedTagIterator_t> MemRecursorCache::getEntries(MapCombo& map, const DNSName &qname, const QType& qt)
191 {
192 // MUTEX SHOULD BE ACQUIRED
193 if (!map.d_cachecachevalid || map.d_cachedqname != qname) {
194 map.d_cachedqname = qname;
195 const auto& idx = map.d_map.get<NameOnlyHashedTag>();
196 map.d_cachecache = idx.equal_range(qname);
197 map.d_cachecachevalid = true;
198 }
199 return map.d_cachecache;
200 }
201
202 bool MemRecursorCache::entryMatches(MemRecursorCache::OrderedTagIterator_t& entry, uint16_t qt, bool requireAuth, const ComboAddress& who)
203 {
204 // MUTEX SHOULD BE ACQUIRED
205 if (requireAuth && !entry->d_auth)
206 return false;
207
208 return ((entry->d_qtype == qt || qt == QType::ANY ||
209 (qt == QType::ADDR && (entry->d_qtype == QType::A || entry->d_qtype == QType::AAAA)))
210 && (entry->d_netmask.empty() || entry->d_netmask.match(who)));
211 }
212
// returns -1 for no hits
int32_t MemRecursorCache::get(time_t now, const DNSName &qname, const QType& qt, bool requireAuth, vector<DNSRecord>* res, const ComboAddress& who, vector<std::shared_ptr<RRSIGRecordContent>>* signatures, std::vector<std::shared_ptr<DNSRecord>>* authorityRecs, bool* variable, vState* state, bool* wasAuth)
{
  // Look up qname/qt in the shard owning qname. On a hit, fills whichever
  // optional out-parameters are non-null and returns the remaining TTL
  // (TTD minus now); on a miss returns a negative value.
  time_t ttd=0;
  // cerr<<"looking up "<< qname<<"|"+qt.getName()<<"\n";
  if(res) {
    res->clear();
  }
  const uint16_t qtype = qt.getCode();

  auto& map = getMap(qname);
  const lock l(map);

  /* If we don't have any netmask-specific entries at all, let's just skip this
     to be able to use the nice d_cachecache hack. */
  if (qtype != QType::ANY && !map.d_ecsIndex.empty()) {

    if (qtype == QType::ADDR) {
      // ADDR is a pseudo-type meaning "A and AAAA": look both up through the
      // ECS index and keep the smaller TTD when both hit.
      int32_t ret = -1;

      auto entryA = getEntryUsingECSIndex(map, now, qname, QType::A, requireAuth, who);
      if (entryA != map.d_map.end()) {
        ret = handleHit(map, entryA, qname, who, res, signatures, authorityRecs, variable, state, wasAuth);
      }
      auto entryAAAA = getEntryUsingECSIndex(map, now, qname, QType::AAAA, requireAuth, who);
      if (entryAAAA != map.d_map.end()) {
        int32_t ttdAAAA = handleHit(map, entryAAAA, qname, who, res, signatures, authorityRecs, variable, state, wasAuth);
        if (ret > 0) {
          ret = std::min(ret, ttdAAAA);
        } else {
          ret = ttdAAAA;
        }
      }
      // 'ret' still holds a TTD at this point; convert to TTL on the way out.
      return ret > 0 ? static_cast<int32_t>(ret-now) : ret;
    }
    else {
      auto entry = getEntryUsingECSIndex(map, now, qname, qtype, requireAuth, who);
      if (entry != map.d_map.end()) {
        return static_cast<int32_t>(handleHit(map, entry, qname, who, res, signatures, authorityRecs, variable, state, wasAuth) - now);
      }
      return -1;
    }
  }

  // No ECS-specific entries (or an ANY query): scan all entries that share
  // this qname via the cached name-only range.
  auto entries = getEntries(map, qname, qt);

  if(entries.first!=entries.second) {
    for(auto i=entries.first; i != entries.second; ++i) {

      auto firstIndexIterator = map.d_map.project<OrderedTag>(i);
      if (i->d_ttd <= now) {
        // Expired: push to the front of the sequenced index for pruning.
        moveCacheItemToFront<SequencedTag>(map.d_map, firstIndexIterator);
        continue;
      }

      if (!entryMatches(firstIndexIterator, qtype, requireAuth, who))
        continue;

      ttd = handleHit(map, firstIndexIterator, qname, who, res, signatures, authorityRecs, variable, state, wasAuth);

      if(qt.getCode()!=QType::ANY && qt.getCode()!=QType::ADDR) // normally if we have a hit, we are done
        break;
    }

    // cerr<<"time left : "<<ttd - now<<", "<< (res ? res->size() : 0) <<"\n";
    // NOTE(review): if the range was non-empty but nothing matched, ttd is
    // still 0 and this returns a large negative value rather than -1 —
    // callers appear to treat any negative result as a miss; confirm.
    return static_cast<int32_t>(ttd-now);
  }
  return -1;
}
282
283 void MemRecursorCache::replace(time_t now, const DNSName &qname, const QType& qt, const vector<DNSRecord>& content, const vector<shared_ptr<RRSIGRecordContent>>& signatures, const std::vector<std::shared_ptr<DNSRecord>>& authorityRecs, bool auth, boost::optional<Netmask> ednsmask, vState state)
284 {
285 auto& map = getMap(qname);
286 const lock l(map);
287
288 map.d_cachecachevalid = false;
289 // cerr<<"Replacing "<<qname<<" for "<< (ednsmask ? ednsmask->toString() : "everyone") << endl;
290 if (ednsmask) {
291 ednsmask = ednsmask->getNormalized();
292 }
293 auto key = boost::make_tuple(qname, qt.getCode(), ednsmask ? *ednsmask : Netmask());
294 bool isNew = false;
295 cache_t::iterator stored = map.d_map.find(key);
296 if (stored == map.d_map.end()) {
297 stored = map.d_map.insert(CacheEntry(key, auth)).first;
298 map.d_entriesCount++;
299 isNew = true;
300 }
301
302 /* if we are inserting a new entry or updating an expired one (in which case the
303 ECS index might have been removed but the entry still exists because it has not
304 been garbage collected yet) we might need to update the ECS index.
305 Otherwise it should already be indexed and we don't need to update it.
306 */
307 if (isNew || stored->d_ttd <= now) {
308 /* don't bother building an ecsIndex if we don't have any netmask-specific entries */
309 if (ednsmask && !ednsmask->empty()) {
310 auto ecsIndexKey = boost::make_tuple(qname, qt.getCode());
311 auto ecsIndex = map.d_ecsIndex.find(ecsIndexKey);
312 if (ecsIndex == map.d_ecsIndex.end()) {
313 ecsIndex = map.d_ecsIndex.insert(ECSIndexEntry(qname, qt.getCode())).first;
314 }
315 ecsIndex->addMask(*ednsmask);
316 }
317 }
318
319 time_t maxTTD=std::numeric_limits<time_t>::max();
320 CacheEntry ce=*stored; // this is a COPY
321 ce.d_qtype=qt.getCode();
322 ce.d_signatures=signatures;
323 ce.d_authorityRecs=authorityRecs;
324 ce.d_state=state;
325
326 // cerr<<"asked to store "<< (qname.empty() ? "EMPTY" : qname.toString()) <<"|"+qt.getName()<<" -> '";
327 // cerr<<(content.empty() ? string("EMPTY CONTENT") : content.begin()->d_content->getZoneRepresentation())<<"', auth="<<auth<<", ce.auth="<<ce.d_auth;
328 // cerr<<", ednsmask: " << (ednsmask ? ednsmask->toString() : "none") <<endl;
329
330 if(!auth && ce.d_auth) { // unauth data came in, we have some auth data, but is it fresh?
331 if(ce.d_ttd > now) { // we still have valid data, ignore unauth data
332 // cerr<<"\tStill hold valid auth data, and the new data is unauth, return\n";
333 return;
334 }
335 else {
336 ce.d_auth = false; // new data won't be auth
337 }
338 }
339
340 // refuse any attempt to *raise* the TTL of auth NS records, as it would make it possible
341 // for an auth to keep a "ghost" zone alive forever, even after the delegation is gone from
342 // the parent
343 // BUT make sure that we CAN refresh the root
344 if(ce.d_auth && auth && qt.getCode()==QType::NS && !isNew && !qname.isRoot()) {
345 // cerr<<"\tLimiting TTL of auth->auth NS set replace to "<<ce.d_ttd<<endl;
346 maxTTD = ce.d_ttd;
347 }
348
349 if(auth) {
350 ce.d_auth = true;
351 }
352
353 ce.d_records.clear();
354 ce.d_records.reserve(content.size());
355
356 for(const auto i : content) {
357 /* Yes, we have altered the d_ttl value by adding time(nullptr) to it
358 prior to calling this function, so the TTL actually holds a TTD. */
359 ce.d_ttd=min(maxTTD, static_cast<time_t>(i.d_ttl)); // XXX this does weird things if TTLs differ in the set
360 // cerr<<"To store: "<<i.d_content->getZoneRepresentation()<<" with ttl/ttd "<<i.d_ttl<<", capped at: "<<maxTTD<<endl;
361 ce.d_records.push_back(i.d_content);
362 }
363
364 if (!isNew) {
365 moveCacheItemToBack<SequencedTag>(map.d_map, stored);
366 }
367 map.d_map.replace(stored, ce);
368 }
369
size_t MemRecursorCache::doWipeCache(const DNSName& name, bool sub, uint16_t qtype)
{
  // Remove cache entries for 'name' (and, when 'sub' is true, every name
  // below it). qtype 0xffff means "all types". Returns the number of cache
  // entries removed; ECS index removals are not counted.
  size_t count = 0;

  if (!sub) {
    auto& map = getMap(name);
    const lock l(map);
    map.d_cachecachevalid = false;
    // NOTE(review): this erases every entry for the exact name regardless of
    // qtype; only the ECS index cleanup below honours a specific qtype —
    // confirm this asymmetry is intended.
    auto& idx = map.d_map.get<NameOnlyHashedTag>();
    size_t n = idx.erase(name);
    count += n;
    map.d_entriesCount -= n;
    if (qtype == 0xffff) {
      auto& ecsIdx = map.d_ecsIndex.get<OrderedTag>();
      auto ecsIndexRange = ecsIdx.equal_range(name);
      ecsIdx.erase(ecsIndexRange.first, ecsIndexRange.second);
    }
    else {
      auto& ecsIdx = map.d_ecsIndex.get<HashedTag>();
      auto ecsIndexRange = ecsIdx.equal_range(tie(name, qtype));
      ecsIdx.erase(ecsIndexRange.first, ecsIndexRange.second);
    }
  }
  else {
    // Subtree wipe: names below 'name' can hash to any shard, so every
    // shard must be visited.
    for (auto& map : d_maps) {
      const lock l(map);
      map.d_cachecachevalid = false;
      // Walk the ordered index from 'name' until we leave its subtree,
      // erasing matching entries as we go (erase returns the next iterator).
      auto& idx = map.d_map.get<OrderedTag>();
      for (auto i = idx.lower_bound(name); i != idx.end(); ) {
        if (!i->d_qname.isPartOf(name))
          break;
        if (i->d_qtype == qtype || qtype == 0xffff) {
          count++;
          i = idx.erase(i);
          map.d_entriesCount--;
        } else {
          ++i;
        }
      }
      // Same subtree walk over the ECS index.
      auto& ecsIdx = map.d_ecsIndex.get<OrderedTag>();
      for (auto i = ecsIdx.lower_bound(name); i != ecsIdx.end(); ) {
        if (!i->d_qname.isPartOf(name))
          break;
        if (i->d_qtype == qtype || qtype == 0xffff) {
          i = ecsIdx.erase(i);
        } else {
          ++i;
        }
      }
    }
  }
  return count;
}
423
// Name should be doLimitTime or so
bool MemRecursorCache::doAgeCache(time_t now, const DNSName& name, uint16_t qtype, uint32_t newTTL)
{
  // Cap the remaining lifetime of the (name, qtype) entry to newTTL seconds.
  // Returns true if the entry exists, is still alive and had more than
  // newTTL seconds left; false otherwise.
  auto& map = getMap(name);
  const lock l(map);
  // Partial-key lookup: matches on (name, qtype) regardless of netmask.
  cache_t::iterator iter = map.d_map.find(tie(name, qtype));
  if (iter == map.d_map.end()) {
    return false;
  }

  CacheEntry ce = *iter;
  if (ce.d_ttd < now)
    return false;  // would be dead anyhow

  uint32_t maxTTL = static_cast<uint32_t>(ce.d_ttd - now);
  if (maxTTL > newTTL) {
    map.d_cachecachevalid = false;

    time_t newTTD = now + newTTL;

    if (ce.d_ttd > newTTD) {
      // Store the shortened TTD back via replace() on the copied entry.
      ce.d_ttd = newTTD;
      map.d_map.replace(iter, ce);
    }
    return true;
  }
  return false;
}
452
bool MemRecursorCache::updateValidationStatus(time_t now, const DNSName &qname, const QType& qt, const ComboAddress& who, bool requireAuth, vState newState, boost::optional<time_t> capTTD)
{
  // Set the DNSSEC validation state of the matching cache entry (or entries,
  // for ANY/ADDR lookups) and optionally cap its TTD. Returns true if at
  // least one entry was updated.
  auto& map = getMap(qname);
  const lock l(map);

  bool updated = false;
  uint16_t qtype = qt.getCode();
  if (qtype != QType::ANY && qtype != QType::ADDR && !map.d_ecsIndex.empty()) {
    // ECS-aware path: update the single best entry for this client address.
    auto entry = getEntryUsingECSIndex(map, now, qname, qtype, requireAuth, who);
    if (entry == map.d_map.end()) {
      return false;
    }

    entry->d_state = newState;
    if (capTTD) {
      entry->d_ttd = std::min(entry->d_ttd, *capTTD);
    }
    return true;
  }

  // Generic path: walk all entries sharing this qname and update matches.
  auto entries = getEntries(map, qname, qt);

  for(auto i = entries.first; i != entries.second; ++i) {
    auto firstIndexIterator = map.d_map.project<OrderedTag>(i);

    if (!entryMatches(firstIndexIterator, qtype, requireAuth, who))
      continue;

    i->d_state = newState;
    if (capTTD) {
      i->d_ttd = std::min(i->d_ttd, *capTTD);
    }
    updated = true;

    if(qtype != QType::ANY && qtype != QType::ADDR) // normally if we have a hit, we are done
      break;
  }

  return updated;
}
493
494 uint64_t MemRecursorCache::doDump(int fd)
495 {
496 int newfd = dup(fd);
497 if (newfd == -1) {
498 return 0;
499 }
500 auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(newfd, "w"), fclose);
501 if(!fp) { // dup probably failed
502 close(newfd);
503 return 0;
504 }
505
506 fprintf(fp.get(), "; main record cache dump follows\n;\n");
507 uint64_t count = 0;
508
509 for (auto& map : d_maps) {
510 const lock l(map);
511 const auto& sidx = map.d_map.get<SequencedTag>();
512
513 time_t now = time(0);
514 for (const auto i : sidx) {
515 for (const auto j : i.d_records) {
516 count++;
517 try {
518 fprintf(fp.get(), "%s %" PRId64 " IN %s %s ; (%s) auth=%i %s\n", i.d_qname.toString().c_str(), static_cast<int64_t>(i.d_ttd - now), DNSRecordContent::NumberToType(i.d_qtype).c_str(), j->getZoneRepresentation().c_str(), vStates[i.d_state], i.d_auth, i.d_netmask.empty() ? "" : i.d_netmask.toString().c_str());
519 }
520 catch(...) {
521 fprintf(fp.get(), "; error printing '%s'\n", i.d_qname.empty() ? "EMPTY" : i.d_qname.toString().c_str());
522 }
523 }
524 for (const auto &sig : i.d_signatures) {
525 count++;
526 try {
527 fprintf(fp.get(), "%s %" PRId64 " IN RRSIG %s ; %s\n", i.d_qname.toString().c_str(), static_cast<int64_t>(i.d_ttd - now), sig->getZoneRepresentation().c_str(), i.d_netmask.empty() ? "" : i.d_netmask.toString().c_str());
528 }
529 catch(...) {
530 fprintf(fp.get(), "; error printing '%s'\n", i.d_qname.empty() ? "EMPTY" : i.d_qname.toString().c_str());
531 }
532 }
533 }
534 }
535 return count;
536 }
537
538 void MemRecursorCache::doPrune(size_t keep)
539 {
540 //size_t maxCached = d_maxEntries;
541 size_t cacheSize = size();
542 pruneMutexCollectionsVector<SequencedTag>(*this, d_maps, keep, cacheSize);
543 }
544