]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/syncres.hh
implement pdns-distributes-queries to make powerdns distribute queries itself
[thirdparty/pdns.git] / pdns / syncres.hh
CommitLineData
00a19ff7
BH
1#ifndef PDNS_SYNCRES_HH
2#define PDNS_SYNCRES_HH
3#include <string>
4#include "dns.hh"
5#include "qtype.hh"
6#include <vector>
7#include <set>
8#include <map>
eefd15f9
BH
9#include <cmath>
10#include <iostream>
11#include <utility>
c836dc19 12#include "misc.hh"
00a19ff7 13#include "lwres.hh"
d6d5dea7 14#include <boost/utility.hpp>
1d5b3ce6 15#include "sstuff.hh"
9fdf67d5 16#include "recursor_cache.hh"
16beeaa4 17#include "recpacketcache.hh"
1d5b3ce6 18#include <boost/tuple/tuple.hpp>
71dea98d 19#include <boost/optional.hpp>
1d5b3ce6
BH
20#include <boost/tuple/tuple_comparison.hpp>
21#include "mtasker.hh"
a9af3782 22#include "iputils.hh"
eefd15f9 23
//! Declared here, defined elsewhere. NOTE(review): presumably seeds the cache with root hints — confirm at the definition.
void primeHints();
00a19ff7 25
38e22b5a
BH
26struct NegCacheEntry
27{
f4df5e89 28 string d_name;
be718669 29 QType d_qtype;
33988bfb 30 string d_qname;
33988bfb 31 uint32_t d_ttd;
38e22b5a 32};
00a19ff7 33
49f076e8 34
bb4bdbaf 35template<class Thing> class Throttle : public boost::noncopyable
49f076e8
BH
36{
37public:
38 Throttle()
39 {
40 d_limit=3;
41 d_ttl=60;
42 d_last_clean=time(0);
43 }
87da00e7 44 bool shouldThrottle(time_t now, const Thing& t)
49f076e8 45 {
cd7bf56b 46 if(now > d_last_clean + 300 ) {
bb4bdbaf 47
49f076e8 48 d_last_clean=now;
594c2ee7 49 for(typename cont_t::iterator i=d_cont.begin();i!=d_cont.end();) {
4957a608
BH
50 if( i->second.ttd < now) {
51 d_cont.erase(i++);
52 }
53 else
54 ++i;
594c2ee7 55 }
49f076e8
BH
56 }
57
49f076e8
BH
58 typename cont_t::iterator i=d_cont.find(t);
59 if(i==d_cont.end())
60 return false;
014c60c3 61 if(now > i->second.ttd || i->second.count-- < 0) {
49f076e8 62 d_cont.erase(i);
49f076e8 63 }
014c60c3
BH
64
65 return true; // still listed, still blocked
49f076e8 66 }
87da00e7 67 void throttle(time_t now, const Thing& t, unsigned int ttl=0, unsigned int tries=0)
49f076e8
BH
68 {
69 typename cont_t::iterator i=d_cont.find(t);
87da00e7 70 entry e={ now+(ttl ? ttl : d_ttl), tries ? tries : d_limit};
49f076e8 71
c214232f 72 if(i==d_cont.end()) {
49f076e8 73 d_cont[t]=e;
c214232f
BH
74 }
75 else if(i->second.ttd > e.ttd || (i->second.count) < e.count)
76 d_cont[t]=e;
8a5602d4
BH
77 }
78
79 unsigned int size()
80 {
705f31ae 81 return (unsigned int)d_cont.size();
49f076e8
BH
82 }
83private:
84 int d_limit;
85 int d_ttl;
86 time_t d_last_clean;
87 struct entry
88 {
89 time_t ttd;
90 int count;
91 };
92 typedef map<Thing,entry> cont_t;
93 cont_t d_cont;
94};
95
96
eefd15f9 97/** Class that implements a decaying EWMA.
36c5ee42 98 This class keeps an exponentially weighted moving average which, additionally, decays over time.
eefd15f9
BH
99 The decaying is only done on get.
100*/
101class DecayingEwma
102{
103public:
71dea98d
BH
104 DecayingEwma() : d_val(0.0)
105 {
106 d_needinit=true;
118dcc93 107 d_last.tv_sec = d_last.tv_usec = 0;
71dea98d 108 d_lastget=d_last;
36c5ee42 109 }
d6d5dea7 110
71dea98d 111 DecayingEwma(const DecayingEwma& orig) : d_last(orig.d_last), d_lastget(orig.d_lastget), d_val(orig.d_val), d_needinit(orig.d_needinit)
eefd15f9 112 {
71dea98d 113 }
d6d5dea7 114
71dea98d
BH
115 struct timeval getOrMakeTime(struct timeval* tv)
116 {
117 if(tv)
118 return *tv;
119 else {
120 struct timeval ret;
f2b6ccd6 121 Utility::gettimeofday(&ret, 0);
71dea98d
BH
122 return ret;
123 }
d6d5dea7
BH
124 }
125
5a26bb53 126 void submit(int val, struct timeval* tv)
d6d5dea7 127 {
71dea98d
BH
128 struct timeval now=getOrMakeTime(tv);
129
130 if(d_needinit) {
131 d_last=now;
132 d_needinit=false;
133 }
134
135 float diff= makeFloat(d_last - now);
d6d5dea7 136
36c5ee42 137 d_last=now;
71dea98d 138 double factor=exp(diff)/2.0; // might be '0.5', or 0.0001
705f31ae 139 d_val=(float)((1-factor)*val+ (float)factor*d_val);
eefd15f9 140 }
d6d5dea7 141
5a26bb53 142 double get(struct timeval* tv)
71dea98d
BH
143 {
144 struct timeval now=getOrMakeTime(tv);
145 float diff=makeFloat(d_lastget-now);
36c5ee42 146 d_lastget=now;
705f31ae 147 float factor=exp(diff/60.0f); // is 1.0 or less
eefd15f9
BH
148 return d_val*=factor;
149 }
150
996c89cc 151 bool stale(time_t limit) const
9fdf67d5 152 {
71dea98d 153 return limit > d_lastget.tv_sec;
9fdf67d5
BH
154 }
155
eefd15f9 156private:
71dea98d
BH
157 struct timeval d_last; // stores time
158 struct timeval d_lastget; // stores time
159 float d_val;
160 bool d_needinit;
161};
162
163
bb4bdbaf 164class SyncRes : public boost::noncopyable
00a19ff7
BH
165{
166public:
ac0e821b 167 explicit SyncRes(const struct timeval& now);
bb4bdbaf 168
a9af3782 169 int beginResolve(const string &qname, const QType &qtype, uint16_t qclass, vector<DNSResourceRecord>&ret);
c836dc19
BH
170 void setId(int id)
171 {
9fdf67d5
BH
172 if(s_log)
173 d_prefix="["+itoa(id)+"] ";
c836dc19
BH
174 }
175 static void setLog(bool log)
176 {
177 s_log=log;
178 }
179 void setCacheOnly(bool state=true)
180 {
181 d_cacheonly=state;
182 }
183 void setNoCache(bool state=true)
184 {
185 d_nocache=state;
186 }
2188dcc3
BH
187
188 void setDoEDNS0(bool state=true)
189 {
190 d_doEDNS0=state;
191 }
192
ff1872cf
BH
193
194
c1d73d94 195 int asyncresolveWrapper(const ComboAddress& ip, const string& domain, int type, bool doTCP, bool sendRDQuery, struct timeval* now, LWResult* res);
ff1872cf
BH
196
197 static void doEDNSDumpAndClose(int fd);
c0ea6e55 198
c836dc19 199 static unsigned int s_queries;
7becf07f 200 static unsigned int s_outgoingtimeouts;
3de83124 201 static unsigned int s_throttledqueries;
66e0b6ea 202 static unsigned int s_dontqueries;
c836dc19 203 static unsigned int s_outqueries;
5c633640 204 static unsigned int s_tcpoutqueries;
525b8a7c 205 static unsigned int s_nodelegated;
c571588b 206 static unsigned int s_unreachables;
996c89cc 207 static bool s_doIPv6;
c836dc19 208 unsigned int d_outqueries;
5c633640 209 unsigned int d_tcpoutqueries;
3de83124 210 unsigned int d_throttledqueries;
d77df2e1 211 unsigned int d_timeouts;
c571588b
BH
212 unsigned int d_unreachables;
213
33988bfb
BH
214 // typedef map<string,NegCacheEntry> negcache_t;
215
216 typedef multi_index_container <
217 NegCacheEntry,
218 indexed_by <
219 ordered_unique<
f4df5e89
BH
220 composite_key<
221 NegCacheEntry,
222 member<NegCacheEntry, string, &NegCacheEntry::d_name>,
223 member<NegCacheEntry, QType, &NegCacheEntry::d_qtype>
7738a23f
BH
224 >,
225 composite_key_compare<CIStringCompare, std::less<QType> >
33988bfb
BH
226 >,
227 ordered_non_unique<
228 member<NegCacheEntry, uint32_t, &NegCacheEntry::d_ttd>
229 >
230 >
49a699c4
BH
231 > negcache_t;
232
996c89cc
BH
233 //! This represents a number of decaying Ewmas, used to store performance per namerserver-name.
234 /** Modelled to work mostly like the underlying DecayingEwma. After you've called get,
235 d_best is filled out with the best address for this collection */
236 struct DecayingEwmaCollection
237 {
238 void submit(const ComboAddress& remote, int usecs, struct timeval* now)
239 {
240 collection_t::iterator pos;
241 for(pos=d_collection.begin(); pos != d_collection.end(); ++pos)
4957a608
BH
242 if(pos->first==remote)
243 break;
996c89cc 244 if(pos!=d_collection.end()) {
4957a608 245 pos->second.submit(usecs, now);
996c89cc
BH
246 }
247 else {
4957a608
BH
248 DecayingEwma de;
249 de.submit(usecs, now);
250 d_collection.push_back(make_pair(remote, de));
996c89cc
BH
251 }
252 }
253
254 double get(struct timeval* now)
255 {
256 if(d_collection.empty())
4957a608 257 return 0;
996c89cc
BH
258 double ret=numeric_limits<double>::max();
259 double tmp;
260 for(collection_t::iterator pos=d_collection.begin(); pos != d_collection.end(); ++pos) {
4957a608
BH
261 if((tmp=pos->second.get(now)) < ret) {
262 ret=tmp;
263 d_best=pos->first;
264 }
996c89cc
BH
265 }
266
267 return ret;
268 }
269
270 bool stale(time_t limit) const
271 {
272 for(collection_t::const_iterator pos=d_collection.begin(); pos != d_collection.end(); ++pos)
4957a608
BH
273 if(!pos->second.stale(limit))
274 return false;
996c89cc
BH
275 return true;
276 }
277
278 typedef vector<pair<ComboAddress, DecayingEwma> > collection_t;
279 collection_t d_collection;
280 ComboAddress d_best;
281 };
282
c0ea6e55 283 typedef map<string, DecayingEwmaCollection, CIStringCompare> nsspeeds_t;
49a699c4 284
9fdf67d5 285
c0ea6e55
BH
286 struct EDNSStatus
287 {
288 EDNSStatus() : mode(UNKNOWN), modeSetAt(0), EDNSPingHitCount(0) {}
289 enum EDNSMode { CONFIRMEDPINGER=-1, UNKNOWN=0, EDNSNOPING=1, EDNSPINGOK=2, EDNSIGNORANT=3, NOEDNS=4 } mode;
290 time_t modeSetAt;
291 int EDNSPingHitCount;
292 };
293
294 typedef map<ComboAddress, EDNSStatus> ednsstatus_t;
bb4bdbaf 295
49a699c4 296
bb4bdbaf 297
840c10ec 298 static bool s_noEDNSPing;
4bfae16d 299 static bool s_noEDNS;
c0ea6e55 300
5605c067
BH
301 struct AuthDomain
302 {
2e5ae2b2 303 vector<ComboAddress> d_servers;
3b608765 304 bool d_rdForward;
5605c067
BH
305 typedef multi_index_container <
306 DNSResourceRecord,
307 indexed_by <
308 ordered_non_unique<
309 composite_key< DNSResourceRecord,
4957a608
BH
310 member<DNSResourceRecord, string, &DNSResourceRecord::qname>,
311 member<DNSResourceRecord, QType, &DNSResourceRecord::qtype>
5605c067
BH
312 >,
313 composite_key_compare<CIStringCompare, std::less<QType> >
314 >
315 >
316 > records_t;
317 records_t d_records;
318 };
319
320
321 typedef map<string, AuthDomain, CIStringCompare> domainmap_t;
49a699c4 322
5605c067 323
996c89cc 324 typedef Throttle<tuple<ComboAddress,string,uint16_t> > throttle_t;
49a699c4 325
fe213470 326 struct timeval d_now;
a9af3782 327 static unsigned int s_maxnegttl;
c3e753c7 328 static unsigned int s_maxcachettl;
1051f8a9
BH
329 static unsigned int s_packetcachettl;
330 static unsigned int s_packetcacheservfailttl;
331 static bool s_nopacketcache;
a9af3782 332 static string s_serverID;
bb4bdbaf 333
49a699c4
BH
334 struct StaticStorage {
335 negcache_t negcache;
336 nsspeeds_t nsSpeeds;
337 ednsstatus_t ednsstatus;
338 throttle_t throttle;
339 domainmap_t* domainmap;
340 };
49a699c4 341
00a19ff7
BH
342private:
343 struct GetBestNSAnswer;
7305df82 344 int doResolveAt(set<string, CIStringCompare> nameservers, string auth, bool flawedNSSet, const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret,
4957a608 345 int depth, set<GetBestNSAnswer>&beenthere);
00a19ff7 346 int doResolve(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, set<GetBestNSAnswer>& beenthere);
e93c956b 347 bool doOOBResolve(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int &res);
5605c067 348 domainmap_t::const_iterator getBestAuthZone(string* qname);
00a19ff7
BH
349 bool doCNAMECacheCheck(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int &res);
350 bool doCacheCheck(const string &qname, const QType &qtype, vector<DNSResourceRecord>&ret, int depth, int &res);
7305df82 351 void getBestNSFromCache(const string &qname, set<DNSResourceRecord>&bestns, bool* flawedNSSet, int depth, set<GetBestNSAnswer>& beenthere);
00a19ff7 352 void addCruft(const string &qname, vector<DNSResourceRecord>& ret);
7305df82 353 string getBestNSNamesFromCache(const string &qname,set<string, CIStringCompare>& nsset, bool* flawedNSSet, int depth, set<GetBestNSAnswer>&beenthere);
00a19ff7
BH
354 void addAuthorityRecords(const string& qname, vector<DNSResourceRecord>& ret, int depth);
355
996c89cc 356 inline vector<string> shuffleInSpeedOrder(set<string, CIStringCompare> &nameservers, const string &prefix);
00a19ff7 357 bool moreSpecificThan(const string& a, const string &b);
996c89cc 358 vector<ComboAddress> getAs(const string &qname, int depth, set<GetBestNSAnswer>& beenthere);
c75a6a9e 359
00a19ff7 360private:
c836dc19
BH
361 string d_prefix;
362 static bool s_log;
363 bool d_cacheonly;
364 bool d_nocache;
2188dcc3 365 bool d_doEDNS0;
c75a6a9e 366
00a19ff7
BH
367 struct GetBestNSAnswer
368 {
369 string qname;
370 set<DNSResourceRecord> bestns;
371 bool operator<(const GetBestNSAnswer &b) const
372 {
373 if(qname<b.qname)
4957a608 374 return true;
00a19ff7 375 if(qname==b.qname)
4957a608 376 return bestns<b.bestns;
00a19ff7
BH
377 return false;
378 }
379 };
380
381};
ac0e821b
BH
382extern __thread SyncRes::StaticStorage* t_sstorage;
383
5c633640 384class Socket;
a9af3782 385/* external functions, opaque to us */
5c633640
BH
386int asendtcp(const string& data, Socket* sock);
387int arecvtcp(string& data, int len, Socket* sock);
1d5b3ce6
BH
388
389
390struct PacketID
391{
787e5eab 392 PacketID() : id(0), type(0), sock(0), inNeeded(0), outPos(0), nearMisses(0), fd(-1)
67770277
BH
393 {
394 memset(&remote, 0, sizeof(remote));
395 }
1d5b3ce6
BH
396
397 uint16_t id; // wait for a specific id/remote pair
996c89cc 398 ComboAddress remote; // this is the remote
0d5f0a9f 399 string domain; // this is the question
787e5eab 400 uint16_t type; // and this is its type
1d5b3ce6
BH
401
402 Socket* sock; // or wait for an event on a TCP fd
403 int inNeeded; // if this is set, we'll read until inNeeded bytes are read
404 string inMSG; // they'll go here
405
406 string outMSG; // the outgoing message that needs to be sent
407 string::size_type outPos; // how far we are along in the outMSG
408
35ce8576 409 mutable uint32_t nearMisses; // number of near misses - host correct, id wrong
96f81a93
BH
410 typedef set<uint16_t > chain_t;
411 mutable chain_t chain;
4ef015cd 412 int fd;
35ce8576 413
1d5b3ce6
BH
414 bool operator<(const PacketID& b) const
415 {
416 int ourSock= sock ? sock->getHandle() : 0;
417 int bSock = b.sock ? b.sock->getHandle() : 0;
787e5eab 418 if( tie(remote, ourSock, type) < tie(b.remote, bSock, b.type))
0d5f0a9f 419 return true;
787e5eab 420 if( tie(remote, ourSock, type) > tie(b.remote, bSock, b.type))
0d5f0a9f
BH
421 return false;
422
ec6480f3 423 if(pdns_ilexicographical_compare(domain, b.domain))
96f81a93 424 return true;
ec6480f3 425 if(pdns_ilexicographical_compare(b.domain, domain))
96f81a93
BH
426 return false;
427
428 return tie(fd, id) < tie(b.fd, b.id);
1d5b3ce6
BH
429 }
430};
431
96f81a93
BH
432struct PacketIDBirthdayCompare: public binary_function<PacketID, PacketID, bool>
433{
434 bool operator()(const PacketID& a, const PacketID& b) const
435 {
436 int ourSock= a.sock ? a.sock->getHandle() : 0;
437 int bSock = b.sock ? b.sock->getHandle() : 0;
787e5eab 438 if( tie(a.remote, ourSock, a.type) < tie(b.remote, bSock, b.type))
96f81a93 439 return true;
787e5eab 440 if( tie(a.remote, ourSock, a.type) > tie(b.remote, bSock, b.type))
96f81a93
BH
441 return false;
442
ec6480f3 443 return pdns_ilexicographical_compare(a.domain, b.domain);
96f81a93
BH
444 }
445};
49a699c4 446extern __thread MemRecursorCache* t_RC;
16beeaa4 447extern __thread RecursorPacketCache* t_packetCache;
d2392145 448typedef MTasker<PacketID,string> MT_t;
bb4bdbaf 449extern __thread MT_t* MT;
1d5b3ce6 450
b3b5459d 451
1d5b3ce6
BH
//! Plain collection of counters; a single global instance, g_stats, is declared further down in this header.
struct RecursorStats
{
  uint64_t servFails;
  uint64_t nxDomains;
  uint64_t noErrors;
  // NOTE(review): the names suggest a latency histogram (bucket bounds presumably in msec) — confirm where these are updated
  uint64_t answers0_1;
  uint64_t answers1_10;
  uint64_t answers10_100;
  uint64_t answers100_1000;
  uint64_t answersSlow;
  uint64_t avgLatencyUsec;
  uint64_t qcounter;
  uint64_t tcpqcounter;
  uint64_t unauthorizedUDP;
  uint64_t unauthorizedTCP;
  uint64_t tcpClientOverflow;
  uint64_t clientParseError;
  uint64_t serverParseError;
  uint64_t unexpectedCount;
  uint64_t caseMismatchCount;
  uint64_t spoofCount;
  uint64_t resourceLimits;
  uint64_t overCapacityDrops;
  uint64_t ipv6queries;
  uint64_t chainResends;
  uint64_t nsSetInvalidations;
  uint64_t ednsPingMatches;
  uint64_t ednsPingMismatches;
  uint64_t noPingOutQueries;
  uint64_t noEdnsOutQueries;
  uint64_t packetCacheHits;
  uint64_t noPacketError;
  time_t startupTime;
};
996c89cc 481
0e408828 482//! represents a running TCP/IP client session
50a5ef72 483class TCPConnection
0e408828 484{
50a5ef72 485public:
0e408828
BH
486 int fd;
487 enum stateenum {BYTE0, BYTE1, GETQUESTION, DONE} state;
488 int qlen;
489 int bytesread;
490 ComboAddress remote;
491 char data[65535];
0e408828
BH
492
493 static void closeAndCleanup(int fd, const ComboAddress& remote);
494 void closeAndCleanup();
50a5ef72
BH
495 static unsigned int getCurrentConnections() { return s_currentConnections; }
496 static void incCurrentConnections() { s_currentConnections++; }
497 static void decCurrentConnections() { s_currentConnections--; }
498private:
499 static volatile unsigned int s_currentConnections; //!< total number of current TCP connections
0e408828
BH
500};
501
502
b3b5459d
BH
503struct RemoteKeeper
504{
a9af3782
BH
505 typedef vector<ComboAddress> remotes_t;
506 remotes_t remotes;
507 int d_remotepos;
508 void addRemote(const ComboAddress& remote)
509 {
510 if(!remotes.size())
511 return;
512
513 remotes[(d_remotepos++) % remotes.size()]=remote;
514 }
1d5b3ce6 515};
b3b5459d 516extern __thread RemoteKeeper* t_remotes;
674cf0f6 517string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end);
18af64a8 518void parseACLs();
1d5b3ce6 519extern RecursorStats g_stats;
c3828c03 520extern unsigned int g_numThreads;
739f6278
BH
521
//! Inserts \p x into index \p i, or overwrites the element that blocked the insertion.
/** \return the pair produced by i.insert(x). When the insertion was refused,
            the bool is replaced by the result of i.replace() on the blocking
            element, so it reports whether \p x ended up in the index. */
template<typename Index>
std::pair<typename Index::iterator,bool>
replacing_insert(Index& i,const typename Index::value_type& x)
{
  typedef std::pair<typename Index::iterator,bool> result_t;

  result_t outcome=i.insert(x);
  if(outcome.second)
    return outcome;

  outcome.second=i.replace(outcome.first,x);
  return outcome;
}
530
ee1ada80
BH
531
532std::string reloadAuthAndForwards();
c1d73d94 533ComboAddress parseIPAndPort(const std::string& input, uint16_t port);
1652a63e 534ComboAddress getQueryLocalAddress(int family, uint16_t port);
3427fa8a 535typedef boost::function<void*(void)> pipefunc_t;
49a699c4 536void broadcastFunction(const pipefunc_t& func, bool skipSelf = false);
00c9b8c1 537void distributeAsyncFunction(const pipefunc_t& func);
3427fa8a
BH
538
539
13034931 540template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf=false);
3427fa8a 541
49a699c4 542SyncRes::domainmap_t* parseAuthAndForwards();
3427fa8a 543
13034931
BH
544uint64_t* pleaseGetNsSpeedsSize();
545uint64_t* pleaseGetCacheSize();
546uint64_t* pleaseGetNegCacheSize();
547uint64_t* pleaseGetCacheHits();
548uint64_t* pleaseGetCacheMisses();
549uint64_t* pleaseGetConcurrentQueries();
550uint64_t* pleaseGetThrottleSize();
16beeaa4
BH
551uint64_t* pleaseGetPacketCacheHits();
552uint64_t* pleaseGetPacketCacheSize();
54ea3a27 553uint64_t* pleaseWipeCache(const std::string& canon);
16beeaa4 554
00a19ff7 555#endif