]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
14e0fd287bcc0b8e9e696513808921575bfa9bf4
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "capabilities.hh"
37 #include "circular_buffer.hh"
38 #include "dnscrypt.hh"
39 #include "dnsdist-cache.hh"
40 #include "dnsdist-dynbpf.hh"
41 #include "dnsdist-lbpolicies.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52 #include "proxy-protocol.hh"
53
/* Entry points implemented in other translation units. */
void carbonDumpThread();
uint64_t uptimeOfProcess(const std::string& str);

/* Global EDNS Client Subnet settings; DNSQuestion's constructor reads them to
   seed its ecsPrefixLength and ecsOverride members. */
extern uint16_t g_ECSSourcePrefixV4;
extern uint16_t g_ECSSourcePrefixV6;
extern bool g_ECSOverride;

/* String key/value pairs that can be attached to a query (see DNSQuestion::qTag). */
using QTag = std::unordered_map<std::string, std::string>;
62
63 struct DNSQuestion
64 {
65 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
66 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
67 const uint16_t* flags = getFlagsFromDNSHeader(dh);
68 origFlags = *flags;
69 }
70 DNSQuestion(const DNSQuestion&) = delete;
71 DNSQuestion& operator=(const DNSQuestion&) = delete;
72 DNSQuestion(DNSQuestion&&) = default;
73
74 std::string getTrailingData() const;
75 bool setTrailingData(const std::string&);
76
77 #ifdef HAVE_PROTOBUF
78 boost::optional<boost::uuids::uuid> uniqueId;
79 #endif
80 Netmask ecs;
81 boost::optional<Netmask> subnet;
82 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
83 std::string poolname;
84 const DNSName* qname{nullptr};
85 const ComboAddress* local{nullptr};
86 const ComboAddress* remote{nullptr};
87 std::shared_ptr<QTag> qTag{nullptr};
88 std::unique_ptr<std::vector<ProxyProtocolValue>> proxyProtocolValues{nullptr};
89 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
90 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
91 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
92 struct dnsheader* dh{nullptr};
93 const struct timespec* queryTime{nullptr};
94 struct DOHUnit* du{nullptr};
95 size_t size;
96 unsigned int consumed{0};
97 int delayMsec{0};
98 boost::optional<uint32_t> tempFailureTTL;
99 uint32_t cacheKeyNoECS;
100 uint32_t cacheKey;
101 const uint16_t qtype;
102 const uint16_t qclass;
103 uint16_t len;
104 uint16_t ecsPrefixLength;
105 uint16_t origFlags;
106 uint8_t ednsRCode{0};
107 const bool tcp;
108 bool skipCache{false};
109 bool ecsOverride;
110 bool useECS{true};
111 bool addXPF{true};
112 bool addProxyProtocol{true};
113 bool ecsSet{false};
114 bool ecsAdded{false};
115 bool ednsAdded{false};
116 bool useZeroScope{false};
117 bool dnssecOK{false};
118 };
119
120 struct DNSResponse : DNSQuestion
121 {
122 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
123 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
124 DNSResponse(const DNSResponse&) = delete;
125 DNSResponse& operator=(const DNSResponse&) = delete;
126 DNSResponse(DNSResponse&&) = default;
127 };
128
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header:    (servfail|refused|notimp), set TC=1,
   send to pool */

/* Interface implemented by everything that can be applied to a query. */
class DNSAction
{
public:
  enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

  /* Returns a human-readable description of an action, or "Unknown" for a
     value that is not one of the enumerators. */
  static std::string typeToString(const Action& action)
  {
    switch(action) {
    case Action::Drop:
      return "Drop";
    case Action::Nxdomain:
      return "Send NXDomain";
    case Action::Refused:
      return "Send Refused";
    case Action::Spoof:
      return "Spoof an answer";
    case Action::SpoofRaw:
      return "Spoof an answer from raw bytes";
    case Action::Allow:
      return "Allow";
    case Action::HeaderModify:
      return "Modify the header";
    case Action::Pool:
      return "Route to a pool";
    case Action::Delay:
      return "Delay";
    case Action::Truncate:
      return "Truncate over UDP";
    case Action::ServFail:
      return "Send ServFail";
    case Action::None:
    case Action::NoOp:
      return "Do nothing";
    case Action::NoRecurse:
      return "Set rd=0";
    }

    return "Unknown";
  }

  /* Applies the action to the query and returns what should happen next;
     ruleresult is an output string available to the implementation. */
  virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const =0;
  virtual ~DNSAction()
  {
  }
  virtual std::string toString() const = 0;

  /* Statistics exported by the action; none by default.
     The previous "return {{}};" selected the initializer-list constructor and
     produced a map holding one spurious entry ("" -> 0.0) instead of an
     empty map. */
  virtual std::map<std::string, double> getStats() const
  {
    return {};
  }
};
187
188 class DNSResponseAction
189 {
190 public:
191 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
192 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
193 virtual ~DNSResponseAction()
194 {
195 }
196 virtual string toString() const = 0;
197 };
198
199 struct DynBlock
200 {
201 DynBlock(): action(DNSAction::Action::None), warning(false)
202 {
203 }
204
205 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
206 {
207 }
208
209 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
210 {
211 blocks.store(rhs.blocks);
212 }
213
214 DynBlock& operator=(const DynBlock& rhs)
215 {
216 reason=rhs.reason;
217 until=rhs.until;
218 domain=rhs.domain;
219 action=rhs.action;
220 blocks.store(rhs.blocks);
221 warning=rhs.warning;
222 return *this;
223 }
224
225 string reason;
226 struct timespec until;
227 DNSName domain;
228 DNSAction::Action action;
229 mutable std::atomic<unsigned int> blocks;
230 bool warning;
231 };
232
233 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
234
235 extern vector<pair<struct timeval, std::string> > g_confDelta;
236
237 extern uint64_t getLatencyCount(const std::string&);
238
239 struct DNSDistStats
240 {
241 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
242 stat_t responses{0};
243 stat_t servfailResponses{0};
244 stat_t queries{0};
245 stat_t frontendNXDomain{0};
246 stat_t frontendServFail{0};
247 stat_t frontendNoError{0};
248 stat_t nonCompliantQueries{0};
249 stat_t nonCompliantResponses{0};
250 stat_t rdQueries{0};
251 stat_t emptyQueries{0};
252 stat_t aclDrops{0};
253 stat_t dynBlocked{0};
254 stat_t ruleDrop{0};
255 stat_t ruleNXDomain{0};
256 stat_t ruleRefused{0};
257 stat_t ruleServFail{0};
258 stat_t selfAnswered{0};
259 stat_t downstreamTimeouts{0};
260 stat_t downstreamSendErrors{0};
261 stat_t truncFail{0};
262 stat_t noPolicy{0};
263 stat_t cacheHits{0};
264 stat_t cacheMisses{0};
265 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
266 stat_t securityStatus{0};
267
268 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
269 typedef std::function<uint64_t(const std::string&)> statfunction_t;
270 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
271 std::vector<std::pair<std::string, entry_t>> entries{
272 {"responses", &responses},
273 {"servfail-responses", &servfailResponses},
274 {"queries", &queries},
275 {"frontend-nxdomain", &frontendNXDomain},
276 {"frontend-servfail", &frontendServFail},
277 {"frontend-noerror", &frontendNoError},
278 {"acl-drops", &aclDrops},
279 {"rule-drop", &ruleDrop},
280 {"rule-nxdomain", &ruleNXDomain},
281 {"rule-refused", &ruleRefused},
282 {"rule-servfail", &ruleServFail},
283 {"self-answered", &selfAnswered},
284 {"downstream-timeouts", &downstreamTimeouts},
285 {"downstream-send-errors", &downstreamSendErrors},
286 {"trunc-failures", &truncFail},
287 {"no-policy", &noPolicy},
288 {"latency0-1", &latency0_1},
289 {"latency1-10", &latency1_10},
290 {"latency10-50", &latency10_50},
291 {"latency50-100", &latency50_100},
292 {"latency100-1000", &latency100_1000},
293 {"latency-slow", &latencySlow},
294 {"latency-avg100", &latencyAvg100},
295 {"latency-avg1000", &latencyAvg1000},
296 {"latency-avg10000", &latencyAvg10000},
297 {"latency-avg1000000", &latencyAvg1000000},
298 {"uptime", uptimeOfProcess},
299 {"real-memory-usage", getRealMemoryUsage},
300 {"special-memory-usage", getSpecialMemoryUsage},
301 {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
302 {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
303 {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
304 {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
305 {"noncompliant-queries", &nonCompliantQueries},
306 {"noncompliant-responses", &nonCompliantResponses},
307 {"rdqueries", &rdQueries},
308 {"empty-queries", &emptyQueries},
309 {"cache-hits", &cacheHits},
310 {"cache-misses", &cacheMisses},
311 {"cpu-iowait", getCPUIOWait},
312 {"cpu-steal", getCPUSteal},
313 {"cpu-sys-msec", getCPUTimeSystem},
314 {"cpu-user-msec", getCPUTimeUser},
315 {"fd-usage", getOpenFileDescriptors},
316 {"dyn-blocked", &dynBlocked},
317 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
318 {"security-status", &securityStatus},
319 // Latency histogram
320 {"latency-sum", &latencySum},
321 {"latency-count", getLatencyCount},
322 };
323 };
324
325 extern struct DNSDistStats g_stats;
326 void doLatencyStats(double udiff);
327
328
329 struct StopWatch
330 {
331 StopWatch(bool realTime=false): d_needRealTime(realTime)
332 {
333 }
334 struct timespec d_start{0,0};
335 bool d_needRealTime{false};
336
337 void start() {
338 if(gettime(&d_start, d_needRealTime) < 0)
339 unixDie("Getting timestamp");
340
341 }
342
343 void set(const struct timespec& from) {
344 d_start = from;
345 }
346
347 double udiff() const {
348 struct timespec now;
349 if(gettime(&now, d_needRealTime) < 0)
350 unixDie("Getting timestamp");
351
352 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
353 }
354
355 double udiffAndSet() {
356 struct timespec now;
357 if(gettime(&now, d_needRealTime) < 0)
358 unixDie("Getting timestamp");
359
360 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
361 d_start = now;
362 return ret;
363 }
364
365 };
366
367 class BasicQPSLimiter
368 {
369 public:
370 BasicQPSLimiter()
371 {
372 }
373
374 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
375 {
376 d_prev.start();
377 }
378
379 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
380 {
381 auto delta = d_prev.udiffAndSet();
382
383 if(delta > 0.0) // time, frequently, does go backwards..
384 d_tokens += 1.0 * rate * (delta/1000000.0);
385
386 if(d_tokens > burst) {
387 d_tokens = burst;
388 }
389
390 bool ret=false;
391 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
392 ret=true;
393 --d_tokens;
394 }
395
396 return ret;
397 }
398
399 bool seenSince(const struct timespec& cutOff) const
400 {
401 return cutOff < d_prev.d_start;
402 }
403
404 protected:
405 mutable StopWatch d_prev;
406 mutable double d_tokens;
407 };
408
409 class QPSLimiter : public BasicQPSLimiter
410 {
411 public:
412 QPSLimiter(): BasicQPSLimiter()
413 {
414 }
415
416 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
417 {
418 d_prev.start();
419 }
420
421 unsigned int getRate() const
422 {
423 return d_passthrough ? 0 : d_rate;
424 }
425
426 int getPassed() const
427 {
428 return d_passed;
429 }
430
431 int getBlocked() const
432 {
433 return d_blocked;
434 }
435
436 bool check() const // this is not quite fair
437 {
438 if (d_passthrough) {
439 return true;
440 }
441
442 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
443 if (ret) {
444 d_passed++;
445 }
446 else {
447 d_blocked++;
448 }
449
450 return ret;
451 }
452 private:
453 mutable unsigned int d_passed{0};
454 mutable unsigned int d_blocked{0};
455 unsigned int d_rate;
456 unsigned int d_burst;
457 bool d_passthrough{true};
458 };
459
460 struct ClientState;
461
462 struct IDState
463 {
464 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
465 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
466 {
467 usageIndicator.store(orig.usageIndicator.load());
468 origFD = orig.origFD;
469 origID = orig.origID;
470 delayMsec = orig.delayMsec;
471 tempFailureTTL = orig.tempFailureTTL;
472 }
473
474 static const int64_t unusedIndicator = -1;
475
476 static bool isInUse(int64_t usageIndicator)
477 {
478 return usageIndicator != unusedIndicator;
479 }
480
481 bool isInUse() const
482 {
483 return usageIndicator != unusedIndicator;
484 }
485
486 /* return true if the value has been successfully replaced meaning that
487 no-one updated the usage indicator in the meantime */
488 bool tryMarkUnused(int64_t expectedUsageIndicator)
489 {
490 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
491 }
492
493 /* mark as unused no matter what, return true if the state was in use before */
494 bool markAsUsed()
495 {
496 auto currentGeneration = generation++;
497 return markAsUsed(currentGeneration);
498 }
499
500 /* mark as unused no matter what, return true if the state was in use before */
501 bool markAsUsed(int64_t currentGeneration)
502 {
503 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
504 return oldUsage != unusedIndicator;
505 }
506
507 /* We use this value to detect whether this state is in use.
508 For performance reasons we don't want to use a lock here, but that means
509 we need to be very careful when modifying this value. Modifications happen
510 from:
511 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
512 then picking one of the states associated to this backend (via the idOffset).
513 Most of the time this state should not be in use and usageIndicator is -1, but we
514 might not yet have received a response for the query previously associated to this
515 state, meaning that we will 'reuse' this state and erase the existing state.
516 If we ever receive a response for this state, it will be discarded. This is
517 mostly fine for UDP except that we still need to be careful in order to miss
518 the 'outstanding' counters, which should only be increased when we are picking
519 an empty state, and not when reusing ;
520 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
521 be freed, as well as internal objects internals to libh2o.
522 - one of the UDP receiver threads receiving a response from a backend, picking
523 the corresponding state and sending the response to the client ;
524 - the 'healthcheck' thread scanning the states to actively discover timeouts,
525 mostly to keep some counters like the 'outstanding' one sane.
526 We previously based that logic on the origFD (FD on which the query was received,
527 and therefore from where the response should be sent) but this suffered from an
528 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
529 same value since we only have so much incoming sockets:
530 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
531 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
532 qtype and qclass match
533 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
534 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
535 5, except it's not the same 5 anymore and it overrides a fresh state.
536 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
537 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
538 when the state is unused and the value of our counter otherwise.
539 */
540 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
541 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
542 ComboAddress origRemote; // 28
543 ComboAddress origDest; // 28
544 StopWatch sentTime; // 16
545 DNSName qname; // 80
546 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
547 #ifdef HAVE_PROTOBUF
548 boost::optional<boost::uuids::uuid> uniqueId;
549 #endif
550 boost::optional<Netmask> subnet{boost::none};
551 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
552 std::shared_ptr<QTag> qTag{nullptr};
553 const ClientState* cs{nullptr};
554 DOHUnit* du{nullptr};
555 uint32_t cacheKey; // 4
556 uint32_t cacheKeyNoECS; // 4
557 uint16_t age; // 4
558 uint16_t qtype; // 2
559 uint16_t qclass; // 2
560 uint16_t origID; // 2
561 uint16_t origFlags; // 2
562 int origFD{-1};
563 int delayMsec;
564 boost::optional<uint32_t> tempFailureTTL;
565 bool ednsAdded{false};
566 bool ecsAdded{false};
567 bool skipCache{false};
568 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
569 bool dnssecOK{false};
570 bool useZeroScope;
571 };
572
573 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
574 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
575 struct QueryCount {
576 QueryCount()
577 {
578 pthread_rwlock_init(&queryLock, nullptr);
579 }
580 ~QueryCount()
581 {
582 pthread_rwlock_destroy(&queryLock);
583 }
584 QueryCountRecords records;
585 QueryCountFilter filter;
586 pthread_rwlock_t queryLock;
587 bool enabled{false};
588 };
589
590 extern QueryCount g_qcount;
591
592 struct ClientState
593 {
594 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
595 {
596 }
597
598 std::set<int> cpus;
599 ComboAddress local;
600 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
601 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
602 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
603 std::string interface;
604 std::atomic<uint64_t> queries{0};
605 mutable std::atomic<uint64_t> responses{0};
606 std::atomic<uint64_t> tcpDiedReadingQuery{0};
607 std::atomic<uint64_t> tcpDiedSendingResponse{0};
608 std::atomic<uint64_t> tcpGaveUp{0};
609 std::atomic<uint64_t> tcpClientTimeouts{0};
610 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
611 std::atomic<uint64_t> tcpCurrentConnections{0};
612 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
613 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
614 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
615 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
616 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
617 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
618 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
619 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
620 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
621 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
622 /* in ms */
623 std::atomic<double> tcpAvgConnectionDuration{0.0};
624 int udpFD{-1};
625 int tcpFD{-1};
626 int fastOpenQueueSize{0};
627 bool muted{false};
628 bool tcp;
629 bool reuseport;
630 bool ready{false};
631
632 int getSocket() const
633 {
634 return udpFD != -1 ? udpFD : tcpFD;
635 }
636
637 bool isUDP() const
638 {
639 return udpFD != -1;
640 }
641
642 bool isTCP() const
643 {
644 return udpFD == -1;
645 }
646
647 bool hasTLS() const
648 {
649 return tlsFrontend != nullptr || dohFrontend != nullptr;
650 }
651
652 std::string getType() const
653 {
654 std::string result = udpFD != -1 ? "UDP" : "TCP";
655
656 if (dohFrontend) {
657 result += " (DNS over HTTPS)";
658 }
659 else if (tlsFrontend) {
660 result += " (DNS over TLS)";
661 }
662 else if (dnscryptCtx) {
663 result += " (DNSCrypt)";
664 }
665
666 return result;
667 }
668
669 #ifdef HAVE_EBPF
670 shared_ptr<BPFFilter> d_filter;
671
672 void detachFilter()
673 {
674 if (d_filter) {
675 d_filter->removeSocket(getSocket());
676 d_filter = nullptr;
677 }
678 }
679
680 void attachFilter(shared_ptr<BPFFilter> bpf)
681 {
682 detachFilter();
683
684 bpf->addSocket(getSocket());
685 d_filter = bpf;
686 }
687 #endif /* HAVE_EBPF */
688
689 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
690 {
691 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
692 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
693 }
694 };
695
696 class TCPClientCollection {
697 std::vector<int> d_tcpclientthreads;
698 std::atomic<uint64_t> d_numthreads{0};
699 std::atomic<uint64_t> d_pos{0};
700 std::atomic<uint64_t> d_queued{0};
701 const uint64_t d_maxthreads{0};
702 std::mutex d_mutex;
703 int d_singlePipe[2];
704 const bool d_useSinglePipe;
705 public:
706
707 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
708
709 {
710 d_tcpclientthreads.reserve(maxThreads);
711
712 if (d_useSinglePipe) {
713 if (pipe(d_singlePipe) < 0) {
714 int err = errno;
715 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
716 }
717
718 if (!setNonBlocking(d_singlePipe[0])) {
719 int err = errno;
720 close(d_singlePipe[0]);
721 close(d_singlePipe[1]);
722 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
723 }
724
725 if (!setNonBlocking(d_singlePipe[1])) {
726 int err = errno;
727 close(d_singlePipe[0]);
728 close(d_singlePipe[1]);
729 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
730 }
731 }
732 }
733 int getThread()
734 {
735 uint64_t pos = d_pos++;
736 ++d_queued;
737 return d_tcpclientthreads[pos % d_numthreads];
738 }
739 bool hasReachedMaxThreads() const
740 {
741 return d_numthreads >= d_maxthreads;
742 }
743 uint64_t getThreadsCount() const
744 {
745 return d_numthreads;
746 }
747 uint64_t getQueuedCount() const
748 {
749 return d_queued;
750 }
751 void decrementQueuedCount()
752 {
753 --d_queued;
754 }
755 void addTCPClientThread();
756 };
757
758 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
759
760 struct DownstreamState
761 {
762 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
763
764 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
765 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
766 ~DownstreamState()
767 {
768 for (auto& fd : sockets) {
769 if (fd >= 0) {
770 close(fd);
771 fd = -1;
772 }
773 }
774 pthread_rwlock_destroy(&d_lock);
775 }
776 boost::uuids::uuid id;
777 std::vector<unsigned int> hashes;
778 mutable pthread_rwlock_t d_lock;
779 std::vector<int> sockets;
780 const std::string sourceItfName;
781 std::mutex socketsLock;
782 std::mutex connectLock;
783 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
784 std::thread tid;
785 const ComboAddress remote;
786 QPSLimiter qps;
787 vector<IDState> idStates;
788 const ComboAddress sourceAddr;
789 checkfunc_t checkFunction;
790 DNSName checkName{"a.root-servers.net."};
791 QType checkType{QType::A};
792 uint16_t checkClass{QClass::IN};
793 std::atomic<uint64_t> idOffset{0};
794 std::atomic<uint64_t> sendErrors{0};
795 std::atomic<uint64_t> outstanding{0};
796 std::atomic<uint64_t> reuseds{0};
797 std::atomic<uint64_t> queries{0};
798 std::atomic<uint64_t> responses{0};
799 struct {
800 std::atomic<uint64_t> sendErrors{0};
801 std::atomic<uint64_t> reuseds{0};
802 std::atomic<uint64_t> queries{0};
803 } prev;
804 std::atomic<uint64_t> tcpDiedSendingQuery{0};
805 std::atomic<uint64_t> tcpDiedReadingResponse{0};
806 std::atomic<uint64_t> tcpGaveUp{0};
807 std::atomic<uint64_t> tcpReadTimeouts{0};
808 std::atomic<uint64_t> tcpWriteTimeouts{0};
809 std::atomic<uint64_t> tcpCurrentConnections{0};
810 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
811 /* in ms */
812 std::atomic<double> tcpAvgConnectionDuration{0.0};
813 size_t socketsOffset{0};
814 double queryLoad{0.0};
815 double dropRate{0.0};
816 double latencyUsec{0.0};
817 int order{1};
818 int weight{1};
819 int tcpConnectTimeout{5};
820 int tcpRecvTimeout{30};
821 int tcpSendTimeout{30};
822 unsigned int checkInterval{1};
823 unsigned int lastCheck{0};
824 const unsigned int sourceItf{0};
825 uint16_t retries{5};
826 uint16_t xpfRRCode{0};
827 uint16_t checkTimeout{1000}; /* in milliseconds */
828 uint8_t currentCheckFailures{0};
829 uint8_t consecutiveSuccessfulChecks{0};
830 uint8_t maxCheckFailures{1};
831 uint8_t minRiseSuccesses{1};
832 StopWatch sw;
833 set<string> pools;
834 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
835 bool mustResolve{false};
836 bool upStatus{false};
837 bool useECS{false};
838 bool useProxyProtocol{false};
839 bool setCD{false};
840 bool disableZeroScope{false};
841 std::atomic<bool> connected{false};
842 std::atomic_flag threadStarted;
843 bool tcpFastOpen{false};
844 bool ipBindAddrNoPort{true};
845
846 bool isUp() const
847 {
848 if(availability == Availability::Down)
849 return false;
850 if(availability == Availability::Up)
851 return true;
852 return upStatus;
853 }
854 void setUp() { availability = Availability::Up; }
855 void setDown() { availability = Availability::Down; }
856 void setAuto() { availability = Availability::Auto; }
857 const string& getName() const {
858 return name;
859 }
860 const string& getNameWithAddr() const {
861 return nameWithAddr;
862 }
863 void setName(const std::string& newName)
864 {
865 name = newName;
866 nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")");
867 }
868
869 string getStatus() const
870 {
871 string status;
872 if(availability == DownstreamState::Availability::Up)
873 status = "UP";
874 else if(availability == DownstreamState::Availability::Down)
875 status = "DOWN";
876 else
877 status = (upStatus ? "up" : "down");
878 return status;
879 }
880 bool reconnect();
881 void hash();
882 void setId(const boost::uuids::uuid& newId);
883 void setWeight(int newWeight);
884
885 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
886 {
887 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
888 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
889 }
890 private:
891 std::string name;
892 std::string nameWithAddr;
893 };
894 using servers_t =vector<std::shared_ptr<DownstreamState>>;
895
896 void responderThread(std::shared_ptr<DownstreamState> state);
897 extern std::mutex g_luamutex;
898 extern LuaContext g_lua;
899 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
900
901 class DNSRule
902 {
903 public:
904 virtual ~DNSRule ()
905 {
906 }
907 virtual bool matches(const DNSQuestion* dq) const =0;
908 virtual string toString() const = 0;
909 mutable std::atomic<uint64_t> d_matches{0};
910 };
911
912 struct ServerPool
913 {
914 ServerPool()
915 {
916 pthread_rwlock_init(&d_lock, nullptr);
917 }
918 ~ServerPool()
919 {
920 pthread_rwlock_destroy(&d_lock);
921 }
922
923 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
924
925 bool getECS() const
926 {
927 return d_useECS;
928 }
929
930 void setECS(bool useECS)
931 {
932 d_useECS = useECS;
933 }
934
935 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
936 std::shared_ptr<ServerPolicy> policy{nullptr};
937
938 size_t countServers(bool upOnly)
939 {
940 size_t count = 0;
941 ReadLock rl(&d_lock);
942 for (const auto& server : d_servers) {
943 if (!upOnly || std::get<1>(server)->isUp() ) {
944 count++;
945 }
946 }
947 return count;
948 }
949
950 ServerPolicy::NumberedServerVector getServers()
951 {
952 ServerPolicy::NumberedServerVector result;
953 {
954 ReadLock rl(&d_lock);
955 result = d_servers;
956 }
957 return result;
958 }
959
960 void addServer(shared_ptr<DownstreamState>& server)
961 {
962 WriteLock wl(&d_lock);
963 unsigned int count = (unsigned int) d_servers.size();
964 d_servers.push_back(make_pair(++count, server));
965 /* we need to reorder based on the server 'order' */
966 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
967 return a.second->order < b.second->order;
968 });
969 /* and now we need to renumber for Lua (custom policies) */
970 size_t idx = 1;
971 for (auto& serv : d_servers) {
972 serv.first = idx++;
973 }
974 }
975
976 void removeServer(shared_ptr<DownstreamState>& server)
977 {
978 WriteLock wl(&d_lock);
979 size_t idx = 1;
980 bool found = false;
981 for (auto it = d_servers.begin(); it != d_servers.end();) {
982 if (found) {
983 /* we need to renumber the servers placed
984 after the removed one, for Lua (custom policies) */
985 it->first = idx++;
986 it++;
987 }
988 else if (it->second == server) {
989 it = d_servers.erase(it);
990 found = true;
991 } else {
992 idx++;
993 it++;
994 }
995 }
996 }
997
998 private:
/* Servers of this pool, each paired with a number; addServer() and removeServer() keep the numbers consecutive. */
999 ServerPolicy::NumberedServerVector d_servers;
/* Read/write lock taken through ReadLock / WriteLock by the methods that read or modify d_servers.
   NOTE(review): its initialisation is not visible in this part of the file - confirm it happens in the constructor. */
1000 pthread_rwlock_t d_lock;
/* Flag returned by getECS() and changed by setECS(); false by default. */
1001 bool d_useECS{false};
1002 };
1003
/* Settings for one carbon destination; a vector of these is published through g_carbon.
   The field meanings below are inferred from their names and types - TODO confirm against the carbon export code. */
1004 struct CarbonConfig
1005 {
/* presumably the address of the carbon server to report to */
1006 ComboAddress server;
/* presumably the components used to build the metric names */
1007 std::string namespace_name;
1008 std::string ourname;
1009 std::string instance_name;
/* reporting interval; no default value here, and the unit is not visible from this header (presumably seconds) */
1010 unsigned int interval;
1011 };
1012
/* Flag values for the EDNS header. EDNS_HEADER_FLAG_DO is the top bit of a
   16-bit flags field (0x8000 == 32768). */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
1017
/* Pairs a rule with the action attached to it, for queries; vectors of these are published through g_rulactions.
   None of the members has a default initializer, so d_creationOrder must be set by whoever builds the entry. */
1018 struct DNSDistRuleAction
1019 {
/* the rule, and the action attached to it */
1020 std::shared_ptr<DNSRule> d_rule;
1021 std::shared_ptr<DNSAction> d_action;
/* identifier of this rule/action pair */
1022 boost::uuids::uuid d_id;
/* presumably a monotonically increasing creation counter - confirm where entries are built */
1023 uint64_t d_creationOrder;
1024 };
1025
/* Same layout as DNSDistRuleAction, but the action is a DNSResponseAction; vectors of these are
   published through g_resprulactions, g_cachehitresprulactions and g_selfansweredresprulactions.
   None of the members has a default initializer. */
1026 struct DNSDistResponseRuleAction
1027 {
/* the rule, and the response action attached to it */
1028 std::shared_ptr<DNSRule> d_rule;
1029 std::shared_ptr<DNSResponseAction> d_action;
/* identifier of this rule/action pair */
1030 boost::uuids::uuid d_id;
/* presumably a monotonically increasing creation counter - confirm where entries are built */
1031 uint64_t d_creationOrder;
1032 };
1033
/* Dynamic block state: a suffix match tree of DynBlock entries, and the action associated with dynamic blocks.
   Declarations only; the definitions live in another translation unit. */
1034 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1035 extern DNSAction::Action g_dynBlockAction;
1036
/* Global state holders; LocalHolders takes a local view of several of them through getLocal(). */
1037 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1038 extern GlobalStateHolder<ServerPolicy> g_policy;
1039 extern GlobalStateHolder<servers_t> g_dstates;
1040 extern GlobalStateHolder<pools_t> g_pools;
/* rule/action lists: for queries, for responses, for cache hits and for self-answered responses (going by the names) */
1041 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1042 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1043 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1044 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1045 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1046
/* Process-wide settings and frontend lists. Everything here is a declaration only (extern);
   the definitions live in other translation units. Only the std::atomic ones are atomic types,
   the others are plain variables. Units of the timeouts, delays and intervals are not visible
   from this header - check the definitions before relying on them. */
1047 extern ComboAddress g_serverControl; // not changed during runtime
1048
/* frontends, one list per frontend type */
1049 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1050 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1051 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1052 extern bool g_truncateTC;
1053 extern bool g_fixupCase;
/* timeouts */
1054 extern int g_tcpRecvTimeout;
1055 extern int g_tcpSendTimeout;
1056 extern int g_udpTimeout;
1057 extern uint16_t g_maxOutstanding;
1058 extern std::atomic<bool> g_configurationDone;
/* TCP limits */
1059 extern uint64_t g_maxTCPClientThreads;
1060 extern uint64_t g_maxTCPQueuedConnections;
1061 extern size_t g_maxTCPQueriesPerConn;
1062 extern size_t g_maxTCPConnectionDuration;
1063 extern size_t g_maxTCPConnectionsPerClient;
/* packet cache maintenance */
1064 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1065 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1066 extern uint32_t g_staleCacheEntriesTTL;
/* API */
1067 extern bool g_apiReadWrite;
1068 extern std::string g_apiConfigDirectory;
1069 extern bool g_servFailOnNoPolicy;
1070 extern bool g_useTCPSinglePipe;
1071 extern uint16_t g_downstreamTCPCleanupInterval;
1072 extern size_t g_udpVectorSize;
1073 extern bool g_preserveTrailingData;
1074 extern bool g_allowEmptyResponse;
1075
/* eBPF filters; these two globals are only declared when HAVE_EBPF is defined. */
1076 #ifdef HAVE_EBPF
1077 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1078 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1079 #endif /* HAVE_EBPF */
1080
1081 struct LocalHolders
1082 {
1083 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1084 {
1085 }
1086
1087 LocalStateHolder<NetmaskGroup> acl;
1088 LocalStateHolder<ServerPolicy> policy;
1089 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1090 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1091 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1092 LocalStateHolder<servers_t> servers;
1093 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1094 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1095 LocalStateHolder<pools_t> pools;
1096 };
1097
/* Forward declaration: the prototypes in this header only use dnsheader through pointers. */
1098 struct dnsheader;
1099
/* Entry point of the control thread; takes a file descriptor and a local address.
   NOTE(review): presumably fd is the listening control socket bound to 'local' - confirm. */
1100 void controlThread(int fd, ComboAddress local);
/* Sets up the Lua environment for the given configuration and returns a list of callables.
   NOTE(review): presumably 'client' selects console-client mode and the returned functions
   are to be run by the caller afterwards - confirm against the implementation. */
1101 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1102
/* Settings of the built-in web server. The std::mutex member makes the struct non-copyable. */
1103 struct WebserverConfig
1104 {
1105 std::string password;
1106 std::string apiKey;
/* optional set of custom headers, as name/value pairs; unset by default (boost::optional) */
1107 boost::optional<std::map<std::string, std::string> > customHeaders;
/* NOTE(review): presumably guards the fields above against concurrent access - confirm in the users of this struct */
1108 std::mutex lock;
1109 };
1110
/* Setters for the web server settings; their parameters match the fields of WebserverConfig.
   The API key and the custom headers are taken as boost::optional, passed by value. */
1111 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1112 void setWebserverPassword(const std::string& password);
1113 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1114
/* Thread entry points. NOTE(review): the type behind tcpAcceptorThread's void* is not visible here - check the caller. */
1115 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1116 void tcpAcceptorThread(void* p);
/* the DoH thread is only declared when DNS over HTTPS support is compiled in */
1117 #ifdef HAVE_DNS_OVER_HTTPS
1118 void dohThread(ClientState* cs);
1119 #endif /* HAVE_DNS_OVER_HTTPS */
1120
/* Tracking of Lua side effects. Per the notes on each function, the state is one of:
   indeterminate, "no side effect" declared, or "side effect" reported (which overrides
   any "no side effect" declaration). */
1121 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1122 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1123 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1124 void resetLuaSideEffect(); // reset to indeterminate state
1125
/* Response handling helpers (declarations only).
   responseContentMatches() takes the response buffer plus the expected qname/qtype/qclass and reports
   a byte count through 'consumed'; NOTE(review): presumably it returns true when the response matches
   the expected question - confirm in the implementation. */
1126 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
/* processResponse() receives the buffer, its length and its size by pointer, so it is able to update them;
   the response rules to use are passed in as a local state holder. */
1127 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
/* processRulesResult() takes the action returned by a rule and reports back through 'ruleresult' and 'drop'. */
1128 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1129
1130 bool checkQueryHeaders(const struct dnsheader* dh);
1131
/* DNSCrypt: the list of contexts, and the helpers handling DNSCrypt queries.
   checkDNSCryptQuery() may return a response buffer (boost::optional) and takes the length by reference. */
1132 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1133 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1134 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1135
/* NOTE(review): presumably adds an XPF record with the given option code to the question - confirm */
1136 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1137
1138 uint16_t getRandomDNSID();
1139
/* SNMP support: the header is included at this point of the file, ahead of the globals that use its agent type. */
1140 #include "dnsdist-snmp.hh"
1141
1142 extern bool g_snmpEnabled;
1143 extern bool g_snmpTrapsEnabled;
/* raw pointer to the SNMP agent; ownership is not visible from this header */
1144 extern DNSDistSNMPAgent* g_snmpAgent;
1145 extern bool g_addEDNSToSelfGeneratedResponses;
1146
1147 extern std::set<std::string> g_capabilitiesToRetain;
/* Size limits. Being 'static const' at namespace scope in a header, each translation unit including it gets its own copy. */
1148 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1149 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
1150
/* Outcome of processQuery(): drop the query, send an answer, or pass the query on to a backend. */
1151 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* NOTE(review): presumably selectedBackend is filled in when the result is PassToBackend - confirm in the implementation. */
1152 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1153
/* Conversions between the stored per-query state (IDState) and the question / response objects.
   setIDStateFromDNSQuestion() takes qname as an rvalue reference, so the caller hands the name over. */
1154 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1155 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1156
/* Backend I/O helpers. healthCheck defaults to false, so regular callers can leave it out. */
1157 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1158 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);
1158 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);