]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #8839 from Habbie/auth-4.3.0-beta2-docs
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "bpf-filter.hh"
37 #include "capabilities.hh"
38 #include "circular_buffer.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
42 #include "dnsdist-lbpolicies.hh"
43 #include "dnsname.hh"
44 #include "doh.hh"
45 #include "ednsoptions.hh"
46 #include "gettime.hh"
47 #include "iputils.hh"
48 #include "misc.hh"
49 #include "mplexer.hh"
50 #include "sholder.hh"
51 #include "tcpiohandler.hh"
52 #include "uuid-utils.hh"
53
54 void carbonDumpThread();
55 uint64_t uptimeOfProcess(const std::string& str);
56
57 extern uint16_t g_ECSSourcePrefixV4;
58 extern uint16_t g_ECSSourcePrefixV6;
59 extern bool g_ECSOverride;
60
61 typedef std::unordered_map<string, string> QTag;
62
63 struct DNSQuestion
64 {
65 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
66 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
67 const uint16_t* flags = getFlagsFromDNSHeader(dh);
68 origFlags = *flags;
69 }
70 DNSQuestion(const DNSQuestion&) = delete;
71 DNSQuestion& operator=(const DNSQuestion&) = delete;
72 DNSQuestion(DNSQuestion&&) = default;
73
74 std::string getTrailingData() const;
75 bool setTrailingData(const std::string&);
76
77 #ifdef HAVE_PROTOBUF
78 boost::optional<boost::uuids::uuid> uniqueId;
79 #endif
80 Netmask ecs;
81 boost::optional<Netmask> subnet;
82 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
83 std::string poolname;
84 const DNSName* qname{nullptr};
85 const ComboAddress* local{nullptr};
86 const ComboAddress* remote{nullptr};
87 std::shared_ptr<QTag> qTag{nullptr};
88 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
89 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
90 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
91 struct dnsheader* dh{nullptr};
92 const struct timespec* queryTime{nullptr};
93 struct DOHUnit* du{nullptr};
94 size_t size;
95 unsigned int consumed{0};
96 int delayMsec{0};
97 boost::optional<uint32_t> tempFailureTTL;
98 uint32_t cacheKeyNoECS;
99 uint32_t cacheKey;
100 const uint16_t qtype;
101 const uint16_t qclass;
102 uint16_t len;
103 uint16_t ecsPrefixLength;
104 uint16_t origFlags;
105 uint8_t ednsRCode{0};
106 const bool tcp;
107 bool skipCache{false};
108 bool ecsOverride;
109 bool useECS{true};
110 bool addXPF{true};
111 bool ecsSet{false};
112 bool ecsAdded{false};
113 bool ednsAdded{false};
114 bool useZeroScope{false};
115 bool dnssecOK{false};
116 };
117
118 struct DNSResponse : DNSQuestion
119 {
120 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
121 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
122 DNSResponse(const DNSResponse&) = delete;
123 DNSResponse& operator=(const DNSResponse&) = delete;
124 DNSResponse(DNSResponse&&) = default;
125 };
126
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header: (servfail|refused|notimp), set TC=1,
   send to pool */
135
/* Interface of the actions that can be applied to a query.
   Implementations provide operator(), which returns one of the Action values,
   and toString(). */
class DNSAction
{
public:
  enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

  /* Human-readable description of an Action value, "Unknown" for a value
     that is not one of the enumerators. */
  static std::string typeToString(const Action& action)
  {
    switch(action) {
    case Action::Drop:
      return "Drop";
    case Action::Nxdomain:
      return "Send NXDomain";
    case Action::Refused:
      return "Send Refused";
    case Action::Spoof:
      return "Spoof an answer";
    case Action::SpoofRaw:
      return "Spoof an answer from raw bytes";
    case Action::Allow:
      return "Allow";
    case Action::HeaderModify:
      return "Modify the header";
    case Action::Pool:
      return "Route to a pool";
    case Action::Delay:
      return "Delay";
    case Action::Truncate:
      return "Truncate over UDP";
    case Action::ServFail:
      return "Send ServFail";
    case Action::None:
    case Action::NoOp:
      return "Do nothing";
    case Action::NoRecurse:
      return "Set rd=0";
    }

    return "Unknown";
  }

  virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const = 0;
  virtual ~DNSAction()
  {
  }
  virtual std::string toString() const = 0;

  /* Statistics exposed by the action, none by default.
     This used to be written 'return {{}};', which selects the initializer-list
     constructor of std::map and yields a map holding one value-initialized
     entry ("" -> 0.0) rather than an empty map. */
  virtual std::map<std::string, double> getStats() const
  {
    return {};
  }
};
185
/* Interface of the actions that can be applied to a response: implementations
   provide operator(), returning one of the Action values, and toString(). */
class DNSResponseAction
{
public:
  enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };

  virtual ~DNSResponseAction() {}

  virtual Action operator()(struct DNSResponse*, std::string* ruleresult) const = 0;
  virtual std::string toString() const = 0;
};
196
197 struct DynBlock
198 {
199 DynBlock(): action(DNSAction::Action::None), warning(false)
200 {
201 }
202
203 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
204 {
205 }
206
207 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
208 {
209 blocks.store(rhs.blocks);
210 }
211
212 DynBlock& operator=(const DynBlock& rhs)
213 {
214 reason=rhs.reason;
215 until=rhs.until;
216 domain=rhs.domain;
217 action=rhs.action;
218 blocks.store(rhs.blocks);
219 warning=rhs.warning;
220 return *this;
221 }
222
223 string reason;
224 struct timespec until;
225 DNSName domain;
226 DNSAction::Action action;
227 mutable std::atomic<unsigned int> blocks;
228 bool warning;
229 };
230
231 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
232
233 extern vector<pair<struct timeval, std::string> > g_confDelta;
234
235 extern uint64_t getLatencyCount(const std::string&);
236
237 struct DNSDistStats
238 {
239 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
240 stat_t responses{0};
241 stat_t servfailResponses{0};
242 stat_t queries{0};
243 stat_t frontendNXDomain{0};
244 stat_t frontendServFail{0};
245 stat_t frontendNoError{0};
246 stat_t nonCompliantQueries{0};
247 stat_t nonCompliantResponses{0};
248 stat_t rdQueries{0};
249 stat_t emptyQueries{0};
250 stat_t aclDrops{0};
251 stat_t dynBlocked{0};
252 stat_t ruleDrop{0};
253 stat_t ruleNXDomain{0};
254 stat_t ruleRefused{0};
255 stat_t ruleServFail{0};
256 stat_t selfAnswered{0};
257 stat_t downstreamTimeouts{0};
258 stat_t downstreamSendErrors{0};
259 stat_t truncFail{0};
260 stat_t noPolicy{0};
261 stat_t cacheHits{0};
262 stat_t cacheMisses{0};
263 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
264 stat_t securityStatus{0};
265
266 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
267 typedef std::function<uint64_t(const std::string&)> statfunction_t;
268 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
269 std::vector<std::pair<std::string, entry_t>> entries{
270 {"responses", &responses},
271 {"servfail-responses", &servfailResponses},
272 {"queries", &queries},
273 {"frontend-nxdomain", &frontendNXDomain},
274 {"frontend-servfail", &frontendServFail},
275 {"frontend-noerror", &frontendNoError},
276 {"acl-drops", &aclDrops},
277 {"rule-drop", &ruleDrop},
278 {"rule-nxdomain", &ruleNXDomain},
279 {"rule-refused", &ruleRefused},
280 {"rule-servfail", &ruleServFail},
281 {"self-answered", &selfAnswered},
282 {"downstream-timeouts", &downstreamTimeouts},
283 {"downstream-send-errors", &downstreamSendErrors},
284 {"trunc-failures", &truncFail},
285 {"no-policy", &noPolicy},
286 {"latency0-1", &latency0_1},
287 {"latency1-10", &latency1_10},
288 {"latency10-50", &latency10_50},
289 {"latency50-100", &latency50_100},
290 {"latency100-1000", &latency100_1000},
291 {"latency-slow", &latencySlow},
292 {"latency-avg100", &latencyAvg100},
293 {"latency-avg1000", &latencyAvg1000},
294 {"latency-avg10000", &latencyAvg10000},
295 {"latency-avg1000000", &latencyAvg1000000},
296 {"uptime", uptimeOfProcess},
297 {"real-memory-usage", getRealMemoryUsage},
298 {"special-memory-usage", getSpecialMemoryUsage},
299 {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
300 {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
301 {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
302 {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
303 {"noncompliant-queries", &nonCompliantQueries},
304 {"noncompliant-responses", &nonCompliantResponses},
305 {"rdqueries", &rdQueries},
306 {"empty-queries", &emptyQueries},
307 {"cache-hits", &cacheHits},
308 {"cache-misses", &cacheMisses},
309 {"cpu-iowait", getCPUIOWait},
310 {"cpu-steal", getCPUSteal},
311 {"cpu-sys-msec", getCPUTimeSystem},
312 {"cpu-user-msec", getCPUTimeUser},
313 {"fd-usage", getOpenFileDescriptors},
314 {"dyn-blocked", &dynBlocked},
315 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
316 {"security-status", &securityStatus},
317 // Latency histogram
318 {"latency-sum", &latencySum},
319 {"latency-count", getLatencyCount},
320 };
321 };
322
/* The process-wide statistics object. */
extern struct DNSDistStats g_stats;

/* Implemented elsewhere; going by the parameter name (see StopWatch::udiff()),
   it presumably takes a latency in microseconds -- TODO confirm. */
void doLatencyStats(double udiff);
325
326
327 struct StopWatch
328 {
329 StopWatch(bool realTime=false): d_needRealTime(realTime)
330 {
331 }
332 struct timespec d_start{0,0};
333 bool d_needRealTime{false};
334
335 void start() {
336 if(gettime(&d_start, d_needRealTime) < 0)
337 unixDie("Getting timestamp");
338
339 }
340
341 void set(const struct timespec& from) {
342 d_start = from;
343 }
344
345 double udiff() const {
346 struct timespec now;
347 if(gettime(&now, d_needRealTime) < 0)
348 unixDie("Getting timestamp");
349
350 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
351 }
352
353 double udiffAndSet() {
354 struct timespec now;
355 if(gettime(&now, d_needRealTime) < 0)
356 unixDie("Getting timestamp");
357
358 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
359 d_start = now;
360 return ret;
361 }
362
363 };
364
365 class BasicQPSLimiter
366 {
367 public:
368 BasicQPSLimiter()
369 {
370 }
371
372 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
373 {
374 d_prev.start();
375 }
376
377 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
378 {
379 auto delta = d_prev.udiffAndSet();
380
381 if(delta > 0.0) // time, frequently, does go backwards..
382 d_tokens += 1.0 * rate * (delta/1000000.0);
383
384 if(d_tokens > burst) {
385 d_tokens = burst;
386 }
387
388 bool ret=false;
389 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
390 ret=true;
391 --d_tokens;
392 }
393
394 return ret;
395 }
396
397 bool seenSince(const struct timespec& cutOff) const
398 {
399 return cutOff < d_prev.d_start;
400 }
401
402 protected:
403 mutable StopWatch d_prev;
404 mutable double d_tokens;
405 };
406
407 class QPSLimiter : public BasicQPSLimiter
408 {
409 public:
410 QPSLimiter(): BasicQPSLimiter()
411 {
412 }
413
414 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
415 {
416 d_prev.start();
417 }
418
419 unsigned int getRate() const
420 {
421 return d_passthrough ? 0 : d_rate;
422 }
423
424 int getPassed() const
425 {
426 return d_passed;
427 }
428
429 int getBlocked() const
430 {
431 return d_blocked;
432 }
433
434 bool check() const // this is not quite fair
435 {
436 if (d_passthrough) {
437 return true;
438 }
439
440 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
441 if (ret) {
442 d_passed++;
443 }
444 else {
445 d_blocked++;
446 }
447
448 return ret;
449 }
450 private:
451 mutable unsigned int d_passed{0};
452 mutable unsigned int d_blocked{0};
453 unsigned int d_rate;
454 unsigned int d_burst;
455 bool d_passthrough{true};
456 };
457
458 struct ClientState;
459
460 struct IDState
461 {
462 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
463 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
464 {
465 usageIndicator.store(orig.usageIndicator.load());
466 origFD = orig.origFD;
467 origID = orig.origID;
468 delayMsec = orig.delayMsec;
469 tempFailureTTL = orig.tempFailureTTL;
470 }
471
472 static const int64_t unusedIndicator = -1;
473
474 static bool isInUse(int64_t usageIndicator)
475 {
476 return usageIndicator != unusedIndicator;
477 }
478
479 bool isInUse() const
480 {
481 return usageIndicator != unusedIndicator;
482 }
483
484 /* return true if the value has been successfully replaced meaning that
485 no-one updated the usage indicator in the meantime */
486 bool tryMarkUnused(int64_t expectedUsageIndicator)
487 {
488 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
489 }
490
491 /* mark as unused no matter what, return true if the state was in use before */
492 bool markAsUsed()
493 {
494 auto currentGeneration = generation++;
495 return markAsUsed(currentGeneration);
496 }
497
498 /* mark as unused no matter what, return true if the state was in use before */
499 bool markAsUsed(int64_t currentGeneration)
500 {
501 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
502 return oldUsage != unusedIndicator;
503 }
504
505 /* We use this value to detect whether this state is in use.
506 For performance reasons we don't want to use a lock here, but that means
507 we need to be very careful when modifying this value. Modifications happen
508 from:
509 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
510 then picking one of the states associated to this backend (via the idOffset).
511 Most of the time this state should not be in use and usageIndicator is -1, but we
512 might not yet have received a response for the query previously associated to this
513 state, meaning that we will 'reuse' this state and erase the existing state.
514 If we ever receive a response for this state, it will be discarded. This is
515 mostly fine for UDP except that we still need to be careful in order to miss
516 the 'outstanding' counters, which should only be increased when we are picking
517 an empty state, and not when reusing ;
518 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
519 be freed, as well as internal objects internals to libh2o.
520 - one of the UDP receiver threads receiving a response from a backend, picking
521 the corresponding state and sending the response to the client ;
522 - the 'healthcheck' thread scanning the states to actively discover timeouts,
523 mostly to keep some counters like the 'outstanding' one sane.
524 We previously based that logic on the origFD (FD on which the query was received,
525 and therefore from where the response should be sent) but this suffered from an
526 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
527 same value since we only have so much incoming sockets:
528 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
529 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
530 qtype and qclass match
531 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
532 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
533 5, except it's not the same 5 anymore and it overrides a fresh state.
534 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
535 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
536 when the state is unused and the value of our counter otherwise.
537 */
538 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
539 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
540 ComboAddress origRemote; // 28
541 ComboAddress origDest; // 28
542 StopWatch sentTime; // 16
543 DNSName qname; // 80
544 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
545 #ifdef HAVE_PROTOBUF
546 boost::optional<boost::uuids::uuid> uniqueId;
547 #endif
548 boost::optional<Netmask> subnet{boost::none};
549 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
550 std::shared_ptr<QTag> qTag{nullptr};
551 const ClientState* cs{nullptr};
552 DOHUnit* du{nullptr};
553 uint32_t cacheKey; // 4
554 uint32_t cacheKeyNoECS; // 4
555 uint16_t age; // 4
556 uint16_t qtype; // 2
557 uint16_t qclass; // 2
558 uint16_t origID; // 2
559 uint16_t origFlags; // 2
560 int origFD{-1};
561 int delayMsec;
562 boost::optional<uint32_t> tempFailureTTL;
563 bool ednsAdded{false};
564 bool ecsAdded{false};
565 bool skipCache{false};
566 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
567 bool dnssecOK{false};
568 bool useZeroScope;
569 };
570
/* Counters indexed by a string key. */
using QueryCountRecords = std::unordered_map<std::string, unsigned int>;
/* Callback receiving a query and returning a (bool, string) tuple. */
using QueryCountFilter = std::function<std::tuple<bool, std::string>(const struct DNSQuestion* dq)>;

/* Records, optional filter and 'enabled' switch of the query counting feature,
   along with the read-write lock that is initialized and destroyed with the object. */
struct QueryCount
{
  QueryCount()
  {
    pthread_rwlock_init(&queryLock, nullptr);
  }

  ~QueryCount()
  {
    pthread_rwlock_destroy(&queryLock);
  }

  QueryCountRecords records;
  QueryCountFilter filter;
  pthread_rwlock_t queryLock;
  bool enabled{false};
};

extern QueryCount g_qcount;
589
590 struct ClientState
591 {
592 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
593 {
594 }
595
596 std::set<int> cpus;
597 ComboAddress local;
598 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
599 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
600 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
601 std::string interface;
602 std::atomic<uint64_t> queries{0};
603 mutable std::atomic<uint64_t> responses{0};
604 std::atomic<uint64_t> tcpDiedReadingQuery{0};
605 std::atomic<uint64_t> tcpDiedSendingResponse{0};
606 std::atomic<uint64_t> tcpGaveUp{0};
607 std::atomic<uint64_t> tcpClientTimeouts{0};
608 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
609 std::atomic<uint64_t> tcpCurrentConnections{0};
610 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
611 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
612 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
613 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
614 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
615 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
616 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
617 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
618 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
619 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
620 /* in ms */
621 std::atomic<double> tcpAvgConnectionDuration{0.0};
622 int udpFD{-1};
623 int tcpFD{-1};
624 int fastOpenQueueSize{0};
625 bool muted{false};
626 bool tcp;
627 bool reuseport;
628 bool ready{false};
629
630 int getSocket() const
631 {
632 return udpFD != -1 ? udpFD : tcpFD;
633 }
634
635 bool isUDP() const
636 {
637 return udpFD != -1;
638 }
639
640 bool isTCP() const
641 {
642 return udpFD == -1;
643 }
644
645 bool hasTLS() const
646 {
647 return tlsFrontend != nullptr || dohFrontend != nullptr;
648 }
649
650 std::string getType() const
651 {
652 std::string result = udpFD != -1 ? "UDP" : "TCP";
653
654 if (dohFrontend) {
655 result += " (DNS over HTTPS)";
656 }
657 else if (tlsFrontend) {
658 result += " (DNS over TLS)";
659 }
660 else if (dnscryptCtx) {
661 result += " (DNSCrypt)";
662 }
663
664 return result;
665 }
666
667 #ifdef HAVE_EBPF
668 shared_ptr<BPFFilter> d_filter;
669
670 void detachFilter()
671 {
672 if (d_filter) {
673 d_filter->removeSocket(getSocket());
674 d_filter = nullptr;
675 }
676 }
677
678 void attachFilter(shared_ptr<BPFFilter> bpf)
679 {
680 detachFilter();
681
682 bpf->addSocket(getSocket());
683 d_filter = bpf;
684 }
685 #endif /* HAVE_EBPF */
686
687 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
688 {
689 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
690 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
691 }
692 };
693
694 class TCPClientCollection {
695 std::vector<int> d_tcpclientthreads;
696 std::atomic<uint64_t> d_numthreads{0};
697 std::atomic<uint64_t> d_pos{0};
698 std::atomic<uint64_t> d_queued{0};
699 const uint64_t d_maxthreads{0};
700 std::mutex d_mutex;
701 int d_singlePipe[2];
702 const bool d_useSinglePipe;
703 public:
704
705 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
706
707 {
708 d_tcpclientthreads.reserve(maxThreads);
709
710 if (d_useSinglePipe) {
711 if (pipe(d_singlePipe) < 0) {
712 int err = errno;
713 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
714 }
715
716 if (!setNonBlocking(d_singlePipe[0])) {
717 int err = errno;
718 close(d_singlePipe[0]);
719 close(d_singlePipe[1]);
720 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
721 }
722
723 if (!setNonBlocking(d_singlePipe[1])) {
724 int err = errno;
725 close(d_singlePipe[0]);
726 close(d_singlePipe[1]);
727 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
728 }
729 }
730 }
731 int getThread()
732 {
733 uint64_t pos = d_pos++;
734 ++d_queued;
735 return d_tcpclientthreads[pos % d_numthreads];
736 }
737 bool hasReachedMaxThreads() const
738 {
739 return d_numthreads >= d_maxthreads;
740 }
741 uint64_t getThreadsCount() const
742 {
743 return d_numthreads;
744 }
745 uint64_t getQueuedCount() const
746 {
747 return d_queued;
748 }
749 void decrementQueuedCount()
750 {
751 --d_queued;
752 }
753 void addTCPClientThread();
754 };
755
756 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
757
758 struct DownstreamState
759 {
760 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
761
762 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
763 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
764 ~DownstreamState()
765 {
766 for (auto& fd : sockets) {
767 if (fd >= 0) {
768 close(fd);
769 fd = -1;
770 }
771 }
772 pthread_rwlock_destroy(&d_lock);
773 }
774 boost::uuids::uuid id;
775 std::vector<unsigned int> hashes;
776 mutable pthread_rwlock_t d_lock;
777 std::vector<int> sockets;
778 const std::string sourceItfName;
779 std::mutex socketsLock;
780 std::mutex connectLock;
781 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
782 std::thread tid;
783 const ComboAddress remote;
784 QPSLimiter qps;
785 vector<IDState> idStates;
786 const ComboAddress sourceAddr;
787 checkfunc_t checkFunction;
788 DNSName checkName{"a.root-servers.net."};
789 QType checkType{QType::A};
790 uint16_t checkClass{QClass::IN};
791 std::atomic<uint64_t> idOffset{0};
792 std::atomic<uint64_t> sendErrors{0};
793 std::atomic<uint64_t> outstanding{0};
794 std::atomic<uint64_t> reuseds{0};
795 std::atomic<uint64_t> queries{0};
796 std::atomic<uint64_t> responses{0};
797 struct {
798 std::atomic<uint64_t> sendErrors{0};
799 std::atomic<uint64_t> reuseds{0};
800 std::atomic<uint64_t> queries{0};
801 } prev;
802 std::atomic<uint64_t> tcpDiedSendingQuery{0};
803 std::atomic<uint64_t> tcpDiedReadingResponse{0};
804 std::atomic<uint64_t> tcpGaveUp{0};
805 std::atomic<uint64_t> tcpReadTimeouts{0};
806 std::atomic<uint64_t> tcpWriteTimeouts{0};
807 std::atomic<uint64_t> tcpCurrentConnections{0};
808 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
809 /* in ms */
810 std::atomic<double> tcpAvgConnectionDuration{0.0};
811 size_t socketsOffset{0};
812 double queryLoad{0.0};
813 double dropRate{0.0};
814 double latencyUsec{0.0};
815 int order{1};
816 int weight{1};
817 int tcpConnectTimeout{5};
818 int tcpRecvTimeout{30};
819 int tcpSendTimeout{30};
820 unsigned int checkInterval{1};
821 unsigned int lastCheck{0};
822 const unsigned int sourceItf{0};
823 uint16_t retries{5};
824 uint16_t xpfRRCode{0};
825 uint16_t checkTimeout{1000}; /* in milliseconds */
826 uint8_t currentCheckFailures{0};
827 uint8_t consecutiveSuccessfulChecks{0};
828 uint8_t maxCheckFailures{1};
829 uint8_t minRiseSuccesses{1};
830 StopWatch sw;
831 set<string> pools;
832 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
833 bool mustResolve{false};
834 bool upStatus{false};
835 bool useECS{false};
836 bool setCD{false};
837 bool disableZeroScope{false};
838 std::atomic<bool> connected{false};
839 std::atomic_flag threadStarted;
840 bool tcpFastOpen{false};
841 bool ipBindAddrNoPort{true};
842
843 bool isUp() const
844 {
845 if(availability == Availability::Down)
846 return false;
847 if(availability == Availability::Up)
848 return true;
849 return upStatus;
850 }
851 void setUp() { availability = Availability::Up; }
852 void setDown() { availability = Availability::Down; }
853 void setAuto() { availability = Availability::Auto; }
854 const string& getName() const {
855 return name;
856 }
857 const string& getNameWithAddr() const {
858 return nameWithAddr;
859 }
860 void setName(const std::string& newName)
861 {
862 name = newName;
863 nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")");
864 }
865
866 string getStatus() const
867 {
868 string status;
869 if(availability == DownstreamState::Availability::Up)
870 status = "UP";
871 else if(availability == DownstreamState::Availability::Down)
872 status = "DOWN";
873 else
874 status = (upStatus ? "up" : "down");
875 return status;
876 }
877 bool reconnect();
878 void hash();
879 void setId(const boost::uuids::uuid& newId);
880 void setWeight(int newWeight);
881
882 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
883 {
884 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
885 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
886 }
887 private:
888 std::string name;
889 std::string nameWithAddr;
890 };
891 using servers_t =vector<std::shared_ptr<DownstreamState>>;
892
893 void responderThread(std::shared_ptr<DownstreamState> state);
894 extern std::mutex g_luamutex;
895 extern LuaContext g_lua;
896 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
897
/* Interface of the rules used to select queries: implementations provide
   matches() and toString(). */
class DNSRule
{
public:
  virtual ~DNSRule() {}

  virtual bool matches(const struct DNSQuestion* dq) const = 0;
  virtual std::string toString() const = 0;

  /* Counter starting at zero; mutable, so it can be updated through a const rule. */
  mutable std::atomic<uint64_t> d_matches{0};
};
908
909 struct ServerPool
910 {
911 ServerPool()
912 {
913 pthread_rwlock_init(&d_lock, nullptr);
914 }
915 ~ServerPool()
916 {
917 pthread_rwlock_destroy(&d_lock);
918 }
919
920 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
921
922 bool getECS() const
923 {
924 return d_useECS;
925 }
926
927 void setECS(bool useECS)
928 {
929 d_useECS = useECS;
930 }
931
932 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
933 std::shared_ptr<ServerPolicy> policy{nullptr};
934
935 size_t countServers(bool upOnly)
936 {
937 size_t count = 0;
938 ReadLock rl(&d_lock);
939 for (const auto& server : d_servers) {
940 if (!upOnly || std::get<1>(server)->isUp() ) {
941 count++;
942 }
943 }
944 return count;
945 }
946
947 ServerPolicy::NumberedServerVector getServers()
948 {
949 ServerPolicy::NumberedServerVector result;
950 {
951 ReadLock rl(&d_lock);
952 result = d_servers;
953 }
954 return result;
955 }
956
957 void addServer(shared_ptr<DownstreamState>& server)
958 {
959 WriteLock wl(&d_lock);
960 unsigned int count = (unsigned int) d_servers.size();
961 d_servers.push_back(make_pair(++count, server));
962 /* we need to reorder based on the server 'order' */
963 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
964 return a.second->order < b.second->order;
965 });
966 /* and now we need to renumber for Lua (custom policies) */
967 size_t idx = 1;
968 for (auto& serv : d_servers) {
969 serv.first = idx++;
970 }
971 }
972
973 void removeServer(shared_ptr<DownstreamState>& server)
974 {
975 WriteLock wl(&d_lock);
976 size_t idx = 1;
977 bool found = false;
978 for (auto it = d_servers.begin(); it != d_servers.end();) {
979 if (found) {
980 /* we need to renumber the servers placed
981 after the removed one, for Lua (custom policies) */
982 it->first = idx++;
983 it++;
984 }
985 else if (it->second == server) {
986 it = d_servers.erase(it);
987 found = true;
988 } else {
989 idx++;
990 it++;
991 }
992 }
993 }
994
995 private:
996 ServerPolicy::NumberedServerVector d_servers;
997 pthread_rwlock_t d_lock;
998 bool d_useECS{false};
999 };
1000
/* Plain data describing one carbon endpoint; a vector of these is
   published through the g_carbon global state holder declared below.
   NOTE(review): the meaning of each field is inferred from its name
   only -- confirm against the carbon export code. */
1001 struct CarbonConfig
1002 {
1003 ComboAddress server;
1004 std::string namespace_name;
1005 std::string ourname;
1006 std::string instance_name;
/* no default member initializer: whoever builds the struct has to set
   it. Unit is not visible here -- presumably seconds, TODO confirm. */
1007 unsigned int interval;
1008 };
1009
/* Unscoped enum of EDNS header flag values.
   32768 is 0x8000, i.e. the top bit of a 16-bit flags field.
   NOTE(review): presumably the "DNSSEC OK" (DO) bit from RFC 3225 --
   confirm against the code that reads the EDNS flags. */
1010 enum ednsHeaderFlags {
1011 EDNS_HEADER_FLAG_NONE = 0,
1012 EDNS_HEADER_FLAG_DO = 32768
1013 };
1014
/* Associates a DNSRule with a DNSAction, together with a UUID and a
   creation-order value. Vectors of these are published through
   g_rulactions (declared below).
   NOTE(review): presumably the action is applied to queries matching
   the rule -- confirm in the query processing code. */
1015 struct DNSDistRuleAction
1016 {
1017 std::shared_ptr<DNSRule> d_rule;
1018 std::shared_ptr<DNSAction> d_action;
1019 boost::uuids::uuid d_id;
/* no default member initializer: must be set by whoever builds the entry */
1020 uint64_t d_creationOrder;
1021 };
1022
/* Same layout as DNSDistRuleAction, except that the action is a
   DNSResponseAction. Vectors of these are published through
   g_resprulactions, g_cachehitresprulactions and
   g_selfansweredresprulactions (declared below). */
1023 struct DNSDistResponseRuleAction
1024 {
1025 std::shared_ptr<DNSRule> d_rule;
1026 std::shared_ptr<DNSResponseAction> d_action;
1027 boost::uuids::uuid d_id;
/* no default member initializer: must be set by whoever builds the entry */
1028 uint64_t d_creationOrder;
1029 };
1030
/* Declarations only; the objects are defined in another translation
   unit. g_dynblockSMT is the holder LocalHolders::dynSMTBlock is
   obtained from. */
1031 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
/* NOTE(review): presumably the action applied to dynamically blocked
   queries -- confirm where it is read. */
1032 extern DNSAction::Action g_dynBlockAction;
1033
/* Global state holders, declared here and defined in another
   translation unit. LocalHolders (further down) takes a local handle,
   via getLocal(), on g_ACL, g_policy, g_rulactions,
   g_cachehitresprulactions, g_selfansweredresprulactions, g_dstates and
   g_pools; g_carbon and g_resprulactions are not part of it. */
1034 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1035 extern GlobalStateHolder<ServerPolicy> g_policy;
1036 extern GlobalStateHolder<servers_t> g_dstates;
1037 extern GlobalStateHolder<pools_t> g_pools;
1038 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1039 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1040 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1041 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1042 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1043
/* Process-wide frontends and settings, declared here and defined in
   another translation unit.
   NOTE(review): only g_configurationDone, g_cacheCleaningDelay and
   g_cacheCleaningPercentage are std::atomic; every other variable below
   is a plain object, so whatever makes cross-thread access to them safe
   is not visible in this header -- confirm before reading or writing
   them outside of configuration time. */
1044 extern ComboAddress g_serverControl; // not changed during runtime
1045
1046 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1047 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1048 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1049 extern bool g_truncateTC;
1050 extern bool g_fixupCase;
/* NOTE(review): the units of the three timeouts below are not visible
   here -- presumably seconds, TODO confirm */
1051 extern int g_tcpRecvTimeout;
1052 extern int g_tcpSendTimeout;
1053 extern int g_udpTimeout;
1054 extern uint16_t g_maxOutstanding;
1055 extern std::atomic<bool> g_configurationDone;
1056 extern uint64_t g_maxTCPClientThreads;
1057 extern uint64_t g_maxTCPQueuedConnections;
1058 extern size_t g_maxTCPQueriesPerConn;
1059 extern size_t g_maxTCPConnectionDuration;
1060 extern size_t g_maxTCPConnectionsPerClient;
1061 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1062 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1063 extern uint32_t g_staleCacheEntriesTTL;
1064 extern bool g_apiReadWrite;
1065 extern std::string g_apiConfigDirectory;
1066 extern bool g_servFailOnNoPolicy;
1067 extern uint32_t g_hashperturb;
1068 extern bool g_useTCPSinglePipe;
1069 extern uint16_t g_downstreamTCPCleanupInterval;
1070 extern size_t g_udpVectorSize;
1071 extern bool g_preserveTrailingData;
1072 extern bool g_allowEmptyResponse;
1073 extern bool g_roundrobinFailOnNoServer;
1074 extern double g_consistentHashBalancingFactor;
1075
/* These two globals are only declared when the build defines HAVE_EBPF,
   so any code using them has to sit behind the same guard. */
1076 #ifdef HAVE_EBPF
1077 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1078 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1079 #endif /* HAVE_EBPF */
1080
1081 struct LocalHolders
1082 {
1083 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1084 {
1085 }
1086
1087 LocalStateHolder<NetmaskGroup> acl;
1088 LocalStateHolder<ServerPolicy> policy;
1089 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1090 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1091 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1092 LocalStateHolder<servers_t> servers;
1093 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1094 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1095 LocalStateHolder<pools_t> pools;
1096 };
1097
/* Forward declaration only: enough for the prototypes below that take a
   pointer to it (checkQueryHeaders(), makeDNSResponseFromIDState()). */
1098 struct dnsheader;
1099
/* Prototypes; both functions are defined in other translation units.
   NOTE(review): controlThread() presumably serves the control socket
   passed as 'fd', and setupLua() presumably loads the configuration
   named by 'config' and returns callables for the caller to run --
   confirm against the definitions. */
1100 void controlThread(int fd, ComboAddress local);
1101 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1102
/* Settings of the built-in webserver. The std::mutex member makes the
   struct neither copyable nor movable.
   NOTE(review): 'lock' presumably has to be held while reading or
   writing the other fields -- confirm in the setWebserver*() functions
   and in the webserver thread. */
1103 struct WebserverConfig
1104 {
1105 std::string password;
1106 std::string apiKey;
/* optional: may be left unset (boost::none) */
1107 boost::optional<std::map<std::string, std::string> > customHeaders;
1108 std::mutex lock;
1109 };
1110
/* Setters for the webserver settings, defined in another translation
   unit. The two boost::optional parameters are taken by (const) value,
   so the argument is copied on every call.
   NOTE(review): presumably they update the matching WebserverConfig
   fields -- confirm against the definitions. */
1111 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1112 void setWebserverPassword(const std::string& password);
1113 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1114
/* Thread entry points, defined elsewhere.
   NOTE(review): the concrete type behind the void* taken by
   tcpAcceptorThread() is not visible here -- check the definition
   before passing anything to it. */
1115 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1116 void tcpAcceptorThread(void* p);
/* only declared when the build defines HAVE_DNS_OVER_HTTPS */
1117 #ifdef HAVE_DNS_OVER_HTTPS
1118 void dohThread(ClientState* cs);
1119 #endif /* HAVE_DNS_OVER_HTTPS */
1120
/* Tracking of Lua "side effect" declarations. Going by the per-function
   comments below, the tracked state has three values: indeterminate,
   "no side effect" and "side effect"; a reported side effect takes
   precedence over any "no side effect" declaration. */
1121 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1122 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1123 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1124 void resetLuaSideEffect(); // reset to indeterminate state
1125
/* Prototypes of query / response processing helpers, all defined in
   other translation units.
   NOTE(review): the meaning of the bool return values and of the
   output parameters ('consumed', 'ruleresult', 'drop',
   'rewrittenResponse') is not visible in this header -- confirm
   against the definitions before relying on them. */
1126 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
/* 'response', 'responseLen' and 'responseSize' are passed by pointer
   and 'rewrittenResponse' by non-const reference, so the callee is able
   to modify all of them */
1127 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1128 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1129
1130 bool checkQueryHeaders(const struct dnsheader* dh);
1131
/* DNSCrypt support */
1132 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1133 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
/* returns an optional buffer; 'len' and 'dnsCryptQuery' are non-const
   references, so the callee is able to update them */
1134 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1135
1136 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1137
1138 uint16_t getRandomDNSID();
1139
1140 #include "dnsdist-snmp.hh"
1141
/* SNMP related globals, defined in another translation unit.
   g_snmpAgent is a raw pointer. NOTE(review): whether it may be null
   (for instance when SNMP is disabled) is not visible here -- check
   before dereferencing. */
1142 extern bool g_snmpEnabled;
1143 extern bool g_snmpTrapsEnabled;
1144 extern DNSDistSNMPAgent* g_snmpAgent;
1145 extern bool g_addEDNSToSelfGeneratedResponses;
1146
1147 extern std::set<std::string> g_capabilitiesToRetain;
/* the two constants below are 'static const' at namespace scope in a
   header: each translation unit including it gets its own copy */
1148 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1149 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
1150
/* The three possible outcomes of processQuery().
   'selectedBackend' is passed to processQuery() by non-const reference,
   so the callee is able to set it. NOTE(review): presumably it is only
   meaningful when the result is PassToBackend -- confirm against the
   definition. */
1151 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
1152 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1153
/* Conversions between IDState and DNSQuestion / DNSResponse; 'qname' is
   taken as an rvalue reference, so callers have to hand over a
   temporary or a std::move()d name. */
1154 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1155 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1156
/* Backend sending helpers; 'healthCheck' defaults to false when the
   last argument is omitted. NOTE(review): the return value conventions
   (socket descriptor, bytes sent, error signalling) are not visible
   here -- confirm against the definitions. */
1157 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1158 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);