]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #9067 from rgacogne/wrap-pthread-objects
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "capabilities.hh"
37 #include "circular_buffer.hh"
38 #include "dnscrypt.hh"
39 #include "dnsdist-cache.hh"
40 #include "dnsdist-dynbpf.hh"
41 #include "dnsdist-lbpolicies.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52 #include "proxy-protocol.hh"
53
/* Declared here, defined elsewhere. NOTE(review): presumably the entry point of
   the thread exporting metrics to the configured Carbon servers - confirm. */
54 void carbonDumpThread();
/* Statistics callback; registered under the "uptime" key in DNSDistStats::entries. */
55 uint64_t uptimeOfProcess(const std::string& str);
56
/* Process-wide EDNS Client Subnet defaults. The DNSQuestion constructor copies
   the V4 or V6 prefix length (depending on the address family of the remote
   address) and the override flag into every new question. */
57 extern uint16_t g_ECSSourcePrefixV4;
58 extern uint16_t g_ECSSourcePrefixV6;
59 extern bool g_ECSOverride;
60
/* String key/value tags attached to a query; shared between DNSQuestion and IDState
   through their qTag members. */
using QTag = std::unordered_map<std::string, std::string>;
62
63 struct DNSQuestion
64 {
65 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
66 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
67 const uint16_t* flags = getFlagsFromDNSHeader(dh);
68 origFlags = *flags;
69 }
70 DNSQuestion(const DNSQuestion&) = delete;
71 DNSQuestion& operator=(const DNSQuestion&) = delete;
72 DNSQuestion(DNSQuestion&&) = default;
73
74 std::string getTrailingData() const;
75 bool setTrailingData(const std::string&);
76
77 #ifdef HAVE_PROTOBUF
78 boost::optional<boost::uuids::uuid> uniqueId;
79 #endif
80 Netmask ecs;
81 boost::optional<Netmask> subnet;
82 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
83 std::string poolname;
84 const DNSName* qname{nullptr};
85 const ComboAddress* local{nullptr};
86 const ComboAddress* remote{nullptr};
87 std::shared_ptr<QTag> qTag{nullptr};
88 std::unique_ptr<std::vector<ProxyProtocolValue>> proxyProtocolValues{nullptr};
89 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
90 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
91 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
92 struct dnsheader* dh{nullptr};
93 const struct timespec* queryTime{nullptr};
94 struct DOHUnit* du{nullptr};
95 size_t size;
96 unsigned int consumed{0};
97 int delayMsec{0};
98 boost::optional<uint32_t> tempFailureTTL;
99 uint32_t cacheKeyNoECS;
100 uint32_t cacheKey;
101 const uint16_t qtype;
102 const uint16_t qclass;
103 uint16_t len;
104 uint16_t ecsPrefixLength;
105 uint16_t origFlags;
106 uint8_t ednsRCode{0};
107 const bool tcp;
108 bool skipCache{false};
109 bool ecsOverride;
110 bool useECS{true};
111 bool addXPF{true};
112 bool ecsSet{false};
113 bool ecsAdded{false};
114 bool ednsAdded{false};
115 bool useZeroScope{false};
116 bool dnssecOK{false};
117 };
118
119 struct DNSResponse : DNSQuestion
120 {
121 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
122 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
123 DNSResponse(const DNSResponse&) = delete;
124 DNSResponse& operator=(const DNSResponse&) = delete;
125 DNSResponse(DNSResponse&&) = default;
126 };
127
/* What a rule can do with a query:
   - drop it,
   - fake up an NXDomain,
   - provide an actual answer,
   - allow it and stop processing,
   - continue processing,
   - modify the header (servfail|refused|notimp), set TC=1,
   - send it to a pool */

/* Interface implemented by every action that can be applied to a query. */
class DNSAction
{
public:
  enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

  /* Human-readable description of an action. None and NoOp share the same
     description; a value outside of the enumeration yields "Unknown". */
  static std::string typeToString(const Action& action)
  {
    /* no default label, so that the compiler can warn about an unhandled enumerator */
    switch(action) {
    case Action::Drop:
      return "Drop";
    case Action::Nxdomain:
      return "Send NXDomain";
    case Action::Refused:
      return "Send Refused";
    case Action::Spoof:
      return "Spoof an answer";
    case Action::SpoofRaw:
      return "Spoof an answer from raw bytes";
    case Action::Allow:
      return "Allow";
    case Action::HeaderModify:
      return "Modify the header";
    case Action::Pool:
      return "Route to a pool";
    case Action::Delay:
      return "Delay";
    case Action::Truncate:
      return "Truncate over UDP";
    case Action::ServFail:
      return "Send ServFail";
    case Action::None:
    case Action::NoOp:
      return "Do nothing";
    case Action::NoRecurse:
      return "Set rd=0";
    }

    return "Unknown";
  }

  /* Apply the action to a question and return what should happen next.
     'ruleresult' is an output string whose meaning is defined by the concrete
     action. */
  virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const = 0;
  virtual ~DNSAction() = default;

  /* Description of this action instance. */
  virtual std::string toString() const = 0;

  /* Named statistics kept by the action; empty unless overridden.
     This used to 'return {{}};', which selects the initializer-list constructor
     and yields a map holding a single bogus { "", 0.0 } entry. */
  virtual std::map<std::string, double> getStats() const
  {
    return {};
  }
};
186
187 class DNSResponseAction
188 {
189 public:
190 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
191 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
192 virtual ~DNSResponseAction()
193 {
194 }
195 virtual string toString() const = 0;
196 };
197
198 struct DynBlock
199 {
200 DynBlock(): action(DNSAction::Action::None), warning(false)
201 {
202 }
203
204 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
205 {
206 }
207
208 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
209 {
210 blocks.store(rhs.blocks);
211 }
212
213 DynBlock& operator=(const DynBlock& rhs)
214 {
215 reason=rhs.reason;
216 until=rhs.until;
217 domain=rhs.domain;
218 action=rhs.action;
219 blocks.store(rhs.blocks);
220 warning=rhs.warning;
221 return *this;
222 }
223
224 string reason;
225 struct timespec until;
226 DNSName domain;
227 DNSAction::Action action;
228 mutable std::atomic<unsigned int> blocks;
229 bool warning;
230 };
231
/* Netmask-keyed dynamic blocks; its size is exported as the "dyn-block-nmg-size" statistic. */
232 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
233
/* NOTE(review): presumably the timestamped list of configuration changes made at runtime - confirm. */
234 extern vector<pair<struct timeval, std::string> > g_confDelta;
235
/* Statistics callback; registered under the "latency-count" key in DNSDistStats::entries. */
236 extern uint64_t getLatencyCount(const std::string&);
237
238 struct DNSDistStats
239 {
240 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
241 stat_t responses{0};
242 stat_t servfailResponses{0};
243 stat_t queries{0};
244 stat_t frontendNXDomain{0};
245 stat_t frontendServFail{0};
246 stat_t frontendNoError{0};
247 stat_t nonCompliantQueries{0};
248 stat_t nonCompliantResponses{0};
249 stat_t rdQueries{0};
250 stat_t emptyQueries{0};
251 stat_t aclDrops{0};
252 stat_t dynBlocked{0};
253 stat_t ruleDrop{0};
254 stat_t ruleNXDomain{0};
255 stat_t ruleRefused{0};
256 stat_t ruleServFail{0};
257 stat_t selfAnswered{0};
258 stat_t downstreamTimeouts{0};
259 stat_t downstreamSendErrors{0};
260 stat_t truncFail{0};
261 stat_t noPolicy{0};
262 stat_t cacheHits{0};
263 stat_t cacheMisses{0};
264 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
265 stat_t securityStatus{0};
266
267 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
268 typedef std::function<uint64_t(const std::string&)> statfunction_t;
269 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
270 std::vector<std::pair<std::string, entry_t>> entries{
271 {"responses", &responses},
272 {"servfail-responses", &servfailResponses},
273 {"queries", &queries},
274 {"frontend-nxdomain", &frontendNXDomain},
275 {"frontend-servfail", &frontendServFail},
276 {"frontend-noerror", &frontendNoError},
277 {"acl-drops", &aclDrops},
278 {"rule-drop", &ruleDrop},
279 {"rule-nxdomain", &ruleNXDomain},
280 {"rule-refused", &ruleRefused},
281 {"rule-servfail", &ruleServFail},
282 {"self-answered", &selfAnswered},
283 {"downstream-timeouts", &downstreamTimeouts},
284 {"downstream-send-errors", &downstreamSendErrors},
285 {"trunc-failures", &truncFail},
286 {"no-policy", &noPolicy},
287 {"latency0-1", &latency0_1},
288 {"latency1-10", &latency1_10},
289 {"latency10-50", &latency10_50},
290 {"latency50-100", &latency50_100},
291 {"latency100-1000", &latency100_1000},
292 {"latency-slow", &latencySlow},
293 {"latency-avg100", &latencyAvg100},
294 {"latency-avg1000", &latencyAvg1000},
295 {"latency-avg10000", &latencyAvg10000},
296 {"latency-avg1000000", &latencyAvg1000000},
297 {"uptime", uptimeOfProcess},
298 {"real-memory-usage", getRealMemoryUsage},
299 {"special-memory-usage", getSpecialMemoryUsage},
300 {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
301 {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
302 {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
303 {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
304 {"noncompliant-queries", &nonCompliantQueries},
305 {"noncompliant-responses", &nonCompliantResponses},
306 {"rdqueries", &rdQueries},
307 {"empty-queries", &emptyQueries},
308 {"cache-hits", &cacheHits},
309 {"cache-misses", &cacheMisses},
310 {"cpu-iowait", getCPUIOWait},
311 {"cpu-steal", getCPUSteal},
312 {"cpu-sys-msec", getCPUTimeSystem},
313 {"cpu-user-msec", getCPUTimeUser},
314 {"fd-usage", getOpenFileDescriptors},
315 {"dyn-blocked", &dynBlocked},
316 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
317 {"security-status", &securityStatus},
318 // Latency histogram
319 {"latency-sum", &latencySum},
320 {"latency-count", getLatencyCount},
321 };
322 };
323
/* The process-wide statistics object. */
324 extern struct DNSDistStats g_stats;
/* NOTE(review): presumably records one latency sample, in microseconds, into the
   latency counters of g_stats - confirm against the definition. */
325 void doLatencyStats(double udiff);
326
327
328 struct StopWatch
329 {
330 StopWatch(bool realTime=false): d_needRealTime(realTime)
331 {
332 }
333 struct timespec d_start{0,0};
334 bool d_needRealTime{false};
335
336 void start() {
337 if(gettime(&d_start, d_needRealTime) < 0)
338 unixDie("Getting timestamp");
339
340 }
341
342 void set(const struct timespec& from) {
343 d_start = from;
344 }
345
346 double udiff() const {
347 struct timespec now;
348 if(gettime(&now, d_needRealTime) < 0)
349 unixDie("Getting timestamp");
350
351 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
352 }
353
354 double udiffAndSet() {
355 struct timespec now;
356 if(gettime(&now, d_needRealTime) < 0)
357 unixDie("Getting timestamp");
358
359 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
360 d_start = now;
361 return ret;
362 }
363
364 };
365
366 class BasicQPSLimiter
367 {
368 public:
369 BasicQPSLimiter()
370 {
371 }
372
373 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
374 {
375 d_prev.start();
376 }
377
378 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
379 {
380 auto delta = d_prev.udiffAndSet();
381
382 if(delta > 0.0) // time, frequently, does go backwards..
383 d_tokens += 1.0 * rate * (delta/1000000.0);
384
385 if(d_tokens > burst) {
386 d_tokens = burst;
387 }
388
389 bool ret=false;
390 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
391 ret=true;
392 --d_tokens;
393 }
394
395 return ret;
396 }
397
398 bool seenSince(const struct timespec& cutOff) const
399 {
400 return cutOff < d_prev.d_start;
401 }
402
403 protected:
404 mutable StopWatch d_prev;
405 mutable double d_tokens;
406 };
407
408 class QPSLimiter : public BasicQPSLimiter
409 {
410 public:
411 QPSLimiter(): BasicQPSLimiter()
412 {
413 }
414
415 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
416 {
417 d_prev.start();
418 }
419
420 unsigned int getRate() const
421 {
422 return d_passthrough ? 0 : d_rate;
423 }
424
425 int getPassed() const
426 {
427 return d_passed;
428 }
429
430 int getBlocked() const
431 {
432 return d_blocked;
433 }
434
435 bool check() const // this is not quite fair
436 {
437 if (d_passthrough) {
438 return true;
439 }
440
441 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
442 if (ret) {
443 d_passed++;
444 }
445 else {
446 d_blocked++;
447 }
448
449 return ret;
450 }
451 private:
452 mutable unsigned int d_passed{0};
453 mutable unsigned int d_blocked{0};
454 unsigned int d_rate;
455 unsigned int d_burst;
456 bool d_passthrough{true};
457 };
458
459 struct ClientState;
460
461 struct IDState
462 {
463 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
464 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
465 {
466 usageIndicator.store(orig.usageIndicator.load());
467 origFD = orig.origFD;
468 origID = orig.origID;
469 delayMsec = orig.delayMsec;
470 tempFailureTTL = orig.tempFailureTTL;
471 }
472
473 static const int64_t unusedIndicator = -1;
474
475 static bool isInUse(int64_t usageIndicator)
476 {
477 return usageIndicator != unusedIndicator;
478 }
479
480 bool isInUse() const
481 {
482 return usageIndicator != unusedIndicator;
483 }
484
485 /* return true if the value has been successfully replaced meaning that
486 no-one updated the usage indicator in the meantime */
487 bool tryMarkUnused(int64_t expectedUsageIndicator)
488 {
489 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
490 }
491
492 /* mark as unused no matter what, return true if the state was in use before */
493 bool markAsUsed()
494 {
495 auto currentGeneration = generation++;
496 return markAsUsed(currentGeneration);
497 }
498
499 /* mark as unused no matter what, return true if the state was in use before */
500 bool markAsUsed(int64_t currentGeneration)
501 {
502 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
503 return oldUsage != unusedIndicator;
504 }
505
506 /* We use this value to detect whether this state is in use.
507 For performance reasons we don't want to use a lock here, but that means
508 we need to be very careful when modifying this value. Modifications happen
509 from:
510 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
511 then picking one of the states associated to this backend (via the idOffset).
512 Most of the time this state should not be in use and usageIndicator is -1, but we
513 might not yet have received a response for the query previously associated to this
514 state, meaning that we will 'reuse' this state and erase the existing state.
515 If we ever receive a response for this state, it will be discarded. This is
516 mostly fine for UDP except that we still need to be careful in order to miss
517 the 'outstanding' counters, which should only be increased when we are picking
518 an empty state, and not when reusing ;
519 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
520 be freed, as well as internal objects internals to libh2o.
521 - one of the UDP receiver threads receiving a response from a backend, picking
522 the corresponding state and sending the response to the client ;
523 - the 'healthcheck' thread scanning the states to actively discover timeouts,
524 mostly to keep some counters like the 'outstanding' one sane.
525 We previously based that logic on the origFD (FD on which the query was received,
526 and therefore from where the response should be sent) but this suffered from an
527 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
528 same value since we only have so much incoming sockets:
529 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
530 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
531 qtype and qclass match
532 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
533 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
534 5, except it's not the same 5 anymore and it overrides a fresh state.
535 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
536 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
537 when the state is unused and the value of our counter otherwise.
538 */
539 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
540 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
541 ComboAddress origRemote; // 28
542 ComboAddress origDest; // 28
543 StopWatch sentTime; // 16
544 DNSName qname; // 80
545 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
546 #ifdef HAVE_PROTOBUF
547 boost::optional<boost::uuids::uuid> uniqueId;
548 #endif
549 boost::optional<Netmask> subnet{boost::none};
550 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
551 std::shared_ptr<QTag> qTag{nullptr};
552 const ClientState* cs{nullptr};
553 DOHUnit* du{nullptr};
554 uint32_t cacheKey; // 4
555 uint32_t cacheKeyNoECS; // 4
556 uint16_t age; // 4
557 uint16_t qtype; // 2
558 uint16_t qclass; // 2
559 uint16_t origID; // 2
560 uint16_t origFlags; // 2
561 int origFD{-1};
562 int delayMsec;
563 boost::optional<uint32_t> tempFailureTTL;
564 bool ednsAdded{false};
565 bool ecsAdded{false};
566 bool skipCache{false};
567 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
568 bool dnssecOK{false};
569 bool useZeroScope;
570 };
571
572 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
573 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
574 struct QueryCount {
575 QueryCount()
576 {
577 }
578 ~QueryCount()
579 {
580 }
581 QueryCountRecords records;
582 QueryCountFilter filter;
583 ReadWriteLock queryLock;
584 bool enabled{false};
585 };
586
587 extern QueryCount g_qcount;
588
589 struct ClientState
590 {
591 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
592 {
593 }
594
595 std::set<int> cpus;
596 ComboAddress local;
597 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
598 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
599 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
600 std::string interface;
601 std::atomic<uint64_t> queries{0};
602 mutable std::atomic<uint64_t> responses{0};
603 std::atomic<uint64_t> tcpDiedReadingQuery{0};
604 std::atomic<uint64_t> tcpDiedSendingResponse{0};
605 std::atomic<uint64_t> tcpGaveUp{0};
606 std::atomic<uint64_t> tcpClientTimeouts{0};
607 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
608 std::atomic<uint64_t> tcpCurrentConnections{0};
609 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
610 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
611 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
612 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
613 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
614 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
615 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
616 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
617 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
618 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
619 /* in ms */
620 std::atomic<double> tcpAvgConnectionDuration{0.0};
621 int udpFD{-1};
622 int tcpFD{-1};
623 int tcpListenQueueSize{SOMAXCONN};
624 int fastOpenQueueSize{0};
625 bool muted{false};
626 bool tcp;
627 bool reuseport;
628 bool ready{false};
629
630 int getSocket() const
631 {
632 return udpFD != -1 ? udpFD : tcpFD;
633 }
634
635 bool isUDP() const
636 {
637 return udpFD != -1;
638 }
639
640 bool isTCP() const
641 {
642 return udpFD == -1;
643 }
644
645 bool hasTLS() const
646 {
647 return tlsFrontend != nullptr || dohFrontend != nullptr;
648 }
649
650 std::string getType() const
651 {
652 std::string result = udpFD != -1 ? "UDP" : "TCP";
653
654 if (dohFrontend) {
655 result += " (DNS over HTTPS)";
656 }
657 else if (tlsFrontend) {
658 result += " (DNS over TLS)";
659 }
660 else if (dnscryptCtx) {
661 result += " (DNSCrypt)";
662 }
663
664 return result;
665 }
666
667 #ifdef HAVE_EBPF
668 shared_ptr<BPFFilter> d_filter;
669
670 void detachFilter()
671 {
672 if (d_filter) {
673 d_filter->removeSocket(getSocket());
674 d_filter = nullptr;
675 }
676 }
677
678 void attachFilter(shared_ptr<BPFFilter> bpf)
679 {
680 detachFilter();
681
682 bpf->addSocket(getSocket());
683 d_filter = bpf;
684 }
685 #endif /* HAVE_EBPF */
686
687 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
688 {
689 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
690 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
691 }
692 };
693
694 class TCPClientCollection {
695 std::vector<int> d_tcpclientthreads;
696 std::atomic<uint64_t> d_numthreads{0};
697 std::atomic<uint64_t> d_pos{0};
698 std::atomic<uint64_t> d_queued{0};
699 const uint64_t d_maxthreads{0};
700 std::mutex d_mutex;
701 int d_singlePipe[2];
702 const bool d_useSinglePipe;
703 public:
704
705 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
706
707 {
708 d_tcpclientthreads.reserve(maxThreads);
709
710 if (d_useSinglePipe) {
711 if (pipe(d_singlePipe) < 0) {
712 int err = errno;
713 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
714 }
715
716 if (!setNonBlocking(d_singlePipe[0])) {
717 int err = errno;
718 close(d_singlePipe[0]);
719 close(d_singlePipe[1]);
720 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
721 }
722
723 if (!setNonBlocking(d_singlePipe[1])) {
724 int err = errno;
725 close(d_singlePipe[0]);
726 close(d_singlePipe[1]);
727 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
728 }
729 }
730 }
731 int getThread()
732 {
733 uint64_t pos = d_pos++;
734 ++d_queued;
735 return d_tcpclientthreads[pos % d_numthreads];
736 }
737 bool hasReachedMaxThreads() const
738 {
739 return d_numthreads >= d_maxthreads;
740 }
741 uint64_t getThreadsCount() const
742 {
743 return d_numthreads;
744 }
745 uint64_t getQueuedCount() const
746 {
747 return d_queued;
748 }
749 void decrementQueuedCount()
750 {
751 --d_queued;
752 }
753 void addTCPClientThread();
754 };
755
/* The process-wide collection of TCP worker threads; a null pointer until something creates it. */
756 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
757
758 struct DownstreamState
759 {
760 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
761
762 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
763 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
764 ~DownstreamState()
765 {
766 for (auto& fd : sockets) {
767 if (fd >= 0) {
768 close(fd);
769 fd = -1;
770 }
771 }
772 }
773 boost::uuids::uuid id;
774 std::vector<unsigned int> hashes;
775 mutable ReadWriteLock d_lock;
776 std::vector<int> sockets;
777 const std::string sourceItfName;
778 std::mutex socketsLock;
779 std::mutex connectLock;
780 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
781 std::thread tid;
782 const ComboAddress remote;
783 QPSLimiter qps;
784 vector<IDState> idStates;
785 const ComboAddress sourceAddr;
786 checkfunc_t checkFunction;
787 DNSName checkName{"a.root-servers.net."};
788 QType checkType{QType::A};
789 uint16_t checkClass{QClass::IN};
790 std::atomic<uint64_t> idOffset{0};
791 std::atomic<uint64_t> sendErrors{0};
792 std::atomic<uint64_t> outstanding{0};
793 std::atomic<uint64_t> reuseds{0};
794 std::atomic<uint64_t> queries{0};
795 std::atomic<uint64_t> responses{0};
796 struct {
797 std::atomic<uint64_t> sendErrors{0};
798 std::atomic<uint64_t> reuseds{0};
799 std::atomic<uint64_t> queries{0};
800 } prev;
801 std::atomic<uint64_t> tcpDiedSendingQuery{0};
802 std::atomic<uint64_t> tcpDiedReadingResponse{0};
803 std::atomic<uint64_t> tcpGaveUp{0};
804 std::atomic<uint64_t> tcpReadTimeouts{0};
805 std::atomic<uint64_t> tcpWriteTimeouts{0};
806 std::atomic<uint64_t> tcpCurrentConnections{0};
807 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
808 /* in ms */
809 std::atomic<double> tcpAvgConnectionDuration{0.0};
810 size_t socketsOffset{0};
811 double queryLoad{0.0};
812 double dropRate{0.0};
813 double latencyUsec{0.0};
814 int order{1};
815 int weight{1};
816 int tcpConnectTimeout{5};
817 int tcpRecvTimeout{30};
818 int tcpSendTimeout{30};
819 unsigned int checkInterval{1};
820 unsigned int lastCheck{0};
821 const unsigned int sourceItf{0};
822 uint16_t retries{5};
823 uint16_t xpfRRCode{0};
824 uint16_t checkTimeout{1000}; /* in milliseconds */
825 uint8_t currentCheckFailures{0};
826 uint8_t consecutiveSuccessfulChecks{0};
827 uint8_t maxCheckFailures{1};
828 uint8_t minRiseSuccesses{1};
829 StopWatch sw;
830 set<string> pools;
831 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
832 bool mustResolve{false};
833 bool upStatus{false};
834 bool useECS{false};
835 bool useProxyProtocol{false};
836 bool setCD{false};
837 bool disableZeroScope{false};
838 std::atomic<bool> connected{false};
839 std::atomic_flag threadStarted;
840 bool tcpFastOpen{false};
841 bool ipBindAddrNoPort{true};
842
843 bool isUp() const
844 {
845 if(availability == Availability::Down)
846 return false;
847 if(availability == Availability::Up)
848 return true;
849 return upStatus;
850 }
851 void setUp() { availability = Availability::Up; }
852 void setDown() { availability = Availability::Down; }
853 void setAuto() { availability = Availability::Auto; }
854 const string& getName() const {
855 return name;
856 }
857 const string& getNameWithAddr() const {
858 return nameWithAddr;
859 }
860 void setName(const std::string& newName)
861 {
862 name = newName;
863 nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")");
864 }
865
866 string getStatus() const
867 {
868 string status;
869 if(availability == DownstreamState::Availability::Up)
870 status = "UP";
871 else if(availability == DownstreamState::Availability::Down)
872 status = "DOWN";
873 else
874 status = (upStatus ? "up" : "down");
875 return status;
876 }
877 bool reconnect();
878 void hash();
879 void setId(const boost::uuids::uuid& newId);
880 void setWeight(int newWeight);
881
882 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
883 {
884 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
885 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
886 }
887 private:
888 std::string name;
889 std::string nameWithAddr;
890 };
/* List of backends, shared by pointer. */
891 using servers_t =vector<std::shared_ptr<DownstreamState>>;
892
/* NOTE(review): presumably the entry point of the thread handling the responses
   coming from the given backend - confirm against the definition. */
893 void responderThread(std::shared_ptr<DownstreamState> state);
/* The Lua context and the mutex protecting it. */
894 extern std::mutex g_luamutex;
895 extern LuaContext g_lua;
896 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
897
898 class DNSRule
899 {
900 public:
901 virtual ~DNSRule ()
902 {
903 }
904 virtual bool matches(const DNSQuestion* dq) const =0;
905 virtual string toString() const = 0;
906 mutable std::atomic<uint64_t> d_matches{0};
907 };
908
909 struct ServerPool
910 {
911 ServerPool()
912 {
913 }
914 ~ServerPool()
915 {
916 }
917
918 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
919
920 bool getECS() const
921 {
922 return d_useECS;
923 }
924
925 void setECS(bool useECS)
926 {
927 d_useECS = useECS;
928 }
929
930 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
931 std::shared_ptr<ServerPolicy> policy{nullptr};
932
933 size_t countServers(bool upOnly)
934 {
935 size_t count = 0;
936 ReadLock rl(&d_lock);
937 for (const auto& server : d_servers) {
938 if (!upOnly || std::get<1>(server)->isUp() ) {
939 count++;
940 }
941 }
942 return count;
943 }
944
945 ServerPolicy::NumberedServerVector getServers()
946 {
947 ServerPolicy::NumberedServerVector result;
948 {
949 ReadLock rl(&d_lock);
950 result = d_servers;
951 }
952 return result;
953 }
954
955 void addServer(shared_ptr<DownstreamState>& server)
956 {
957 WriteLock wl(&d_lock);
958 unsigned int count = (unsigned int) d_servers.size();
959 d_servers.push_back(make_pair(++count, server));
960 /* we need to reorder based on the server 'order' */
961 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
962 return a.second->order < b.second->order;
963 });
964 /* and now we need to renumber for Lua (custom policies) */
965 size_t idx = 1;
966 for (auto& serv : d_servers) {
967 serv.first = idx++;
968 }
969 }
970
971 void removeServer(shared_ptr<DownstreamState>& server)
972 {
973 WriteLock wl(&d_lock);
974 size_t idx = 1;
975 bool found = false;
976 for (auto it = d_servers.begin(); it != d_servers.end();) {
977 if (found) {
978 /* we need to renumber the servers placed
979 after the removed one, for Lua (custom policies) */
980 it->first = idx++;
981 it++;
982 }
983 else if (it->second == server) {
984 it = d_servers.erase(it);
985 found = true;
986 } else {
987 idx++;
988 it++;
989 }
990 }
991 }
992
993 private:
994 ServerPolicy::NumberedServerVector d_servers;
995 ReadWriteLock d_lock;
996 bool d_useECS{false};
997 };
998
/* Settings of one Carbon export target. Plain aggregate: 'interval' has no
   default value and must be set by whoever creates the object.
   NOTE(review): the unit of 'interval' is not visible here, presumably seconds - confirm. */
999 struct CarbonConfig
1000 {
1001 ComboAddress server;
1002 std::string namespace_name;
1003 std::string ourname;
1004 std::string instance_name;
1005 unsigned int interval;
1006 };
1007
/* Bit values for the EDNS header flags field.
   32768 is 0x8000, i.e. the most significant bit of a 16-bit value.
   NOTE(review): presumably the "DNSSEC OK" (DO) bit of RFC 3225 — confirm. */
1008 enum ednsHeaderFlags {
1009 EDNS_HEADER_FLAG_NONE = 0,
1010 EDNS_HEADER_FLAG_DO = 32768
1011 };
1012
/* One entry of a query rule list (see g_rulactions): a rule, the action
   attached to it, a UUID and a creation-order counter.
   NOTE(review): presumably the action runs when the rule matches, and d_id /
   d_creationOrder identify and order entries — confirm with the users of this struct. */
1013 struct DNSDistRuleAction
1014 {
1015 std::shared_ptr<DNSRule> d_rule;
1016 std::shared_ptr<DNSAction> d_action;
1017 boost::uuids::uuid d_id;
/* no default member initializer: set by whoever creates the entry */
1018 uint64_t d_creationOrder;
1019 };
1020
/* Response-side counterpart of DNSDistRuleAction: same layout, but the action
   is a DNSResponseAction. Used as the element type of g_resprulactions,
   g_cachehitresprulactions and g_selfansweredresprulactions. */
1021 struct DNSDistResponseRuleAction
1022 {
1023 std::shared_ptr<DNSRule> d_rule;
1024 std::shared_ptr<DNSResponseAction> d_action;
1025 boost::uuids::uuid d_id;
/* no default member initializer: set by whoever creates the entry */
1026 uint64_t d_creationOrder;
1027 };
1028
/* Declarations only: the objects below are defined in another translation unit. */

/* Dynamic-block state: a suffix-match tree of DynBlock entries and an action value.
   NOTE(review): presumably g_dynBlockAction is the default action applied to
   dynamically blocked queries — confirm. */
1029 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1030 extern DNSAction::Action g_dynBlockAction;
1031
/* Shared state wrapped in GlobalStateHolder. LocalHolders takes a local view
   of most of these through getLocal(). */
1032 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1033 extern GlobalStateHolder<ServerPolicy> g_policy;
1034 extern GlobalStateHolder<servers_t> g_dstates;
1035 extern GlobalStateHolder<pools_t> g_pools;
1036 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1037 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1038 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1039 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1040 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1041
/* Process-wide settings and frontend lists, declared here and defined elsewhere.
   Apart from the std::atomic ones, these are plain objects with no
   synchronisation visible in this header.
   NOTE(review): units (seconds, bytes, counts) are not stated here — check
   the definitions before relying on them. */
1042 extern ComboAddress g_serverControl; // not changed during runtime
1043
/* listening frontends, by kind */
1044 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1045 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1046 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1047 extern bool g_truncateTC;
1048 extern bool g_fixupCase;
/* timeouts */
1049 extern int g_tcpRecvTimeout;
1050 extern int g_tcpSendTimeout;
1051 extern int g_udpTimeout;
1052 extern uint16_t g_maxOutstanding;
1053 extern std::atomic<bool> g_configurationDone;
/* TCP limits */
1054 extern uint64_t g_maxTCPClientThreads;
1055 extern uint64_t g_maxTCPQueuedConnections;
1056 extern size_t g_maxTCPQueriesPerConn;
1057 extern size_t g_maxTCPConnectionDuration;
1058 extern size_t g_maxTCPConnectionsPerClient;
/* packet cache maintenance */
1059 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1060 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1061 extern uint32_t g_staleCacheEntriesTTL;
/* API */
1062 extern bool g_apiReadWrite;
1063 extern std::string g_apiConfigDirectory;
1064 extern bool g_servFailOnNoPolicy;
1065 extern bool g_useTCPSinglePipe;
1066 extern uint16_t g_downstreamTCPCleanupInterval;
1067 extern size_t g_udpVectorSize;
1068 extern bool g_preserveTrailingData;
1069 extern bool g_allowEmptyResponse;
1070
/* eBPF filter objects; only declared when the build defines HAVE_EBPF. */
1071 #ifdef HAVE_EBPF
1072 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1073 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1074 #endif /* HAVE_EBPF */
1075
1076 struct LocalHolders
1077 {
1078 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1079 {
1080 }
1081
1082 LocalStateHolder<NetmaskGroup> acl;
1083 LocalStateHolder<ServerPolicy> policy;
1084 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1085 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1086 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1087 LocalStateHolder<servers_t> servers;
1088 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1089 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1090 LocalStateHolder<pools_t> pools;
1091 };
1092
/* Forward declaration: only pointers to dnsheader are used by the
   declarations further down (e.g. checkQueryHeaders()). */
1093 struct dnsheader;
1094
/* Returns a list of callables taking no argument and returning nothing.
   NOTE(review): presumably 'config' is the path of the Lua configuration to
   load, 'client' selects console-client mode, and the returned callables are
   deferred work to run afterwards — confirm in the definition. */
1095 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1096
/* Settings of the built-in web server: a password, an API key, optional extra
   headers and a mutex.
   NOTE(review): 'lock' presumably guards the other three members against
   concurrent access — confirm with the code that reads and writes them. */
1097 struct WebserverConfig
1098 {
1099 std::string password;
1100 std::string apiKey;
/* unset (boost::none) when no custom headers were configured */
1101 boost::optional<std::map<std::string, std::string> > customHeaders;
1102 std::mutex lock;
1103 };
1104
/* Setters for the web server API key, password and custom headers.
   The API key and the custom headers are optional and taken by value.
   NOTE(review): presumably these update a WebserverConfig while holding its
   lock — the definitions are not part of this header. */
1105 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1106 void setWebserverPassword(const std::string& password);
1107 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1108
/* Functions named as thread entry points.
   NOTE(review): the concrete type behind tcpAcceptorThread's void* argument
   is not visible here — check the caller before passing anything to it. */
1109 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1110 void tcpAcceptorThread(void* p);
/* only declared when the build defines HAVE_DNS_OVER_HTTPS */
1111 #ifdef HAVE_DNS_OVER_HTTPS
1112 void dohThread(ClientState* cs);
1113 #endif /* HAVE_DNS_OVER_HTTPS */
1114
/* Tracking of whether Lua code had a side effect. Per the notes on each
   function, the state is one of: indeterminate, "no side effect" (only
   explicit no-side-effect declarations were made), or "side effect" (which
   cancels every no-side-effect declaration). */
1115 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1116 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1117 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1118 void resetLuaSideEffect(); // reset to indeterminate state
1119
/* Response handling helpers; only the signatures are visible here.
   responseContentMatches() takes a raw response buffer plus the expected
   qname/qtype/qclass and the remote address, and reports through 'consumed'.
   processResponse() receives the buffer, its length and its size by pointer,
   so it is able to update them, along with the response rules to apply.
   NOTE(review): the meaning of the bool results (presumably true = keep going,
   false = drop) has to be confirmed in the definitions. */
1120 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1121 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
/* 'ruleresult' and 'drop' are non-const references, i.e. outputs of the call */
1122 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1123
1124 bool checkQueryHeaders(const struct dnsheader* dh);
1125
/* DNSCrypt: the configured contexts and the two query-side entry points.
   checkDNSCryptQuery() returns an optional byte vector and may update 'len'
   and 'dnsCryptQuery', both passed by non-const reference.
   NOTE(review): presumably a returned vector is a response to send back
   directly — confirm. */
1126 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1127 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1128 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1129
/* NOTE(review): presumably adds an XPF record using 'optionCode' to the query — confirm. */
1130 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1131
/* Returns a 16-bit value to be used as a DNS message ID. */
1132 uint16_t getRandomDNSID();
1133
1134 #include "dnsdist-snmp.hh"
1135
/* SNMP switches and the agent object (a raw pointer), defined elsewhere.
   NOTE(review): whether g_snmpAgent may be null when SNMP is disabled is not
   visible here — check before dereferencing. */
1136 extern bool g_snmpEnabled;
1137 extern bool g_snmpTrapsEnabled;
1138 extern DNSDistSNMPAgent* g_snmpAgent;
1139 extern bool g_addEDNSToSelfGeneratedResponses;
1140
1141 extern std::set<std::string> g_capabilitiesToRetain;
/* Size limits. Being 'static const' in a header, each translation unit that
   includes it gets its own copy. NOTE(review): presumably expressed in bytes. */
1142 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1143 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
1144
/* Outcome of processQuery(): the three possible next steps for a query. */
1145 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* 'selectedBackend' is a non-const reference, so the callee can set it.
   NOTE(review): presumably it is filled in when the result is PassToBackend — confirm. */
1146 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1147
/* Conversions between the per-query IDState and DNSQuestion/DNSResponse objects.
   setIDStateFromDNSQuestion() takes the qname as an rvalue reference, so the
   caller's DNSName may be moved from. */
1148 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1149 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1150
/* Backend socket helpers. 'healthCheck' defaults to false.
   NOTE(review): presumably pickBackendSocketForSending() returns a socket
   descriptor to pass as 'sd', and the ssize_t result follows the send(2)
   convention — confirm in the definitions. */
1151 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1152 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);