]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #9229 from rgacogne/dnsdist-webserver-allow-from
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "capabilities.hh"
37 #include "circular_buffer.hh"
38 #include "dnscrypt.hh"
39 #include "dnsdist-cache.hh"
40 #include "dnsdist-dynbpf.hh"
41 #include "dnsdist-lbpolicies.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52 #include "proxy-protocol.hh"
53
/* Implemented elsewhere in dnsdist. */
void carbonDumpThread();
uint64_t uptimeOfProcess(const std::string& str);

/* Process-wide EDNS Client Subnet settings. Every DNSQuestion copies them
   at construction time: the V4 prefix length when the client address
   family is AF_INET, the V6 one otherwise, plus the override flag. */
extern uint16_t g_ECSSourcePrefixV4;
extern uint16_t g_ECSSourcePrefixV6;
extern bool g_ECSOverride;

/* String key / string value pairs that can be attached to a query
   (see DNSQuestion::qTag and IDState::qTag). */
using QTag = std::unordered_map<std::string, std::string>;
62
63 struct DNSQuestion
64 {
65 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
66 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
67 const uint16_t* flags = getFlagsFromDNSHeader(dh);
68 origFlags = *flags;
69 }
70 DNSQuestion(const DNSQuestion&) = delete;
71 DNSQuestion& operator=(const DNSQuestion&) = delete;
72 DNSQuestion(DNSQuestion&&) = default;
73
74 std::string getTrailingData() const;
75 bool setTrailingData(const std::string&);
76
77 #ifdef HAVE_PROTOBUF
78 boost::optional<boost::uuids::uuid> uniqueId;
79 #endif
80 Netmask ecs;
81 boost::optional<Netmask> subnet;
82 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
83 std::string poolname;
84 const DNSName* qname{nullptr};
85 const ComboAddress* local{nullptr};
86 const ComboAddress* remote{nullptr};
87 std::shared_ptr<QTag> qTag{nullptr};
88 std::unique_ptr<std::vector<ProxyProtocolValue>> proxyProtocolValues{nullptr};
89 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
90 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
91 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
92 struct dnsheader* dh{nullptr};
93 const struct timespec* queryTime{nullptr};
94 struct DOHUnit* du{nullptr};
95 size_t size;
96 unsigned int consumed{0};
97 int delayMsec{0};
98 boost::optional<uint32_t> tempFailureTTL;
99 uint32_t cacheKeyNoECS;
100 uint32_t cacheKey;
101 const uint16_t qtype;
102 const uint16_t qclass;
103 uint16_t len;
104 uint16_t ecsPrefixLength;
105 uint16_t origFlags;
106 uint8_t ednsRCode{0};
107 const bool tcp;
108 bool skipCache{false};
109 bool ecsOverride;
110 bool useECS{true};
111 bool addXPF{true};
112 bool ecsSet{false};
113 bool ecsAdded{false};
114 bool ednsAdded{false};
115 bool useZeroScope{false};
116 bool dnssecOK{false};
117 };
118
119 struct DNSResponse : DNSQuestion
120 {
121 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
122 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
123 DNSResponse(const DNSResponse&) = delete;
124 DNSResponse& operator=(const DNSResponse&) = delete;
125 DNSResponse(DNSResponse&&) = default;
126 };
127
/* What an action can do with a query:
   drop it,
   fake up an NXDomain,
   provide an actual answer,
   allow it and stop processing,
   continue processing,
   modify the header (servfail|refused|notimp), set TC=1,
   send it to a pool */

/* Abstract base class of the actions applied to queries. Implementations
   provide operator() and toString(); getStats() is optional. */
class DNSAction
{
public:
  enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

  /* Returns a human-readable description of an action.
     None and NoOp share the same text; "Unknown" is only returned for a
     value that is not one of the enumerators. */
  static std::string typeToString(const Action& action)
  {
    switch(action) {
    case Action::Drop:
      return "Drop";
    case Action::Nxdomain:
      return "Send NXDomain";
    case Action::Refused:
      return "Send Refused";
    case Action::Spoof:
      return "Spoof an answer";
    case Action::SpoofRaw:
      return "Spoof an answer from raw bytes";
    case Action::Allow:
      return "Allow";
    case Action::HeaderModify:
      return "Modify the header";
    case Action::Pool:
      return "Route to a pool";
    case Action::Delay:
      return "Delay";
    case Action::Truncate:
      return "Truncate over UDP";
    case Action::ServFail:
      return "Send ServFail";
    case Action::None:
    case Action::NoOp:
      return "Do nothing";
    case Action::NoRecurse:
      return "Set rd=0";
    }

    return "Unknown";
  }

  /* Applies the action to the query and returns what should happen next.
     ruleresult is an output string for the action; its exact meaning
     depends on the returned Action (NOTE(review): confirm with callers). */
  virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const = 0;
  virtual ~DNSAction()
  {
  }
  /* Human-readable description of this action instance. */
  virtual std::string toString() const = 0;
  /* Named statistics exposed by the action. The default implementation
     has none and returns an empty map. It must be `{}` and not `{{}}`:
     the latter selects the initializer_list constructor with one
     value-initialized element and yields a bogus ("", 0.0) entry. */
  virtual std::map<std::string, double> getStats() const
  {
    return {};
  }
};
186
/* Abstract base class of the actions applied to responses; the response
   counterpart of DNSAction, with a smaller set of possible outcomes. */
class DNSResponseAction
{
public:
  enum class Action
  {
    Allow,
    Delay,
    Drop,
    HeaderModify,
    ServFail,
    None
  };

  /* Applies the action to the response and returns the outcome. */
  virtual Action operator()(struct DNSResponse*, std::string* ruleresult) const = 0;
  virtual ~DNSResponseAction() = default;
  /* Human-readable description of this action instance. */
  virtual std::string toString() const = 0;
};
197
198 struct DynBlock
199 {
200 DynBlock(): action(DNSAction::Action::None), warning(false)
201 {
202 }
203
204 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
205 {
206 }
207
208 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
209 {
210 blocks.store(rhs.blocks);
211 }
212
213 DynBlock& operator=(const DynBlock& rhs)
214 {
215 reason=rhs.reason;
216 until=rhs.until;
217 domain=rhs.domain;
218 action=rhs.action;
219 blocks.store(rhs.blocks);
220 warning=rhs.warning;
221 return *this;
222 }
223
224 string reason;
225 struct timespec until;
226 DNSName domain;
227 DNSAction::Action action;
228 mutable std::atomic<unsigned int> blocks;
229 bool warning;
230 };
231
232 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
233
234 extern vector<pair<struct timeval, std::string> > g_confDelta;
235
236 extern uint64_t getLatencyCount(const std::string&);
237
238 struct DNSDistStats
239 {
240 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
241 stat_t responses{0};
242 stat_t servfailResponses{0};
243 stat_t queries{0};
244 stat_t frontendNXDomain{0};
245 stat_t frontendServFail{0};
246 stat_t frontendNoError{0};
247 stat_t nonCompliantQueries{0};
248 stat_t nonCompliantResponses{0};
249 stat_t rdQueries{0};
250 stat_t emptyQueries{0};
251 stat_t aclDrops{0};
252 stat_t dynBlocked{0};
253 stat_t ruleDrop{0};
254 stat_t ruleNXDomain{0};
255 stat_t ruleRefused{0};
256 stat_t ruleServFail{0};
257 stat_t selfAnswered{0};
258 stat_t downstreamTimeouts{0};
259 stat_t downstreamSendErrors{0};
260 stat_t truncFail{0};
261 stat_t noPolicy{0};
262 stat_t cacheHits{0};
263 stat_t cacheMisses{0};
264 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
265 stat_t securityStatus{0};
266 stat_t dohQueryPipeFull{0};
267 stat_t dohResponsePipeFull{0};
268
269 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
270 typedef std::function<uint64_t(const std::string&)> statfunction_t;
271 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
272 std::vector<std::pair<std::string, entry_t>> entries{
273 {"responses", &responses},
274 {"servfail-responses", &servfailResponses},
275 {"queries", &queries},
276 {"frontend-nxdomain", &frontendNXDomain},
277 {"frontend-servfail", &frontendServFail},
278 {"frontend-noerror", &frontendNoError},
279 {"acl-drops", &aclDrops},
280 {"rule-drop", &ruleDrop},
281 {"rule-nxdomain", &ruleNXDomain},
282 {"rule-refused", &ruleRefused},
283 {"rule-servfail", &ruleServFail},
284 {"self-answered", &selfAnswered},
285 {"downstream-timeouts", &downstreamTimeouts},
286 {"downstream-send-errors", &downstreamSendErrors},
287 {"trunc-failures", &truncFail},
288 {"no-policy", &noPolicy},
289 {"latency0-1", &latency0_1},
290 {"latency1-10", &latency1_10},
291 {"latency10-50", &latency10_50},
292 {"latency50-100", &latency50_100},
293 {"latency100-1000", &latency100_1000},
294 {"latency-slow", &latencySlow},
295 {"latency-avg100", &latencyAvg100},
296 {"latency-avg1000", &latencyAvg1000},
297 {"latency-avg10000", &latencyAvg10000},
298 {"latency-avg1000000", &latencyAvg1000000},
299 {"uptime", uptimeOfProcess},
300 {"real-memory-usage", getRealMemoryUsage},
301 {"special-memory-usage", getSpecialMemoryUsage},
302 {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
303 {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
304 {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
305 {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
306 {"noncompliant-queries", &nonCompliantQueries},
307 {"noncompliant-responses", &nonCompliantResponses},
308 {"rdqueries", &rdQueries},
309 {"empty-queries", &emptyQueries},
310 {"cache-hits", &cacheHits},
311 {"cache-misses", &cacheMisses},
312 {"cpu-iowait", getCPUIOWait},
313 {"cpu-steal", getCPUSteal},
314 {"cpu-sys-msec", getCPUTimeSystem},
315 {"cpu-user-msec", getCPUTimeUser},
316 {"fd-usage", getOpenFileDescriptors},
317 {"dyn-blocked", &dynBlocked},
318 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
319 {"security-status", &securityStatus},
320 {"doh-query-pipe-full", &dohQueryPipeFull},
321 {"doh-response-pipe-full", &dohResponsePipeFull},
322 // Latency histogram
323 {"latency-sum", &latencySum},
324 {"latency-count", getLatencyCount},
325 };
326 };
327
328 extern struct DNSDistStats g_stats;
329 void doLatencyStats(double udiff);
330
331
332 struct StopWatch
333 {
334 StopWatch(bool realTime=false): d_needRealTime(realTime)
335 {
336 }
337 struct timespec d_start{0,0};
338 bool d_needRealTime{false};
339
340 void start() {
341 if(gettime(&d_start, d_needRealTime) < 0)
342 unixDie("Getting timestamp");
343
344 }
345
346 void set(const struct timespec& from) {
347 d_start = from;
348 }
349
350 double udiff() const {
351 struct timespec now;
352 if(gettime(&now, d_needRealTime) < 0)
353 unixDie("Getting timestamp");
354
355 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
356 }
357
358 double udiffAndSet() {
359 struct timespec now;
360 if(gettime(&now, d_needRealTime) < 0)
361 unixDie("Getting timestamp");
362
363 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
364 d_start = now;
365 return ret;
366 }
367
368 };
369
370 class BasicQPSLimiter
371 {
372 public:
373 BasicQPSLimiter()
374 {
375 }
376
377 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
378 {
379 d_prev.start();
380 }
381
382 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
383 {
384 auto delta = d_prev.udiffAndSet();
385
386 if(delta > 0.0) // time, frequently, does go backwards..
387 d_tokens += 1.0 * rate * (delta/1000000.0);
388
389 if(d_tokens > burst) {
390 d_tokens = burst;
391 }
392
393 bool ret=false;
394 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
395 ret=true;
396 --d_tokens;
397 }
398
399 return ret;
400 }
401
402 bool seenSince(const struct timespec& cutOff) const
403 {
404 return cutOff < d_prev.d_start;
405 }
406
407 protected:
408 mutable StopWatch d_prev;
409 mutable double d_tokens;
410 };
411
412 class QPSLimiter : public BasicQPSLimiter
413 {
414 public:
415 QPSLimiter(): BasicQPSLimiter()
416 {
417 }
418
419 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
420 {
421 d_prev.start();
422 }
423
424 unsigned int getRate() const
425 {
426 return d_passthrough ? 0 : d_rate;
427 }
428
429 int getPassed() const
430 {
431 return d_passed;
432 }
433
434 int getBlocked() const
435 {
436 return d_blocked;
437 }
438
439 bool check() const // this is not quite fair
440 {
441 if (d_passthrough) {
442 return true;
443 }
444
445 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
446 if (ret) {
447 d_passed++;
448 }
449 else {
450 d_blocked++;
451 }
452
453 return ret;
454 }
455 private:
456 mutable unsigned int d_passed{0};
457 mutable unsigned int d_blocked{0};
458 unsigned int d_rate;
459 unsigned int d_burst;
460 bool d_passthrough{true};
461 };
462
463 struct ClientState;
464
465 struct IDState
466 {
467 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
468 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
469 {
470 usageIndicator.store(orig.usageIndicator.load());
471 origFD = orig.origFD;
472 origID = orig.origID;
473 delayMsec = orig.delayMsec;
474 tempFailureTTL = orig.tempFailureTTL;
475 }
476
477 static const int64_t unusedIndicator = -1;
478
479 static bool isInUse(int64_t usageIndicator)
480 {
481 return usageIndicator != unusedIndicator;
482 }
483
484 bool isInUse() const
485 {
486 return usageIndicator != unusedIndicator;
487 }
488
489 /* return true if the value has been successfully replaced meaning that
490 no-one updated the usage indicator in the meantime */
491 bool tryMarkUnused(int64_t expectedUsageIndicator)
492 {
493 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
494 }
495
496 /* mark as unused no matter what, return true if the state was in use before */
497 bool markAsUsed()
498 {
499 auto currentGeneration = generation++;
500 return markAsUsed(currentGeneration);
501 }
502
503 /* mark as unused no matter what, return true if the state was in use before */
504 bool markAsUsed(int64_t currentGeneration)
505 {
506 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
507 return oldUsage != unusedIndicator;
508 }
509
510 /* We use this value to detect whether this state is in use.
511 For performance reasons we don't want to use a lock here, but that means
512 we need to be very careful when modifying this value. Modifications happen
513 from:
514 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
515 then picking one of the states associated to this backend (via the idOffset).
516 Most of the time this state should not be in use and usageIndicator is -1, but we
517 might not yet have received a response for the query previously associated to this
518 state, meaning that we will 'reuse' this state and erase the existing state.
519 If we ever receive a response for this state, it will be discarded. This is
520 mostly fine for UDP except that we still need to be careful in order to miss
521 the 'outstanding' counters, which should only be increased when we are picking
522 an empty state, and not when reusing ;
523 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
524 be freed, as well as internal objects internals to libh2o.
525 - one of the UDP receiver threads receiving a response from a backend, picking
526 the corresponding state and sending the response to the client ;
527 - the 'healthcheck' thread scanning the states to actively discover timeouts,
528 mostly to keep some counters like the 'outstanding' one sane.
529 We previously based that logic on the origFD (FD on which the query was received,
530 and therefore from where the response should be sent) but this suffered from an
531 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
532 same value since we only have so much incoming sockets:
533 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
534 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
535 qtype and qclass match
536 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
537 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
538 5, except it's not the same 5 anymore and it overrides a fresh state.
539 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
540 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
541 when the state is unused and the value of our counter otherwise.
542 */
543 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
544 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
545 ComboAddress origRemote; // 28
546 ComboAddress origDest; // 28
547 StopWatch sentTime; // 16
548 DNSName qname; // 80
549 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
550 #ifdef HAVE_PROTOBUF
551 boost::optional<boost::uuids::uuid> uniqueId;
552 #endif
553 boost::optional<Netmask> subnet{boost::none};
554 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
555 std::shared_ptr<QTag> qTag{nullptr};
556 const ClientState* cs{nullptr};
557 DOHUnit* du{nullptr};
558 uint32_t cacheKey; // 4
559 uint32_t cacheKeyNoECS; // 4
560 uint16_t age; // 4
561 uint16_t qtype; // 2
562 uint16_t qclass; // 2
563 uint16_t origID; // 2
564 uint16_t origFlags; // 2
565 int origFD{-1};
566 int delayMsec;
567 boost::optional<uint32_t> tempFailureTTL;
568 bool ednsAdded{false};
569 bool ecsAdded{false};
570 bool skipCache{false};
571 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
572 bool dnssecOK{false};
573 bool useZeroScope;
574 };
575
576 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
577 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
578 struct QueryCount {
579 QueryCount()
580 {
581 }
582 ~QueryCount()
583 {
584 }
585 QueryCountRecords records;
586 QueryCountFilter filter;
587 ReadWriteLock queryLock;
588 bool enabled{false};
589 };
590
591 extern QueryCount g_qcount;
592
593 struct ClientState
594 {
595 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
596 {
597 }
598
599 std::set<int> cpus;
600 ComboAddress local;
601 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
602 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
603 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
604 std::string interface;
605 std::atomic<uint64_t> queries{0};
606 mutable std::atomic<uint64_t> responses{0};
607 std::atomic<uint64_t> tcpDiedReadingQuery{0};
608 std::atomic<uint64_t> tcpDiedSendingResponse{0};
609 std::atomic<uint64_t> tcpGaveUp{0};
610 std::atomic<uint64_t> tcpClientTimeouts{0};
611 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
612 std::atomic<uint64_t> tcpCurrentConnections{0};
613 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
614 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
615 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
616 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
617 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
618 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
619 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
620 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
621 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
622 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
623 /* in ms */
624 std::atomic<double> tcpAvgConnectionDuration{0.0};
625 int udpFD{-1};
626 int tcpFD{-1};
627 int tcpListenQueueSize{SOMAXCONN};
628 int fastOpenQueueSize{0};
629 bool muted{false};
630 bool tcp;
631 bool reuseport;
632 bool ready{false};
633
634 int getSocket() const
635 {
636 return udpFD != -1 ? udpFD : tcpFD;
637 }
638
639 bool isUDP() const
640 {
641 return udpFD != -1;
642 }
643
644 bool isTCP() const
645 {
646 return udpFD == -1;
647 }
648
649 bool hasTLS() const
650 {
651 return tlsFrontend != nullptr || dohFrontend != nullptr;
652 }
653
654 std::string getType() const
655 {
656 std::string result = udpFD != -1 ? "UDP" : "TCP";
657
658 if (dohFrontend) {
659 result += " (DNS over HTTPS)";
660 }
661 else if (tlsFrontend) {
662 result += " (DNS over TLS)";
663 }
664 else if (dnscryptCtx) {
665 result += " (DNSCrypt)";
666 }
667
668 return result;
669 }
670
671 #ifdef HAVE_EBPF
672 shared_ptr<BPFFilter> d_filter;
673
674 void detachFilter()
675 {
676 if (d_filter) {
677 d_filter->removeSocket(getSocket());
678 d_filter = nullptr;
679 }
680 }
681
682 void attachFilter(shared_ptr<BPFFilter> bpf)
683 {
684 detachFilter();
685
686 bpf->addSocket(getSocket());
687 d_filter = bpf;
688 }
689 #endif /* HAVE_EBPF */
690
691 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
692 {
693 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
694 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
695 }
696 };
697
698 class TCPClientCollection {
699 std::vector<int> d_tcpclientthreads;
700 std::atomic<uint64_t> d_numthreads{0};
701 std::atomic<uint64_t> d_pos{0};
702 std::atomic<uint64_t> d_queued{0};
703 const uint64_t d_maxthreads{0};
704 std::mutex d_mutex;
705 int d_singlePipe[2];
706 const bool d_useSinglePipe;
707 public:
708
709 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
710
711 {
712 d_tcpclientthreads.reserve(maxThreads);
713
714 if (d_useSinglePipe) {
715 if (pipe(d_singlePipe) < 0) {
716 int err = errno;
717 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
718 }
719
720 if (!setNonBlocking(d_singlePipe[0])) {
721 int err = errno;
722 close(d_singlePipe[0]);
723 close(d_singlePipe[1]);
724 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
725 }
726
727 if (!setNonBlocking(d_singlePipe[1])) {
728 int err = errno;
729 close(d_singlePipe[0]);
730 close(d_singlePipe[1]);
731 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
732 }
733 }
734 }
735 int getThread()
736 {
737 uint64_t pos = d_pos++;
738 ++d_queued;
739 return d_tcpclientthreads[pos % d_numthreads];
740 }
741 bool hasReachedMaxThreads() const
742 {
743 return d_numthreads >= d_maxthreads;
744 }
745 uint64_t getThreadsCount() const
746 {
747 return d_numthreads;
748 }
749 uint64_t getQueuedCount() const
750 {
751 return d_queued;
752 }
753 void decrementQueuedCount()
754 {
755 --d_queued;
756 }
757 void addTCPClientThread();
758 };
759
760 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
761
762 struct DownstreamState
763 {
764 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
765
766 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
767 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
768 ~DownstreamState()
769 {
770 for (auto& fd : sockets) {
771 if (fd >= 0) {
772 close(fd);
773 fd = -1;
774 }
775 }
776 }
777 boost::uuids::uuid id;
778 std::vector<unsigned int> hashes;
779 mutable ReadWriteLock d_lock;
780 std::vector<int> sockets;
781 const std::string sourceItfName;
782 std::mutex socketsLock;
783 std::mutex connectLock;
784 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
785 std::thread tid;
786 const ComboAddress remote;
787 QPSLimiter qps;
788 vector<IDState> idStates;
789 const ComboAddress sourceAddr;
790 checkfunc_t checkFunction;
791 DNSName checkName{"a.root-servers.net."};
792 QType checkType{QType::A};
793 uint16_t checkClass{QClass::IN};
794 std::atomic<uint64_t> idOffset{0};
795 std::atomic<uint64_t> sendErrors{0};
796 std::atomic<uint64_t> outstanding{0};
797 std::atomic<uint64_t> reuseds{0};
798 std::atomic<uint64_t> queries{0};
799 std::atomic<uint64_t> responses{0};
800 struct {
801 std::atomic<uint64_t> sendErrors{0};
802 std::atomic<uint64_t> reuseds{0};
803 std::atomic<uint64_t> queries{0};
804 } prev;
805 std::atomic<uint64_t> tcpDiedSendingQuery{0};
806 std::atomic<uint64_t> tcpDiedReadingResponse{0};
807 std::atomic<uint64_t> tcpGaveUp{0};
808 std::atomic<uint64_t> tcpReadTimeouts{0};
809 std::atomic<uint64_t> tcpWriteTimeouts{0};
810 std::atomic<uint64_t> tcpCurrentConnections{0};
811 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
812 /* in ms */
813 std::atomic<double> tcpAvgConnectionDuration{0.0};
814 size_t socketsOffset{0};
815 double queryLoad{0.0};
816 double dropRate{0.0};
817 double latencyUsec{0.0};
818 int order{1};
819 int weight{1};
820 int tcpConnectTimeout{5};
821 int tcpRecvTimeout{30};
822 int tcpSendTimeout{30};
823 unsigned int checkInterval{1};
824 unsigned int lastCheck{0};
825 const unsigned int sourceItf{0};
826 uint16_t retries{5};
827 uint16_t xpfRRCode{0};
828 uint16_t checkTimeout{1000}; /* in milliseconds */
829 uint8_t currentCheckFailures{0};
830 uint8_t consecutiveSuccessfulChecks{0};
831 uint8_t maxCheckFailures{1};
832 uint8_t minRiseSuccesses{1};
833 StopWatch sw;
834 set<string> pools;
835 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
836 bool mustResolve{false};
837 bool upStatus{false};
838 bool useECS{false};
839 bool useProxyProtocol{false};
840 bool setCD{false};
841 bool disableZeroScope{false};
842 std::atomic<bool> connected{false};
843 std::atomic_flag threadStarted;
844 bool tcpFastOpen{false};
845 bool ipBindAddrNoPort{true};
846
847 bool isUp() const
848 {
849 if(availability == Availability::Down)
850 return false;
851 if(availability == Availability::Up)
852 return true;
853 return upStatus;
854 }
855 void setUp() { availability = Availability::Up; }
856 void setDown() { availability = Availability::Down; }
857 void setAuto() { availability = Availability::Auto; }
858 const string& getName() const {
859 return name;
860 }
861 const string& getNameWithAddr() const {
862 return nameWithAddr;
863 }
864 void setName(const std::string& newName)
865 {
866 name = newName;
867 nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")");
868 }
869
870 string getStatus() const
871 {
872 string status;
873 if(availability == DownstreamState::Availability::Up)
874 status = "UP";
875 else if(availability == DownstreamState::Availability::Down)
876 status = "DOWN";
877 else
878 status = (upStatus ? "up" : "down");
879 return status;
880 }
881 bool reconnect();
882 void hash();
883 void setId(const boost::uuids::uuid& newId);
884 void setWeight(int newWeight);
885
886 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
887 {
888 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
889 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
890 }
891 private:
892 std::string name;
893 std::string nameWithAddr;
894 };
895 using servers_t =vector<std::shared_ptr<DownstreamState>>;
896
897 void responderThread(std::shared_ptr<DownstreamState> state);
898 extern std::mutex g_luamutex;
899 extern LuaContext g_lua;
900 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
901
/* Abstract base class of the rules deciding whether a query is selected.
   d_matches is a public counter that starts at 0; it is mutable so that
   it can be updated through a const rule. */
class DNSRule
{
public:
  virtual ~DNSRule() = default;
  /* True if the rule selects this query. */
  virtual bool matches(const struct DNSQuestion* dq) const = 0;
  /* Human-readable description of the rule. */
  virtual std::string toString() const = 0;

  mutable std::atomic<uint64_t> d_matches{0};
};
912
913 struct ServerPool
914 {
915 ServerPool()
916 {
917 }
918 ~ServerPool()
919 {
920 }
921
922 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
923
924 bool getECS() const
925 {
926 return d_useECS;
927 }
928
929 void setECS(bool useECS)
930 {
931 d_useECS = useECS;
932 }
933
934 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
935 std::shared_ptr<ServerPolicy> policy{nullptr};
936
937 size_t countServers(bool upOnly)
938 {
939 size_t count = 0;
940 ReadLock rl(&d_lock);
941 for (const auto& server : d_servers) {
942 if (!upOnly || std::get<1>(server)->isUp() ) {
943 count++;
944 }
945 }
946 return count;
947 }
948
949 ServerPolicy::NumberedServerVector getServers()
950 {
951 ServerPolicy::NumberedServerVector result;
952 {
953 ReadLock rl(&d_lock);
954 result = d_servers;
955 }
956 return result;
957 }
958
959 void addServer(shared_ptr<DownstreamState>& server)
960 {
961 WriteLock wl(&d_lock);
962 unsigned int count = (unsigned int) d_servers.size();
963 d_servers.push_back(make_pair(++count, server));
964 /* we need to reorder based on the server 'order' */
965 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
966 return a.second->order < b.second->order;
967 });
968 /* and now we need to renumber for Lua (custom policies) */
969 size_t idx = 1;
970 for (auto& serv : d_servers) {
971 serv.first = idx++;
972 }
973 }
974
975 void removeServer(shared_ptr<DownstreamState>& server)
976 {
977 WriteLock wl(&d_lock);
978 size_t idx = 1;
979 bool found = false;
980 for (auto it = d_servers.begin(); it != d_servers.end();) {
981 if (found) {
982 /* we need to renumber the servers placed
983 after the removed one, for Lua (custom policies) */
984 it->first = idx++;
985 it++;
986 }
987 else if (it->second == server) {
988 it = d_servers.erase(it);
989 found = true;
990 } else {
991 idx++;
992 it++;
993 }
994 }
995 }
996
997 private:
998 ServerPolicy::NumberedServerVector d_servers;
999 ReadWriteLock d_lock;
1000 bool d_useECS{false};
1001 };
1002
1003 struct CarbonConfig
1004 {
1005 ComboAddress server;
1006 std::string namespace_name;
1007 std::string ourname;
1008 std::string instance_name;
1009 unsigned int interval;
1010 };
1011
/* EDNS header flag values. */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000 /* 32768, i.e. only the highest of 16 bits set */
};
1016
1017 struct DNSDistRuleAction
1018 {
1019 std::shared_ptr<DNSRule> d_rule;
1020 std::shared_ptr<DNSAction> d_action;
1021 boost::uuids::uuid d_id;
1022 uint64_t d_creationOrder;
1023 };
1024
1025 struct DNSDistResponseRuleAction
1026 {
1027 std::shared_ptr<DNSRule> d_rule;
1028 std::shared_ptr<DNSResponseAction> d_action;
1029 boost::uuids::uuid d_id;
1030 uint64_t d_creationOrder;
1031 };
1032
1033 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1034 extern DNSAction::Action g_dynBlockAction;
1035
1036 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1037 extern GlobalStateHolder<ServerPolicy> g_policy;
1038 extern GlobalStateHolder<servers_t> g_dstates;
1039 extern GlobalStateHolder<pools_t> g_pools;
1040 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1041 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1042 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1043 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1044 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1045
1046 extern ComboAddress g_serverControl; // not changed during runtime
1047
1048 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1049 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1050 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1051 extern bool g_truncateTC;
1052 extern bool g_fixupCase;
1053 extern int g_tcpRecvTimeout;
1054 extern int g_tcpSendTimeout;
1055 extern int g_udpTimeout;
1056 extern uint16_t g_maxOutstanding;
1057 extern std::atomic<bool> g_configurationDone;
1058 extern uint64_t g_maxTCPClientThreads;
1059 extern uint64_t g_maxTCPQueuedConnections;
1060 extern size_t g_maxTCPQueriesPerConn;
1061 extern size_t g_maxTCPConnectionDuration;
1062 extern size_t g_maxTCPConnectionsPerClient;
1063 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1064 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1065 extern uint32_t g_staleCacheEntriesTTL;
1066 extern bool g_apiReadWrite;
1067 extern std::string g_apiConfigDirectory;
1068 extern bool g_servFailOnNoPolicy;
1069 extern bool g_useTCPSinglePipe;
1070 extern uint16_t g_downstreamTCPCleanupInterval;
1071 extern size_t g_udpVectorSize;
1072 extern bool g_preserveTrailingData;
1073 extern bool g_allowEmptyResponse;
1074
#ifdef HAVE_EBPF
/* eBPF filters, only declared when the build defines HAVE_EBPF */
extern std::shared_ptr<BPFFilter> g_defaultBPFFilter;
extern std::vector<std::shared_ptr<DynBPFFilter>> g_dynBPFFilters;
#endif /* HAVE_EBPF */
1079
1080 struct LocalHolders
1081 {
1082 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1083 {
1084 }
1085
1086 LocalStateHolder<NetmaskGroup> acl;
1087 LocalStateHolder<ServerPolicy> policy;
1088 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1089 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1090 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1091 LocalStateHolder<servers_t> servers;
1092 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1093 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1094 LocalStateHolder<pools_t> pools;
1095 };
1096
struct dnsheader; // forward declaration, only used through pointers below

/* Takes a `client` flag and a `config` string, and returns a list of
   argument-less callables. */
std::vector<std::function<void()>> setupLua(bool client, const std::string& config);

/* Thread entry points. `p` is an opaque pointer whose concrete type is
   decided by the implementation. */
void tcpAcceptorThread(void* p);
#ifdef HAVE_DNS_OVER_HTTPS
void dohThread(ClientState* cs);
#endif /* HAVE_DNS_OVER_HTTPS */
1105
/* Tracking of Lua side effects */

/* if nothing has been declared, set that there are no side effects */
void setLuaNoSideEffect();
/* set to report a side effect, cancelling all _no_ side effect calls */
void setLuaSideEffect();
/* set if there were only explicit declarations of _no_ side effect */
bool getLuaNoSideEffect();
/* reset to indeterminate state */
void resetLuaSideEffect();
1110
1111 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1112 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1113 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1114
1115 bool checkQueryHeaders(const struct dnsheader* dh);
1116
1117 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1118 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1119 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1120
1121 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1122
1123 uint16_t getRandomDNSID();
1124
1125 #include "dnsdist-snmp.hh"
1126
1127 extern bool g_snmpEnabled;
1128 extern bool g_snmpTrapsEnabled;
1129 extern DNSDistSNMPAgent* g_snmpAgent;
1130 extern bool g_addEDNSToSelfGeneratedResponses;
1131
1132 extern std::set<std::string> g_capabilitiesToRetain;
/* don't accept UDP queries larger than this value */
static constexpr uint16_t s_udpIncomingBufferSize{1500};
/* don't cache responses larger than this value */
static constexpr size_t s_maxPacketCacheEntrySize{4096};
1135
/* Possible outcomes of processQuery() */
enum class ProcessQueryResult
{
  Drop,
  SendAnswer,
  PassToBackend
};
1137 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1138
1139 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1140 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1141
1142 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1143 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);