2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "ext/luawrapper/include/LuaContext.hpp"
32 #include <unordered_map>
34 #include <boost/variant.hpp>
36 #include "capabilities.hh"
37 #include "circular_buffer.hh"
38 #include "dnscrypt.hh"
39 #include "dnsdist-cache.hh"
40 #include "dnsdist-dynbpf.hh"
41 #include "dnsdist-lbpolicies.hh"
44 #include "ednsoptions.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52 #include "proxy-protocol.hh"
// Free functions declared here and defined elsewhere in the project.
// uptimeOfProcess has the statfunction_t shape and is registered as the "uptime" stat entry.
54 void carbonDumpThread();
55 uint64_t uptimeOfProcess(const std::string& str);
// Process-wide EDNS Client Subnet settings. The DNSQuestion constructor reads them to
// seed ecsPrefixLength (V4 when the client address family is AF_INET, V6 otherwise)
// and ecsOverride.
57 extern uint16_t g_ECSSourcePrefixV4;
58 extern uint16_t g_ECSSourcePrefixV6;
59 extern bool g_ECSOverride;
// String-to-string map; DNSQuestion and IDState each hold one through std::shared_ptr<QTag>.
61 typedef std::unordered_map<string, string> QTag;
/* Builds a DNSQuestion from caller-supplied pointers and sizes.
   name, lc, rem, header and queryTime_ are stored as raw pointers, not copied, so the
   pointed-to objects must outlive this DNSQuestion.
   rem is dereferenced in the initializer list (rem->sin4.sin_family) to pick between
   g_ECSSourcePrefixV4 and g_ECSSourcePrefixV6, so it must not be null.
   ecsOverride is seeded from the global g_ECSOverride. */
65 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
66 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
// Fetches a pointer to the flags of the DNS header; the remainder of the body is not
// visible in this excerpt.
67 const uint16_t* flags = getFlagsFromDNSHeader(dh);
// Copy construction and copy assignment are disabled; move construction is defaulted.
// No move assignment operator is visible in this excerpt.
70 DNSQuestion(const DNSQuestion&) = delete;
71 DNSQuestion& operator=(const DNSQuestion&) = delete;
72 DNSQuestion(DNSQuestion&&) = default;
// Declared here, defined elsewhere.
// NOTE(review): presumably these read/replace the bytes that follow the DNS message in
// the packet buffer, with the bool reporting success — confirm against the definition.
74 std::string getTrailingData() const;
75 bool setTrailingData(const std::string&);
// Data members of DNSQuestion (some members are not visible in this excerpt).
78 boost::optional<boost::uuids::uuid> uniqueId;
81 boost::optional<Netmask> subnet;
82 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
// Non-owning pointers, set from the constructor arguments (name, lc, rem).
84 const DNSName* qname{nullptr};
85 const ComboAddress* local{nullptr};
86 const ComboAddress* remote{nullptr};
87 std::shared_ptr<QTag> qTag{nullptr};
// unique_ptr member: one reason the type cannot be implicitly copied.
88 std::unique_ptr<std::vector<ProxyProtocolValue>> proxyProtocolValues{nullptr};
89 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
90 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
91 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
// Non-owning pointers to the DNS header and to the query timestamp, set by the constructor.
92 struct dnsheader* dh{nullptr};
93 const struct timespec* queryTime{nullptr};
94 struct DOHUnit* du{nullptr};
96 unsigned int consumed{0};
98 boost::optional<uint32_t> tempFailureTTL;
// NOTE(review): no default initializer and not set in the constructor's initializer
// list, so the value is indeterminate until something assigns it.
99 uint32_t cacheKeyNoECS;
// Fixed at construction (const).
101 const uint16_t qtype;
102 const uint16_t qclass;
// Seeded from g_ECSSourcePrefixV4 or g_ECSSourcePrefixV6 depending on the client address family.
104 uint16_t ecsPrefixLength;
106 uint8_t ednsRCode{0};
108 bool skipCache{false};
113 bool ecsAdded{false};
114 bool ednsAdded{false};
115 bool useZeroScope{false};
116 bool dnssecOK{false};
// A response reuses the DNSQuestion layout through public inheritance.
119 struct DNSResponse : DNSQuestion
// Forwards every argument unchanged to the DNSQuestion constructor; responseLen is
// passed in the position where DNSQuestion expects queryLen. The body is empty.
121 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
122 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
// Same copy/move policy as DNSQuestion: not copyable, move-constructible.
123 DNSResponse(const DNSResponse&) = delete;
124 DNSResponse& operator=(const DNSResponse&) = delete;
125 DNSResponse(DNSResponse&&) = default;
128 /* so what could you do:
131 provide actual answer,
132 allow & and stop processing,
134 modify header: (servfail|refused|notimp), set TC=1,
140 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };
141 static std::string typeToString(const Action& action)
146 case Action::Nxdomain:
147 return "Send NXDomain";
148 case Action::Refused:
149 return "Send Refused";
151 return "Spoof an answer";
152 case Action::SpoofRaw:
153 return "Spoof an answer from raw bytes";
156 case Action::HeaderModify:
157 return "Modify the header";
159 return "Route to a pool";
162 case Action::Truncate:
163 return "Truncate over UDP";
164 case Action::ServFail:
165 return "Send ServFail";
169 case Action::NoRecurse:
176 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
180 virtual string toString() const = 0;
181 virtual std::map<string, double> getStats() const
187 class DNSResponseAction
190 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
191 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
192 virtual ~DNSResponseAction()
195 virtual string toString() const = 0;
// Default state: action None, not a warning. Constructor bodies are not visible in this excerpt.
200 DynBlock(): action(DNSAction::Action::None), warning(false)
// Fully specified block; warning always starts as false.
204 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
// std::atomic is not copyable, so both the copy constructor and the copy assignment
// carry the `blocks` counter over with an explicit store() of the source's value.
208 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
210 blocks.store(rhs.blocks);
213 DynBlock& operator=(const DynBlock& rhs)
219 blocks.store(rhs.blocks);
225 struct timespec until;
227 DNSAction::Action action;
// mutable: the counter can be updated through a const DynBlock.
228 mutable std::atomic<unsigned int> blocks;
232 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
234 extern vector<pair<struct timeval, std::string> > g_confDelta;
236 extern uint64_t getLatencyCount(const std::string&);
240 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
242 stat_t servfailResponses{0};
244 stat_t frontendNXDomain{0};
245 stat_t frontendServFail{0};
246 stat_t frontendNoError{0};
247 stat_t nonCompliantQueries{0};
248 stat_t nonCompliantResponses{0};
250 stat_t emptyQueries{0};
252 stat_t dynBlocked{0};
254 stat_t ruleNXDomain{0};
255 stat_t ruleRefused{0};
256 stat_t ruleServFail{0};
257 stat_t selfAnswered{0};
258 stat_t downstreamTimeouts{0};
259 stat_t downstreamSendErrors{0};
263 stat_t cacheMisses{0};
264 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
265 stat_t securityStatus{0};
267 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
268 typedef std::function<uint64_t(const std::string&)> statfunction_t;
269 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
270 std::vector<std::pair<std::string, entry_t>> entries{
271 {"responses", &responses},
272 {"servfail-responses", &servfailResponses},
273 {"queries", &queries},
274 {"frontend-nxdomain", &frontendNXDomain},
275 {"frontend-servfail", &frontendServFail},
276 {"frontend-noerror", &frontendNoError},
277 {"acl-drops", &aclDrops},
278 {"rule-drop", &ruleDrop},
279 {"rule-nxdomain", &ruleNXDomain},
280 {"rule-refused", &ruleRefused},
281 {"rule-servfail", &ruleServFail},
282 {"self-answered", &selfAnswered},
283 {"downstream-timeouts", &downstreamTimeouts},
284 {"downstream-send-errors", &downstreamSendErrors},
285 {"trunc-failures", &truncFail},
286 {"no-policy", &noPolicy},
287 {"latency0-1", &latency0_1},
288 {"latency1-10", &latency1_10},
289 {"latency10-50", &latency10_50},
290 {"latency50-100", &latency50_100},
291 {"latency100-1000", &latency100_1000},
292 {"latency-slow", &latencySlow},
293 {"latency-avg100", &latencyAvg100},
294 {"latency-avg1000", &latencyAvg1000},
295 {"latency-avg10000", &latencyAvg10000},
296 {"latency-avg1000000", &latencyAvg1000000},
297 {"uptime", uptimeOfProcess},
298 {"real-memory-usage", getRealMemoryUsage},
299 {"special-memory-usage", getSpecialMemoryUsage},
300 {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
301 {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
302 {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
303 {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
304 {"noncompliant-queries", &nonCompliantQueries},
305 {"noncompliant-responses", &nonCompliantResponses},
306 {"rdqueries", &rdQueries},
307 {"empty-queries", &emptyQueries},
308 {"cache-hits", &cacheHits},
309 {"cache-misses", &cacheMisses},
310 {"cpu-iowait", getCPUIOWait},
311 {"cpu-steal", getCPUSteal},
312 {"cpu-sys-msec", getCPUTimeSystem},
313 {"cpu-user-msec", getCPUTimeUser},
314 {"fd-usage", getOpenFileDescriptors},
315 {"dyn-blocked", &dynBlocked},
316 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
317 {"security-status", &securityStatus},
319 {"latency-sum", &latencySum},
320 {"latency-count", getLatencyCount},
324 extern struct DNSDistStats g_stats;
325 void doLatencyStats(double udiff);
// realTime is stored in d_needRealTime and forwarded to every gettime() call below.
330 StopWatch(bool realTime=false): d_needRealTime(realTime)
// Reference timestamp; starts at {0,0}.
333 struct timespec d_start{0,0};
334 bool d_needRealTime{false};
// A failing gettime() is reported through unixDie().
337 if(gettime(&d_start, d_needRealTime) < 0)
338 unixDie("Getting timestamp");
342 void set(const struct timespec& from) {
// Elapsed time since d_start, in microseconds: whole seconds scaled by 1e6 plus the
// nanosecond difference divided by 1e3.
346 double udiff() const {
348 if(gettime(&now, d_needRealTime) < 0)
349 unixDie("Getting timestamp");
351 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
// Same computation as udiff(), but non-const.
// NOTE(review): the name suggests d_start is then reset to `now`; that statement is
// not visible in this excerpt.
354 double udiffAndSet() {
356 if(gettime(&now, d_needRealTime) < 0)
357 unixDie("Getting timestamp");
359 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
// Rate limiter keeping a fractional token count (d_tokens) and the time of the last check (d_prev).
366 class BasicQPSLimiter
// The token count starts at `burst`.
373 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
// Declared const, yet it updates d_prev and d_tokens (both mutable). No locking is
// visible in this excerpt.
378 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
// Microseconds elapsed since the previous call.
380 auto delta = d_prev.udiffAndSet();
// Refill: rate tokens per second, scaled by the elapsed time converted to seconds.
382 if(delta > 0.0) // time, frequently, does go backwards..
383 d_tokens += 1.0 * rate * (delta/1000000.0);
// Branch taken when the token count exceeds `burst`; its body is not visible here.
385 if(d_tokens > burst) {
390 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
// True when the last recorded timestamp is later than cutOff.
398 bool seenSince(const struct timespec& cutOff) const
400 return cutOff < d_prev.d_start;
404 mutable StopWatch d_prev;
405 mutable double d_tokens;
// BasicQPSLimiter with a stored rate/burst pair and pass/block counters.
408 class QPSLimiter : public BasicQPSLimiter
// Default construction leaves d_passthrough at its default of true.
411 QPSLimiter(): BasicQPSLimiter()
// An explicit rate/burst turns passthrough off.
415 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
// Reports 0 while in passthrough mode, otherwise the configured rate.
420 unsigned int getRate() const
422 return d_passthrough ? 0 : d_rate;
// NOTE(review): the getters return int while d_passed/d_blocked are unsigned int;
// the bodies are not visible here — confirm large counts are not misreported.
425 int getPassed() const
430 int getBlocked() const
// Delegates to the base limiter with the stored rate and burst. The handling around
// this call (passthrough, counter updates) is not visible in this excerpt.
435 bool check() const // this is not quite fair
441 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
// mutable so they can be updated from the const check().
452 mutable unsigned int d_passed{0};
453 mutable unsigned int d_blocked{0};
455 unsigned int d_burst;
456 bool d_passthrough{true};
// Default state: sentTime is a StopWatch constructed with realTime=true, no delay, no
// temp-failure TTL, and origDest's address family cleared to 0.
463 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
// Copy constructor. The lines visible here copy origRemote, origDest, age,
// usageIndicator (load + store, since std::atomic is not copyable), origFD, origID,
// delayMsec and tempFailureTTL.
// NOTE(review): other members of this struct (generation, sentTime, dnsCryptQuery,
// qTag, du, ...) do not appear in the visible copy — confirm that is intended.
464 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
466 usageIndicator.store(orig.usageIndicator.load());
467 origFD = orig.origFD;
468 origID = orig.origID;
469 delayMsec = orig.delayMsec;
470 tempFailureTTL = orig.tempFailureTTL;
473 static const int64_t unusedIndicator = -1;
475 static bool isInUse(int64_t usageIndicator)
477 return usageIndicator != unusedIndicator;
482 return usageIndicator != unusedIndicator;
485 /* return true if the value has been successfully replaced meaning that
486 no-one updated the usage indicator in the meantime */
487 bool tryMarkUnused(int64_t expectedUsageIndicator)
489 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
492 /* mark as used no matter what, return true if the state was in use before */
495 auto currentGeneration = generation++;
496 return markAsUsed(currentGeneration);
499 /* mark as used no matter what, return true if the state was in use before */
500 bool markAsUsed(int64_t currentGeneration)
502 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
503 return oldUsage != unusedIndicator;
506 /* We use this value to detect whether this state is in use.
507 For performance reasons we don't want to use a lock here, but that means
508 we need to be very careful when modifying this value. Modifications happen
510 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
511 then picking one of the states associated to this backend (via the idOffset).
512 Most of the time this state should not be in use and usageIndicator is -1, but we
513 might not yet have received a response for the query previously associated to this
514 state, meaning that we will 'reuse' this state and erase the existing state.
515 If we ever receive a response for this state, it will be discarded. This is
516 mostly fine for UDP except that we still need to be careful not to mess up
517 the 'outstanding' counters, which should only be increased when we are picking
518 an empty state, and not when reusing one ;
519 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
520 be freed, as well as objects internal to libh2o.
521 - one of the UDP receiver threads receiving a response from a backend, picking
522 the corresponding state and sending the response to the client ;
523 - the 'healthcheck' thread scanning the states to actively discover timeouts,
524 mostly to keep some counters like the 'outstanding' one sane.
525 We previously based that logic on the origFD (FD on which the query was received,
526 and therefore from where the response should be sent) but this suffered from an
527 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
528 same value since we only have so many incoming sockets:
529 - 1/ 'client' thread gets a query and sets origFD to its FD, say 5 ;
530 - 2/ 'receiver' thread gets a response, reads the value of origFD as 5, checks that the qname,
531 qtype and qclass match
532 - 3/ during that time the 'client' thread reuses the state, setting origFD to 5 again ;
533 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
534 5, except it's not the same 5 anymore and it overwrites a fresh state.
535 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
536 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
537 when the state is unused and the value of our counter otherwise.
539 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
540 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
541 ComboAddress origRemote; // 28
542 ComboAddress origDest; // 28
543 StopWatch sentTime; // 16
545 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
547 boost::optional<boost::uuids::uuid> uniqueId;
549 boost::optional<Netmask> subnet{boost::none};
550 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
551 std::shared_ptr<QTag> qTag{nullptr};
552 const ClientState* cs{nullptr};
553 DOHUnit* du{nullptr};
554 uint32_t cacheKey; // 4
555 uint32_t cacheKeyNoECS; // 4
558 uint16_t qclass; // 2
559 uint16_t origID; // 2
560 uint16_t origFlags; // 2
563 boost::optional<uint32_t> tempFailureTTL;
564 bool ednsAdded{false};
565 bool ecsAdded{false};
566 bool skipCache{false};
567 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
568 bool dnssecOK{false};
572 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
573 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
577 pthread_rwlock_init(&queryLock, nullptr);
581 pthread_rwlock_destroy(&queryLock);
583 QueryCountRecords records;
584 QueryCountFilter filter;
585 pthread_rwlock_t queryLock;
589 extern QueryCount g_qcount;
593 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
599 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
600 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
601 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
602 std::string interface;
603 std::atomic<uint64_t> queries{0};
604 mutable std::atomic<uint64_t> responses{0};
605 std::atomic<uint64_t> tcpDiedReadingQuery{0};
606 std::atomic<uint64_t> tcpDiedSendingResponse{0};
607 std::atomic<uint64_t> tcpGaveUp{0};
608 std::atomic<uint64_t> tcpClientTimeouts{0};
609 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
610 std::atomic<uint64_t> tcpCurrentConnections{0};
611 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
612 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
613 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
614 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
615 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
616 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
617 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
618 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
619 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
620 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
622 std::atomic<double> tcpAvgConnectionDuration{0.0};
625 int fastOpenQueueSize{0};
631 int getSocket() const
633 return udpFD != -1 ? udpFD : tcpFD;
648 return tlsFrontend != nullptr || dohFrontend != nullptr;
651 std::string getType() const
653 std::string result = udpFD != -1 ? "UDP" : "TCP";
656 result += " (DNS over HTTPS)";
658 else if (tlsFrontend) {
659 result += " (DNS over TLS)";
661 else if (dnscryptCtx) {
662 result += " (DNSCrypt)";
669 shared_ptr<BPFFilter> d_filter;
674 d_filter->removeSocket(getSocket());
679 void attachFilter(shared_ptr<BPFFilter> bpf)
683 bpf->addSocket(getSocket());
686 #endif /* HAVE_EBPF */
/* Folds one TCP connection's figures into the frontend's running averages.
   nbQueries and durationMs are the samples for tcpAvgQueriesPerConnection and
   tcpAvgConnectionDuration respectively.
   Each average becomes 0.99 * previous + 0.01 * sample.
   NOTE(review): both members are std::atomic<double>, but each statement is an atomic
   load followed by a separate atomic store, not one atomic read-modify-write, so
   concurrent callers can overwrite each other's update. */
688 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
690 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
691 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
// Holds one int per TCP client thread plus counters for thread count, dispatch
// position and queued items.
695 class TCPClientCollection {
696 std::vector<int> d_tcpclientthreads;
697 std::atomic<uint64_t> d_numthreads{0};
698 std::atomic<uint64_t> d_pos{0};
699 std::atomic<uint64_t> d_queued{0};
700 const uint64_t d_maxthreads{0};
703 const bool d_useSinglePipe;
/* Reserves room for maxThreads entries. When useSinglePipe is set, creates the pipe
   and switches both ends to non-blocking mode. If either setNonBlocking() call fails,
   both descriptors are closed before std::runtime_error is thrown, so no descriptor
   leaks out of a failed construction. A pipe() failure also throws std::runtime_error. */
706 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
709 d_tcpclientthreads.reserve(maxThreads);
711 if (d_useSinglePipe) {
712 if (pipe(d_singlePipe) < 0) {
714 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
717 if (!setNonBlocking(d_singlePipe[0])) {
719 close(d_singlePipe[0]);
720 close(d_singlePipe[1]);
721 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
724 if (!setNonBlocking(d_singlePipe[1])) {
726 close(d_singlePipe[0]);
727 close(d_singlePipe[1]);
728 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
// Picks an entry by taking the post-incremented position modulo the thread count.
// NOTE(review): with d_numthreads == 0 the modulo below divides by zero; the
// surrounding lines are not visible here — confirm a guard exists.
734 uint64_t pos = d_pos++;
736 return d_tcpclientthreads[pos % d_numthreads];
738 bool hasReachedMaxThreads() const
740 return d_numthreads >= d_maxthreads;
742 uint64_t getThreadsCount() const
746 uint64_t getQueuedCount() const
750 void decrementQueuedCount()
// Declared here, defined elsewhere.
754 void addTCPClientThread();
757 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
759 struct DownstreamState
761 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
763 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
764 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
767 for (auto& fd : sockets) {
773 pthread_rwlock_destroy(&d_lock);
775 boost::uuids::uuid id;
776 std::vector<unsigned int> hashes;
777 mutable pthread_rwlock_t d_lock;
778 std::vector<int> sockets;
779 const std::string sourceItfName;
780 std::mutex socketsLock;
781 std::mutex connectLock;
782 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
784 const ComboAddress remote;
786 vector<IDState> idStates;
787 const ComboAddress sourceAddr;
788 checkfunc_t checkFunction;
789 DNSName checkName{"a.root-servers.net."};
790 QType checkType{QType::A};
791 uint16_t checkClass{QClass::IN};
792 std::atomic<uint64_t> idOffset{0};
793 std::atomic<uint64_t> sendErrors{0};
794 std::atomic<uint64_t> outstanding{0};
795 std::atomic<uint64_t> reuseds{0};
796 std::atomic<uint64_t> queries{0};
797 std::atomic<uint64_t> responses{0};
799 std::atomic<uint64_t> sendErrors{0};
800 std::atomic<uint64_t> reuseds{0};
801 std::atomic<uint64_t> queries{0};
803 std::atomic<uint64_t> tcpDiedSendingQuery{0};
804 std::atomic<uint64_t> tcpDiedReadingResponse{0};
805 std::atomic<uint64_t> tcpGaveUp{0};
806 std::atomic<uint64_t> tcpReadTimeouts{0};
807 std::atomic<uint64_t> tcpWriteTimeouts{0};
808 std::atomic<uint64_t> tcpCurrentConnections{0};
809 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
811 std::atomic<double> tcpAvgConnectionDuration{0.0};
812 size_t socketsOffset{0};
813 double queryLoad{0.0};
814 double dropRate{0.0};
815 double latencyUsec{0.0};
818 int tcpConnectTimeout{5};
819 int tcpRecvTimeout{30};
820 int tcpSendTimeout{30};
821 unsigned int checkInterval{1};
822 unsigned int lastCheck{0};
823 const unsigned int sourceItf{0};
825 uint16_t xpfRRCode{0};
826 uint16_t checkTimeout{1000}; /* in milliseconds */
827 uint8_t currentCheckFailures{0};
828 uint8_t consecutiveSuccessfulChecks{0};
829 uint8_t maxCheckFailures{1};
830 uint8_t minRiseSuccesses{1};
833 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
834 bool mustResolve{false};
835 bool upStatus{false};
837 bool useProxyProtocol{false};
839 bool disableZeroScope{false};
840 std::atomic<bool> connected{false};
841 std::atomic_flag threadStarted;
842 bool tcpFastOpen{false};
843 bool ipBindAddrNoPort{true};
847 if(availability == Availability::Down)
849 if(availability == Availability::Up)
853 void setUp() { availability = Availability::Up; }
854 void setDown() { availability = Availability::Down; }
855 void setAuto() { availability = Availability::Auto; }
856 const string& getName() const {
859 const string& getNameWithAddr() const {
862 void setName(const std::string& newName)
865 nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")");
868 string getStatus() const
871 if(availability == DownstreamState::Availability::Up)
873 else if(availability == DownstreamState::Availability::Down)
876 status = (upStatus ? "up" : "down");
881 void setId(const boost::uuids::uuid& newId);
882 void setWeight(int newWeight);
/* Folds one TCP connection's figures into this backend's running averages.
   nbQueries and durationMs are the samples for tcpAvgQueriesPerConnection and
   tcpAvgConnectionDuration respectively.
   Each average becomes 0.99 * previous + 0.01 * sample.
   NOTE(review): both members are std::atomic<double>, but each statement is an atomic
   load followed by a separate atomic store, not one atomic read-modify-write, so
   concurrent callers can overwrite each other's update. */
884 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
886 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
887 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
891 std::string nameWithAddr;
893 using servers_t =vector<std::shared_ptr<DownstreamState>>;
895 void responderThread(std::shared_ptr<DownstreamState> state);
896 extern std::mutex g_luamutex;
897 extern LuaContext g_lua;
898 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
906 virtual bool matches(const DNSQuestion* dq) const =0;
907 virtual string toString() const = 0;
908 mutable std::atomic<uint64_t> d_matches{0};
915 pthread_rwlock_init(&d_lock, nullptr);
919 pthread_rwlock_destroy(&d_lock);
922 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
929 void setECS(bool useECS)
934 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
935 std::shared_ptr<ServerPolicy> policy{nullptr};
937 size_t countServers(bool upOnly)
940 ReadLock rl(&d_lock);
941 for (const auto& server : d_servers) {
942 if (!upOnly || std::get<1>(server)->isUp() ) {
949 ServerPolicy::NumberedServerVector getServers()
951 ServerPolicy::NumberedServerVector result;
953 ReadLock rl(&d_lock);
/* Adds a backend to the pool under the write lock.
   The server is appended with the number size()+1, then the whole list is re-sorted
   by each backend's `order` field. std::stable_sort keeps the existing relative order
   of backends that share the same `order`. */
959 void addServer(shared_ptr<DownstreamState>& server)
961 WriteLock wl(&d_lock);
962 unsigned int count = (unsigned int) d_servers.size();
963 d_servers.push_back(make_pair(++count, server));
964 /* we need to reorder based on the server 'order' */
965 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
966 return a.second->order < b.second->order;
968 /* and now we need to renumber for Lua (custom policies) */
// Renumbering loop over the sorted list; its body is not visible in this excerpt.
970 for (auto& serv : d_servers) {
/* Removes a backend from the pool under the write lock.
   The match is by shared_ptr equality, i.e. the same DownstreamState object, not an
   equivalent one. */
975 void removeServer(shared_ptr<DownstreamState>& server)
977 WriteLock wl(&d_lock);
// The iterator is advanced inside the loop body, because erase() invalidates it.
980 for (auto it = d_servers.begin(); it != d_servers.end();) {
982 /* we need to renumber the servers placed
983 after the removed one, for Lua (custom policies) */
987 else if (it->second == server) {
// erase() returns the iterator following the removed element.
988 it = d_servers.erase(it);
998 ServerPolicy::NumberedServerVector d_servers;
999 pthread_rwlock_t d_lock;
1000 bool d_useECS{false};
1005 ComboAddress server;
1006 std::string namespace_name;
1007 std::string ourname;
1008 std::string instance_name;
1009 unsigned int interval;
// Flag values for the EDNS header. 32768 is 0x8000, the most significant bit of a
// 16-bit field.
// NOTE(review): presumably the "DNSSEC OK" (DO) bit from RFC 3225 — confirm.
1012 enum ednsHeaderFlags {
1013 EDNS_HEADER_FLAG_NONE = 0,
1014 EDNS_HEADER_FLAG_DO = 32768
1017 struct DNSDistRuleAction
1019 std::shared_ptr<DNSRule> d_rule;
1020 std::shared_ptr<DNSAction> d_action;
1021 boost::uuids::uuid d_id;
1022 uint64_t d_creationOrder;
1025 struct DNSDistResponseRuleAction
1027 std::shared_ptr<DNSRule> d_rule;
1028 std::shared_ptr<DNSResponseAction> d_action;
1029 boost::uuids::uuid d_id;
1030 uint64_t d_creationOrder;
1033 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1034 extern DNSAction::Action g_dynBlockAction;
1036 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1037 extern GlobalStateHolder<ServerPolicy> g_policy;
1038 extern GlobalStateHolder<servers_t> g_dstates;
1039 extern GlobalStateHolder<pools_t> g_pools;
1040 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1041 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1042 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1043 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1044 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1046 extern ComboAddress g_serverControl; // not changed during runtime
1048 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1049 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1050 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1051 extern bool g_truncateTC;
1052 extern bool g_fixupCase;
1053 extern int g_tcpRecvTimeout;
1054 extern int g_tcpSendTimeout;
1055 extern int g_udpTimeout;
1056 extern uint16_t g_maxOutstanding;
1057 extern std::atomic<bool> g_configurationDone;
1058 extern uint64_t g_maxTCPClientThreads;
1059 extern uint64_t g_maxTCPQueuedConnections;
1060 extern size_t g_maxTCPQueriesPerConn;
1061 extern size_t g_maxTCPConnectionDuration;
1062 extern size_t g_maxTCPConnectionsPerClient;
1063 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1064 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1065 extern uint32_t g_staleCacheEntriesTTL;
1066 extern bool g_apiReadWrite;
1067 extern std::string g_apiConfigDirectory;
1068 extern bool g_servFailOnNoPolicy;
1069 extern bool g_useTCPSinglePipe;
1070 extern uint16_t g_downstreamTCPCleanupInterval;
1071 extern size_t g_udpVectorSize;
1072 extern bool g_preserveTrailingData;
1073 extern bool g_allowEmptyResponse;
1076 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1077 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1078 #endif /* HAVE_EBPF */
1082 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1086 LocalStateHolder<NetmaskGroup> acl;
1087 LocalStateHolder<ServerPolicy> policy;
1088 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1089 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1090 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1091 LocalStateHolder<servers_t> servers;
1092 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1093 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1094 LocalStateHolder<pools_t> pools;
1099 void controlThread(int fd, ComboAddress local);
1100 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1102 struct WebserverConfig
1104 std::string password;
1106 boost::optional<std::map<std::string, std::string> > customHeaders;
// Setters for the web server configuration; declared here, defined elsewhere.
// NOTE(review): apiKey and customHeaders are taken by const value, so each call copies
// the optional and its contents. A const reference would avoid the copy, but the
// out-of-line definitions would have to change in step.
1110 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1111 void setWebserverPassword(const std::string& password);
1112 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1114 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1115 void tcpAcceptorThread(void* p);
1116 #ifdef HAVE_DNS_OVER_HTTPS
1117 void dohThread(ClientState* cs);
1118 #endif /* HAVE_DNS_OVER_HTTPS */
1120 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1121 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1122 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1123 void resetLuaSideEffect(); // reset to indeterminate state
1125 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1126 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1127 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1129 bool checkQueryHeaders(const struct dnsheader* dh);
1131 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1132 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1133 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1135 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1137 uint16_t getRandomDNSID();
1139 #include "dnsdist-snmp.hh"
1141 extern bool g_snmpEnabled;
1142 extern bool g_snmpTrapsEnabled;
1143 extern DNSDistSNMPAgent* g_snmpAgent;
1144 extern bool g_addEDNSToSelfGeneratedResponses;
1146 extern std::set<std::string> g_capabilitiesToRetain;
// Size limits. Being `static const` in a header, each translation unit that includes
// this file gets its own copy of these constants.
1147 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1148 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
// Outcome of processQuery().
// NOTE(review): judging by the names, the query is dropped, answered directly, or
// forwarded to the backend returned through selectedBackend — confirm against the definition.
1150 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
1151 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1153 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1154 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1156 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1157 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);