2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "ext/luawrapper/include/LuaContext.hpp"
32 #include <unordered_map>
34 #include <boost/variant.hpp>
36 #include "bpf-filter.hh"
37 #include "capabilities.hh"
38 #include "circular_buffer.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
44 #include "ednsoptions.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
53 void carbonDumpThread();
54 uint64_t uptimeOfProcess(const std::string& str);
56 extern uint16_t g_ECSSourcePrefixV4;
57 extern uint16_t g_ECSSourcePrefixV6;
58 extern bool g_ECSOverride;
60 typedef std::unordered_map<string, string> QTag;
// DNSQuestion: per-query state passed by pointer to rules (DNSRule::matches) and
// actions (DNSAction::operator()). Most of the query data is referenced through raw
// pointers (qname, local, remote, dh, queryTime) supplied by the caller.
// NOTE(review): the struct header and several members are on lines not visible here.
//
// Constructor: stores the supplied pointers, sizes and type/class. ecsPrefixLength is
// taken from g_ECSSourcePrefixV4 when the remote address family is AF_INET and from
// g_ECSSourcePrefixV6 otherwise; ecsOverride is taken from g_ECSOverride.
// `rem` is dereferenced in the initializer list, so it must not be null.
64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
// Obtain a pointer to the flags of the DNS header via getFlagsFromDNSHeader().
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
// Copy construction and copy assignment are deleted; move construction is defaulted.
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
74 boost::optional<boost::uuids::uuid> uniqueId;
77 boost::optional<Netmask> subnet;
78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
// Raw pointers set from the constructor arguments.
80 const DNSName* qname{nullptr};
81 const ComboAddress* local{nullptr};
82 const ComboAddress* remote{nullptr};
83 std::shared_ptr<QTag> qTag{nullptr};
84 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
85 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
86 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
87 struct dnsheader* dh{nullptr};
88 const struct timespec* queryTime{nullptr};
89 struct DOHUnit* du{nullptr};
91 unsigned int consumed{0};
// Initialised to boost::none by the constructor.
93 boost::optional<uint32_t> tempFailureTTL;
// NOTE(review): no default member initializer and not set in the visible
// constructor initializer list.
94 uint32_t cacheKeyNoECS;
97 const uint16_t qclass;
99 uint16_t ecsPrefixLength;
101 uint8_t ednsRCode{0};
103 bool skipCache{false};
108 bool ecsAdded{false};
109 bool ednsAdded{false};
110 bool useZeroScope{false};
111 bool dnssecOK{false};
// DNSResponse: a DNSQuestion specialisation used for responses (it is the argument
// type of DNSResponseAction::operator()). It adds no members on the visible lines.
114 struct DNSResponse : DNSQuestion
// Forwards every argument to the DNSQuestion constructor; responseLen is passed in
// the position of the base constructor's queryLen parameter.
116 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
117 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
// Copy construction and copy assignment are deleted; move construction is defaulted.
118 DNSResponse(const DNSResponse&) = delete;
119 DNSResponse& operator=(const DNSResponse&) = delete;
120 DNSResponse(DNSResponse&&) = default;
123 /* so what could you do:
126 provide actual answer,
127 allow and stop processing,
129 modify header: (servfail|refused|notimp), set TC=1,
// Outcomes an action applied to a query can request.
135 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };
// Returns a short human-readable description of the given Action value.
// NOTE(review): several case labels and return statements are on lines not visible here.
136 static std::string typeToString(const Action& action)
141 case Action::Nxdomain:
142 return "Send NXDomain";
143 case Action::Refused:
144 return "Send Refused";
146 return "Spoof an answer";
147 case Action::SpoofRaw:
148 return "Spoof an answer from raw bytes";
151 case Action::HeaderModify:
152 return "Modify the header";
154 return "Route to a pool";
157 case Action::Truncate:
158 return "Truncate over UDP";
159 case Action::ServFail:
160 return "Send ServFail";
164 case Action::NoRecurse:
// Applies the action to a query and returns the requested outcome; implementations
// receive a string pointer (ruleresult) in addition to the question.
171 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
// Textual description of the action, provided by each implementation.
175 virtual string toString() const = 0;
// Named numeric statistics exposed by the action; overridable.
176 virtual std::map<string, double> getStats() const
// DNSResponseAction: abstract interface for actions applied to responses.
182 class DNSResponseAction
// Outcomes a response action can request.
185 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
// Applies the action to a response and returns the requested outcome.
186 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
// Virtual destructor: the class is meant to be used through base pointers.
187 virtual ~DNSResponseAction()
// Textual description of the action, provided by each implementation.
190 virtual string toString() const = 0;
// DynBlock: one dynamic block entry, made of a reason, an `until` timestamp
// (presumably the expiry time), a domain, the action to apply and a hit counter.
// NOTE(review): some member declarations are on lines not visible here.
//
// Default constructor: action is DNSAction::Action::None and warning is false.
195 DynBlock(): action(DNSAction::Action::None), warning(false)
// Builds an entry from its reason, end time, domain and action; warning starts false.
199 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
// std::atomic is not copyable, so the copy constructor and the copy assignment
// operator transfer the current value of `blocks` explicitly with store().
203 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
205 blocks.store(rhs.blocks);
208 DynBlock& operator=(const DynBlock& rhs)
214 blocks.store(rhs.blocks);
220 struct timespec until;
222 DNSAction::Action action;
// Declared mutable so that it can be updated through a const DynBlock.
223 mutable std::atomic<unsigned int> blocks;
227 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
229 extern vector<pair<struct timeval, std::string> > g_confDelta;
231 extern uint64_t getLatencyCount(const std::string&);
// DNSDistStats: process-wide counters. Each stat_t is an atomic 64-bit unsigned
// counter initialised to zero.
// NOTE(review): the struct header and some counters referenced by `entries` below
// (e.g. responses, queries, aclDrops) are declared on lines not visible here.
235 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
237 stat_t servfailResponses{0};
239 stat_t frontendNXDomain{0};
240 stat_t frontendServFail{0};
241 stat_t frontendNoError{0};
242 stat_t nonCompliantQueries{0};
243 stat_t nonCompliantResponses{0};
245 stat_t emptyQueries{0};
247 stat_t dynBlocked{0};
249 stat_t ruleNXDomain{0};
250 stat_t ruleRefused{0};
251 stat_t ruleServFail{0};
252 stat_t selfAnswered{0};
253 stat_t downstreamTimeouts{0};
254 stat_t downstreamSendErrors{0};
258 stat_t cacheMisses{0};
259 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
260 stat_t securityStatus{0};
// Latency averages are plain doubles, not atomics.
262 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
// A metric value is exposed either as a pointer to an atomic counter, a pointer to a
// double, or a function called with the metric name that returns the value.
263 typedef std::function<uint64_t(const std::string&)> statfunction_t;
264 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
// Table associating each exported metric name with the source of its value.
265 std::vector<std::pair<std::string, entry_t>> entries{
266 {"responses", &responses},
267 {"servfail-responses", &servfailResponses},
268 {"queries", &queries},
269 {"frontend-nxdomain", &frontendNXDomain},
270 {"frontend-servfail", &frontendServFail},
271 {"frontend-noerror", &frontendNoError},
272 {"acl-drops", &aclDrops},
273 {"rule-drop", &ruleDrop},
274 {"rule-nxdomain", &ruleNXDomain},
275 {"rule-refused", &ruleRefused},
276 {"rule-servfail", &ruleServFail},
277 {"self-answered", &selfAnswered},
278 {"downstream-timeouts", &downstreamTimeouts},
279 {"downstream-send-errors", &downstreamSendErrors},
280 {"trunc-failures", &truncFail},
281 {"no-policy", &noPolicy},
282 {"latency0-1", &latency0_1},
283 {"latency1-10", &latency1_10},
284 {"latency10-50", &latency10_50},
285 {"latency50-100", &latency50_100},
286 {"latency100-1000", &latency100_1000},
287 {"latency-slow", &latencySlow},
288 {"latency-avg100", &latencyAvg100},
289 {"latency-avg1000", &latencyAvg1000},
290 {"latency-avg10000", &latencyAvg10000},
291 {"latency-avg1000000", &latencyAvg1000000},
292 {"uptime", uptimeOfProcess},
293 {"real-memory-usage", getRealMemoryUsage},
294 {"special-memory-usage", getSpecialMemoryUsage},
295 {"noncompliant-queries", &nonCompliantQueries},
296 {"noncompliant-responses", &nonCompliantResponses},
297 {"rdqueries", &rdQueries},
298 {"empty-queries", &emptyQueries},
299 {"cache-hits", &cacheHits},
300 {"cache-misses", &cacheMisses},
301 {"cpu-user-msec", getCPUTimeUser},
302 {"cpu-sys-msec", getCPUTimeSystem},
303 {"fd-usage", getOpenFileDescriptors},
304 {"dyn-blocked", &dynBlocked},
// Computed on demand from the current size of the dynamic block netmask tree.
305 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
306 {"security-status", &securityStatus},
308 {"latency-sum", &latencySum},
309 {"latency-count", getLatencyCount},
313 // Metric types for Prometheus
// NOTE(review): the enumerators are on lines not visible here; `counter` and `gauge`
// are the ones referenced elsewhere in this file.
314 enum class PrometheusMetricType: int {
319 // Keeps additional information about metrics
320 struct MetricDefinition {
// Builds a definition from its Prometheus type and its description text.
321 MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
// NOTE(review): prometheusType has no default member initializer, so a
// default-initialized MetricDefinition leaves it indeterminate until assigned.
324 MetricDefinition() = default;
326 // Metric description
327 std::string description;
328 // Metric type for Prometheus
329 PrometheusMetricType prometheusType;
// MetricDefinitionStorage: static descriptions (type and help text) of the exported
// metrics, keyed by metric name.
332 struct MetricDefinitionStorage {
333 // Return metric definition by name
// Looks metricName up in `metrics`; when found, the definition is copied into `metric`.
// The name is taken by const reference: it is only used as a lookup key.
334 bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) {
335 auto metricDetailsIter = metrics.find(metricName);
337 if (metricDetailsIter == metrics.end()) {
341 metric = metricDetailsIter->second;
345 // Return string representation of Prometheus metric type
346 std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
347 switch (metricType) {
348 case PrometheusMetricType::counter:
351 case PrometheusMetricType::gauge:
// Metric name -> definition. The description strings are emitted as-is to consumers.
360 std::map<std::string, MetricDefinition> metrics = {
361 { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
362 { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
363 { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
364 { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
365 { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
366 { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
367 { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
368 { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
369 { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
370 { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
// Worded like the other rule-* metrics: these answers are returned, not received.
371 { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers returned because of a rule")},
372 { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
373 { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
374 { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
375 { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
376 { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
377 { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
378 { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
379 { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
380 { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
381 { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
382 { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
383 { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
384 { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
385 { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
386 { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
387 { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
388 { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
389 { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
390 { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
391 { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
392 { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
393 { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
394 { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was not found in the cache")},
395 { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
396 { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
397 { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
398 { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
399 { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
400 { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
404 extern MetricDefinitionStorage g_metricDefinitions;
405 extern struct DNSDistStats g_stats;
406 void doLatencyStats(double udiff);
// StopWatch: keeps a start timestamp (d_start) and reports the time elapsed since it,
// in microseconds. d_needRealTime is passed to gettime() on every reading.
// NOTE(review): the struct header and some signatures are on lines not visible here.
411 StopWatch(bool realTime=false): d_needRealTime(realTime)
414 struct timespec d_start{0,0};
415 bool d_needRealTime{false};
// Store the current time in d_start; unixDie() is called if gettime() fails.
418 if(gettime(&d_start, d_needRealTime) < 0)
419 unixDie("Getting timestamp");
// Takes a start timestamp from the caller (body not visible; presumably assigns d_start).
423 void set(const struct timespec& from) {
// Microseconds elapsed since d_start: whole seconds scaled by 1e6 plus the
// nanosecond difference divided by 1e3. Does not modify d_start.
427 double udiff() const {
429 if(gettime(&now, d_needRealTime) < 0)
430 unixDie("Getting timestamp");
432 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
// Same computation as udiff(); being non-const, it is presumably followed by an update
// of d_start to `now` on a line not visible here — TODO confirm.
435 double udiffAndSet() {
437 if(gettime(&now, d_needRealTime) < 0)
438 unixDie("Getting timestamp");
440 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
// BasicQPSLimiter: rate limiter that keeps a fractional token count (d_tokens) and the
// time of the previous check (d_prev). The rate and burst are supplied on each check.
447 class BasicQPSLimiter
// Starts with `burst` tokens.
454 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
// Adds rate * elapsed-seconds tokens for the time since the previous call
// (udiffAndSet() returns microseconds, hence the division by 1e6), compares the total
// against `burst`, then tests whether at least one whole token is available.
// Declared const, but it updates the mutable members d_prev and d_tokens.
// NOTE(review): the bodies of the last two `if` statements are on lines not visible here.
459 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
461 auto delta = d_prev.udiffAndSet();
463 if(delta > 0.0) // time, frequently, does go backwards..
464 d_tokens += 1.0 * rate * (delta/1000000.0);
466 if(d_tokens > burst) {
471 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
// True when the timestamp stored by the previous check is later than cutOff.
479 bool seenSince(const struct timespec& cutOff) const
481 return cutOff < d_prev.d_start;
// Mutable so that the const check() can update them.
485 mutable StopWatch d_prev;
486 mutable double d_tokens;
// QPSLimiter: a BasicQPSLimiter with a fixed rate and burst, plus counters of passed
// and blocked checks.
489 class QPSLimiter : public BasicQPSLimiter
// Default construction leaves d_passthrough at its default value, true.
492 QPSLimiter(): BasicQPSLimiter()
// Configured limiter: stores the rate and burst and disables passthrough.
496 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
// Reports 0 while in passthrough mode, the configured rate otherwise.
501 unsigned int getRate() const
503 return d_passthrough ? 0 : d_rate;
506 int getPassed() const
511 int getBlocked() const
// Delegates to BasicQPSLimiter::check() with the stored rate and burst.
// NOTE(review): the passthrough handling and counter updates are on lines not visible here.
516 bool check() const // this is not quite fair
522 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
// Mutable so that the const check() can update them.
533 mutable unsigned int d_passed{0};
534 mutable unsigned int d_blocked{0};
536 unsigned int d_burst;
537 bool d_passthrough{true};
// Default constructor: sentTime is a StopWatch constructed with realTime=true, the
// delay is 0, no temporary failure TTL is set and the origDest address family is zeroed.
544 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
// Copy constructor. On the visible lines only origRemote, origDest, age,
// usageIndicator, origFD, origID, delayMsec and tempFailureTTL are copied.
// NOTE(review): every other member (e.g. sentTime, qTag, cacheKey) is left at its
// default-initialized value rather than copied — confirm this is intended.
545 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
// std::atomic is not copyable, so the value is transferred with load()/store().
547 usageIndicator.store(orig.usageIndicator.load());
548 origFD = orig.origFD;
549 origID = orig.origID;
550 delayMsec = orig.delayMsec;
551 tempFailureTTL = orig.tempFailureTTL;
// Value of usageIndicator that means "this state is not in use".
554 static const int64_t unusedIndicator = -1;
// True when the given usage indicator value denotes a state that is in use.
556 static bool isInUse(int64_t usageIndicator)
558 return usageIndicator != unusedIndicator;
// Same test applied to this state's own usageIndicator (signature not visible here).
563 return usageIndicator != unusedIndicator;
566 /* return true if the value has been successfully replaced meaning that
567 no-one updated the usage indicator in the meantime */
// Atomically resets usageIndicator to unusedIndicator, but only if it still holds
// expectedUsageIndicator.
568 bool tryMarkUnused(int64_t expectedUsageIndicator)
570 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
573 /* mark as used no matter what, using the next generation value as the usage
   indicator; return true if the state was in use before */
576 auto currentGeneration = generation++;
577 return markAsUsed(currentGeneration);
580 /* mark as used no matter what, storing currentGeneration as the usage indicator;
   return true if the state was in use before */
581 bool markAsUsed(int64_t currentGeneration)
// exchange() stores the new value and returns the previous one atomically.
583 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
584 return oldUsage != unusedIndicator;
587 /* We use this value to detect whether this state is in use.
588 For performance reasons we don't want to use a lock here, but that means
589 we need to be very careful when modifying this value. Modifications happen
591 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
592 then picking one of the states associated to this backend (via the idOffset).
593 Most of the time this state should not be in use and usageIndicator is -1, but we
594 might not yet have received a response for the query previously associated to this
595 state, meaning that we will 'reuse' this state and erase the existing state.
596 If we ever receive a response for this state, it will be discarded. This is
597 mostly fine for UDP except that we still need to be careful in order not to mess up
598 the 'outstanding' counters, which should only be increased when we are picking
599 an empty state, and not when reusing ;
600 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
601 be freed, as well as objects internal to libh2o.
602 - one of the UDP receiver threads receiving a response from a backend, picking
603 the corresponding state and sending the response to the client ;
604 - the 'healthcheck' thread scanning the states to actively discover timeouts,
605 mostly to keep some counters like the 'outstanding' one sane.
606 We previously based that logic on the origFD (FD on which the query was received,
607 and therefore from where the response should be sent) but this suffered from an
608 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
609 same value since we only have so many incoming sockets:
610 - 1/ 'client' thread gets a query and sets origFD to its FD, say 5 ;
611 - 2/ 'receiver' thread gets a response, reads the value of origFD (5), checks that the qname,
612 qtype and qclass match
613 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
614 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
615 5, except it's not the same 5 anymore and it overrides a fresh state.
616 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
617 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
618 when the state is unused and the value of our counter otherwise.
// Data members of IDState. The trailing numbers in the existing comments look like
// member sizes in bytes — presumably kept to track the struct layout.
620 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
621 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
622 ComboAddress origRemote; // 28
623 ComboAddress origDest; // 28
624 StopWatch sentTime; // 16
626 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
628 boost::optional<boost::uuids::uuid> uniqueId;
630 boost::optional<Netmask> subnet{boost::none};
631 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
632 std::shared_ptr<QTag> qTag{nullptr};
// Raw pointers; nothing on the visible lines releases them from this struct.
633 const ClientState* cs{nullptr};
634 DOHUnit* du{nullptr};
// NOTE(review): the following five members have no default member initializer.
635 uint32_t cacheKey; // 4
636 uint32_t cacheKeyNoECS; // 4
639 uint16_t qclass; // 2
640 uint16_t origID; // 2
641 uint16_t origFlags; // 2
644 boost::optional<uint32_t> tempFailureTTL;
645 bool ednsAdded{false};
646 bool ecsAdded{false};
647 bool skipCache{false};
648 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
649 bool dnssecOK{false};
// Per-key query counters: a string key mapped to a count.
653 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
// Callback taking a question and returning a (bool, string) tuple.
// NOTE(review): presumably "should this query be counted" and the key to count it
// under — confirm against the callers.
654 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
// The read/write lock is initialised on construction and destroyed on destruction
// (the constructor and destructor signatures are on lines not visible here).
658 pthread_rwlock_init(&queryLock, nullptr);
662 pthread_rwlock_destroy(&queryLock);
664 QueryCountRecords records;
665 QueryCountFilter filter;
666 pthread_rwlock_t queryLock;
670 extern QueryCount g_qcount;
// ClientState: one listening frontend, with its configuration and its counters.
// NOTE(review): the struct header and some members (e.g. udpFD, tcpFD, cpus, local)
// are on lines not visible here.
//
// Constructor: records the local address, the TCP flag, the reuseport flag, the TCP
// Fast Open queue size, the interface name and the set of CPUs.
674 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
// Optional protocol contexts; getType() below reports which one is set.
680 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
681 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
682 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
683 std::string interface;
684 std::atomic<uint64_t> queries{0};
685 mutable std::atomic<uint64_t> responses{0};
686 std::atomic<uint64_t> tcpDiedReadingQuery{0};
687 std::atomic<uint64_t> tcpDiedSendingResponse{0};
688 std::atomic<uint64_t> tcpGaveUp{0};
689 std::atomic<uint64_t> tcpClientTimeouts{0};
690 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
691 std::atomic<uint64_t> tcpCurrentConnections{0};
692 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
693 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
694 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
695 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
696 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
697 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
698 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
699 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
700 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
// Running averages maintained by updateTCPMetrics() below.
701 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
703 std::atomic<double> tcpAvgConnectionDuration{0.0};
706 int fastOpenQueueSize{0};
// Returns the UDP descriptor unless it is -1, in which case the TCP one is returned.
712 int getSocket() const
714 return udpFD != -1 ? udpFD : tcpFD;
// True when either a TLS or a DoH frontend is attached (signature not visible here).
729 return tlsFrontend != nullptr || dohFrontend != nullptr;
// Human-readable frontend type: "UDP" or "TCP", followed by a suffix naming the
// attached protocol context (DoH, DoT or DNSCrypt), if any.
732 std::string getType() const
734 std::string result = udpFD != -1 ? "UDP" : "TCP";
737 result += " (DNS over HTTPS)";
739 else if (tlsFrontend) {
740 result += " (DNS over TLS)";
742 else if (dnscryptCtx) {
743 result += " (DNSCrypt)";
// eBPF filter attached to this frontend's socket, if any (HAVE_EBPF builds only,
// judging by the #endif below).
750 shared_ptr<BPFFilter> d_filter;
// Removes this frontend's socket from the current filter (signature not visible here).
755 d_filter->removeSocket(getSocket());
// Adds this frontend's socket to the given filter.
760 void attachFilter(shared_ptr<BPFFilter> bpf)
764 bpf->addSocket(getSocket());
767 #endif /* HAVE_EBPF */
// Folds one finished TCP connection into the running averages: each new average is
// 99% of the previous value plus 1% of the new sample.
// NOTE(review): each line reads the atomic and then stores the result as two separate
// operations, so concurrent callers can overwrite each other's update.
769 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
771 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
772 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
// TCPClientCollection: the set of TCP client threads, each identified by a file
// descriptor stored in d_tcpclientthreads, plus counters for the number of threads,
// the next position to use and the number of queued items.
776 class TCPClientCollection {
777 std::vector<int> d_tcpclientthreads;
778 std::atomic<uint64_t> d_numthreads{0};
779 std::atomic<uint64_t> d_pos{0};
780 std::atomic<uint64_t> d_queued{0};
781 const uint64_t d_maxthreads{0};
784 const bool d_useSinglePipe;
// Reserves room for maxThreads entries. When useSinglePipe is set, a single pipe is
// created and both of its ends are made non-blocking; a std::runtime_error is thrown
// if any of these steps fails, and the pipe is closed first when it had been created.
787 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
790 d_tcpclientthreads.reserve(maxThreads);
792 if (d_useSinglePipe) {
793 if (pipe(d_singlePipe) < 0) {
795 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
798 if (!setNonBlocking(d_singlePipe[0])) {
800 close(d_singlePipe[0]);
801 close(d_singlePipe[1]);
802 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
805 if (!setNonBlocking(d_singlePipe[1])) {
807 close(d_singlePipe[0]);
808 close(d_singlePipe[1]);
809 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
// Picks the next thread in turn: the position counter is post-incremented and taken
// modulo the current number of threads (signature not visible here).
// NOTE(review): if d_numthreads is 0 this modulo divides by zero — confirm callers
// guarantee that at least one thread has been added.
815 uint64_t pos = d_pos++;
817 return d_tcpclientthreads[pos % d_numthreads];
// True once the number of threads has reached the configured maximum.
819 bool hasReachedMaxThreads() const
821 return d_numthreads >= d_maxthreads;
823 uint64_t getThreadsCount() const
827 uint64_t getQueuedCount() const
831 void decrementQueuedCount()
// Defined out of line.
835 void addTCPClientThread();
838 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
// DownstreamState: one backend server, with its sockets, health-check settings,
// per-query state slots (idStates) and counters.
// NOTE(review): several members and method signatures are on lines not visible here.
840 struct DownstreamState
// Health-check customisation hook: receives a name, two 16-bit values and the DNS
// header, and returns a (DNSName, uint16_t, uint16_t) tuple — presumably the qname,
// qtype and qclass to use for the check.
842 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
// Full constructor, defined out of line.
844 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
// Convenience constructor: default source address, no source interface, one socket,
// and connect=true.
845 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
// Destructor fragment: iterates over the sockets (loop body not visible here) and
// destroys the read/write lock.
848 for (auto& fd : sockets) {
854 pthread_rwlock_destroy(&d_lock);
856 boost::uuids::uuid id;
857 std::set<unsigned int> hashes;
858 mutable pthread_rwlock_t d_lock;
859 std::vector<int> sockets;
860 const std::string sourceItfName;
861 std::mutex socketsLock;
862 std::mutex connectLock;
863 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
865 const ComboAddress remote;
867 vector<IDState> idStates;
868 const ComboAddress sourceAddr;
// Health-check query parameters and their defaults.
869 checkfunc_t checkFunction;
870 DNSName checkName{"a.root-servers.net."};
871 QType checkType{QType::A};
872 uint16_t checkClass{QClass::IN};
873 std::atomic<uint64_t> idOffset{0};
874 std::atomic<uint64_t> sendErrors{0};
875 std::atomic<uint64_t> outstanding{0};
876 std::atomic<uint64_t> reuseds{0};
877 std::atomic<uint64_t> queries{0};
878 std::atomic<uint64_t> responses{0};
// NOTE(review): the next three names repeat members declared just above; presumably
// they belong to a nested struct whose opening and closing lines are not visible here.
880 std::atomic<uint64_t> sendErrors{0};
881 std::atomic<uint64_t> reuseds{0};
882 std::atomic<uint64_t> queries{0};
884 std::atomic<uint64_t> tcpDiedSendingQuery{0};
885 std::atomic<uint64_t> tcpDiedReadingResponse{0};
886 std::atomic<uint64_t> tcpGaveUp{0};
887 std::atomic<uint64_t> tcpReadTimeouts{0};
888 std::atomic<uint64_t> tcpWriteTimeouts{0};
889 std::atomic<uint64_t> tcpCurrentConnections{0};
// Running averages maintained by updateTCPMetrics() below.
890 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
892 std::atomic<double> tcpAvgConnectionDuration{0.0};
894 size_t socketsOffset{0};
895 double queryLoad{0.0};
896 double dropRate{0.0};
897 double latencyUsec{0.0};
900 int tcpConnectTimeout{5};
901 int tcpRecvTimeout{30};
902 int tcpSendTimeout{30};
903 unsigned int checkInterval{1};
904 unsigned int lastCheck{0};
905 const unsigned int sourceItf{0};
907 uint16_t xpfRRCode{0};
908 uint16_t checkTimeout{1000}; /* in milliseconds */
909 uint8_t currentCheckFailures{0};
910 uint8_t consecutiveSuccessfulChecks{0};
911 uint8_t maxCheckFailures{1};
912 uint8_t minRiseSuccesses{1};
// Up and Down are set explicitly via setUp()/setDown(); Auto is the default.
915 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
916 bool mustResolve{false};
917 bool upStatus{false};
920 bool disableZeroScope{false};
921 std::atomic<bool> connected{false};
922 std::atomic_flag threadStarted;
923 bool tcpFastOpen{false};
924 bool ipBindAddrNoPort{true};
// Availability test fragment: the forced Down and Up states are checked first
// (the signature and return statements are on lines not visible here).
928 if(availability == Availability::Down)
930 if(availability == Availability::Up)
934 void setUp() { availability = Availability::Up; }
935 void setDown() { availability = Availability::Down; }
936 void setAuto() { availability = Availability::Auto; }
// Returns the remote address with port on the visible return path; the condition
// (presumably an empty configured name) is on a line not visible here.
937 string getName() const {
939 return remote.toStringWithPort();
// Either the remote address with port alone, or the name followed by that address in
// parentheses.
943 string getNameWithAddr() const {
945 return remote.toStringWithPort();
947 return name + " (" + remote.toStringWithPort()+ ")";
// Textual status; when availability is neither Up nor Down it is "up" or "down"
// according to upStatus.
949 string getStatus() const
952 if(availability == DownstreamState::Availability::Up)
954 else if(availability == DownstreamState::Availability::Down)
957 status = (upStatus ? "up" : "down");
// Defined out of line.
962 void setId(const boost::uuids::uuid& newId);
963 void setWeight(int newWeight);
// Folds one finished TCP connection into the running averages: each new average is
// 99% of the previous value plus 1% of the new sample.
// NOTE(review): each line reads the atomic and then stores the result as two separate
// operations, so concurrent callers can overwrite each other's update.
965 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
967 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
968 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
971 using servers_t =vector<std::shared_ptr<DownstreamState>>;
973 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
975 void responderThread(std::shared_ptr<DownstreamState> state);
976 extern std::mutex g_luamutex;
977 extern LuaContext g_lua;
978 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
986 virtual bool matches(const DNSQuestion* dq) const =0;
987 virtual string toString() const = 0;
988 mutable std::atomic<uint64_t> d_matches{0};
991 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
992 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
999 std::string toString() const {
1000 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
// ServerPool: a numbered list of backend servers guarded by a read/write lock, with
// an optional packet cache and an optional load-balancing policy.
// NOTE(review): the struct header and several signatures are on lines not visible here.
//
// The lock is initialised on construction and destroyed on destruction.
1008 pthread_rwlock_init(&d_lock, nullptr);
1012 pthread_rwlock_destroy(&d_lock);
// Returns the shared packet cache pointer (may be null).
1015 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1022 void setECS(bool useECS)
1027 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1028 std::shared_ptr<ServerPolicy> policy{nullptr};
// Counts the servers of the pool under the read lock; with upOnly set, only servers
// whose isUp() returns true are considered.
1030 size_t countServers(bool upOnly)
1033 ReadLock rl(&d_lock);
1034 for (const auto& server : d_servers) {
1035 if (!upOnly || std::get<1>(server)->isUp() ) {
// Builds a NumberedVector of servers while holding the read lock (the statement that
// fills `result` is on a line not visible here).
1042 NumberedVector<shared_ptr<DownstreamState>> getServers()
1044 NumberedVector<shared_ptr<DownstreamState>> result;
1046 ReadLock rl(&d_lock);
// Appends the server under the write lock, numbered one past the current size, then
// re-sorts the list by each server's `order`; stable_sort keeps the relative position
// of servers with equal order.
1052 void addServer(shared_ptr<DownstreamState>& server)
1054 WriteLock wl(&d_lock);
1055 unsigned int count = (unsigned int) d_servers.size();
1056 d_servers.push_back(make_pair(++count, server));
1057 /* we need to reorder based on the server 'order' */
1058 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1059 return a.second->order < b.second->order;
1061 /* and now we need to renumber for Lua (custom policies) */
1063 for (auto& serv : d_servers) {
// Removes the matching server under the write lock. erase() returns the iterator to
// the next element, which is why the loop header has no increment.
1068 void removeServer(shared_ptr<DownstreamState>& server)
1070 WriteLock wl(&d_lock);
1073 for (auto it = d_servers.begin(); it != d_servers.end();) {
1075 /* we need to renumber the servers placed
1076 after the removed one, for Lua (custom policies) */
1080 else if (it->second == server) {
1081 it = d_servers.erase(it);
1091 NumberedVector<shared_ptr<DownstreamState>> d_servers;
1092 pthread_rwlock_t d_lock;
1093 bool d_useECS{false};
// Ordered map from a string key to a shared ServerPool; the helpers below look
// pools up through a poolName parameter.
1095 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
// Declarations only; the definitions live elsewhere. Judging by their names
// and parameters they set the policy of, add a server to, and remove a server
// from the pool called poolName - confirm against the definitions.
1096 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1097 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1098 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
// Fields of a configuration record whose struct header is not visible in this
// excerpt (presumably CarbonConfig, the element type of g_carbon - TODO confirm).
// Address of the server.
1102 ComboAddress server;
// Three name components; NOTE(review): presumably combined to build the metric
// names that are reported - confirm against the code that uses them.
1103 std::string namespace_name;
1104 std::string ourname;
1105 std::string instance_name;
// Interval value; the unit is not visible here (presumably seconds - confirm).
1106 unsigned int interval;
// Flag values for the EDNS header.
1109 enum ednsHeaderFlags {
// No flag set.
1110 EDNS_HEADER_FLAG_NONE = 0,
// 32768 == 0x8000, the most significant bit of a 16-bit value; presumably the
// EDNS "DNSSEC OK" (DO) bit described in RFC 3225.
1111 EDNS_HEADER_FLAG_DO = 32768
// Associates a rule with an action, together with an identifier and an
// ordering number.
1114 struct DNSDistRuleAction
// The rule (selector).
1116 std::shared_ptr<DNSRule> d_rule;
// The action paired with the rule.
1117 std::shared_ptr<DNSAction> d_action;
// UUID of this rule/action pair.
1118 boost::uuids::uuid d_id;
// Creation-order number (presumably increasing as pairs are created - confirm).
1119 uint64_t d_creationOrder;
// Same layout as DNSDistRuleAction, except that the action is a
// DNSResponseAction instead of a DNSAction.
1122 struct DNSDistResponseRuleAction
// The rule (selector).
1124 std::shared_ptr<DNSRule> d_rule;
// The response action paired with the rule.
1125 std::shared_ptr<DNSResponseAction> d_action;
// UUID of this rule/action pair.
1126 boost::uuids::uuid d_id;
// Creation-order number (presumably increasing as pairs are created - confirm).
1127 uint64_t d_creationOrder;
// Shared global state, defined elsewhere. Most of these objects are wrapped in
// a GlobalStateHolder; LocalHolders (later in this file) obtains its views of
// them through getLocal().
// Dynamic blocks stored in a SuffixMatchTree.
1130 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
// NOTE(review): presumably the action applied for dynamic blocks - confirm.
1131 extern DNSAction::Action g_dynBlockAction;
// List of carbon configurations.
1133 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
// Global server-selection policy.
1134 extern GlobalStateHolder<ServerPolicy> g_policy;
// Global collection of backends (servers_t is declared elsewhere).
1135 extern GlobalStateHolder<servers_t> g_dstates;
// Global pools map.
1136 extern GlobalStateHolder<pools_t> g_pools;
// Rule/action lists: one for DNSDistRuleAction entries and three for
// DNSDistResponseRuleAction entries. NOTE(review): judging by the names the
// last two apply to cache hits and to self-answered responses - confirm.
1137 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1138 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1139 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1140 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
// Access control list, stored as a NetmaskGroup.
1141 extern GlobalStateHolder<NetmaskGroup> g_ACL;
// Process-wide settings defined in another translation unit (extern
// declarations only).
// NOTE(review): the groupings and meanings noted below are inferred from the
// names and types; confirm against the definitions before relying on them.
1143 extern ComboAddress g_serverControl; // not changed during runtime
// The meaning of each tuple field is not visible in this excerpt.
1145 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
// TLS frontends, DoH frontends, and the list of ClientState objects.
1146 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1147 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1148 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1149 extern bool g_truncateTC;
1150 extern bool g_fixupCase;
// Timeouts; the units are not visible in this header.
1151 extern int g_tcpRecvTimeout;
1152 extern int g_tcpSendTimeout;
1153 extern int g_udpTimeout;
1154 extern uint16_t g_maxOutstanding;
// Atomic flag; presumably set once the configuration has been processed.
1155 extern std::atomic<bool> g_configurationDone;
// TCP-related limits.
1156 extern uint64_t g_maxTCPClientThreads;
1157 extern uint64_t g_maxTCPQueuedConnections;
1158 extern size_t g_maxTCPQueriesPerConn;
1159 extern size_t g_maxTCPConnectionDuration;
1160 extern size_t g_maxTCPConnectionsPerClient;
// Cache-related settings; the first two are atomic.
1161 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1162 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1163 extern uint32_t g_staleCacheEntriesTTL;
// API-related settings.
1164 extern bool g_apiReadWrite;
1165 extern std::string g_apiConfigDirectory;
1166 extern bool g_servFailOnNoPolicy;
1167 extern uint32_t g_hashperturb;
1168 extern bool g_useTCPSinglePipe;
1169 extern uint16_t g_downstreamTCPCleanupInterval;
1170 extern size_t g_udpVectorSize;
1171 extern bool g_preserveTrailingData;
1172 extern bool g_allowEmptyResponse;
1173 extern bool g_roundrobinFailOnNoServer;
1174 extern double g_consistentHashBalancingFactor;
// eBPF filter globals. According to the #endif comment these declarations are
// guarded by HAVE_EBPF; the opening preprocessor line is not visible in this
// excerpt.
// Default BPF filter.
1177 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
// List of dynamic BPF filters.
1178 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1179 #endif /* HAVE_EBPF */
// Bundle of LocalStateHolder views onto the global state.
// The constructor initialises every member by calling getLocal() on the
// matching GlobalStateHolder global. The initialiser list follows the same
// order as the member declarations below.
1183 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
// View of g_ACL.
1187 LocalStateHolder<NetmaskGroup> acl;
// View of g_policy.
1188 LocalStateHolder<ServerPolicy> policy;
// View of g_rulactions.
1189 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
// View of g_cachehitresprulactions.
1190 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
// View of g_selfansweredresprulactions.
1191 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
// View of g_dstates.
1192 LocalStateHolder<servers_t> servers;
// View of g_dynblockNMG.
1193 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
// View of g_dynblockSMT.
1194 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
// View of g_pools.
1195 LocalStateHolder<pools_t> pools;
// Declarations only; the definitions live elsewhere.
// Control thread entry point, taking a file descriptor and a local address.
1200 void controlThread(int fd, ComboAddress local);
// Pool helpers working on a pools_t map and a pool name. createPoolIfNotExists
// takes the map by non-const reference, the other two by const reference.
1201 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1202 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1203 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
// Server-selection functions. All of them share the parameter list and return
// type of policyfunc_t.
1205 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1207 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1208 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1209 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1210 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1211 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
// Settings of the built-in web server.
// NOTE(review): at least one member line is not visible in this excerpt.
1213 struct WebserverConfig
// Web server password.
1215 std::string password;
// Optional map of custom headers (header name to value, presumably); no value
// is present unless one has been assigned.
1217 boost::optional<std::map<std::string, std::string> > customHeaders;
// Declarations only; the definitions live elsewhere.
// Web server setters. apiKey and customHeaders are boost::optional values
// passed by value, so each call copies the argument.
1221 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1222 void setWebserverPassword(const std::string& password);
1223 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
// Thread entry points for the web server and for accepting TCP connections.
// NOTE(review): the type behind the void* argument is not visible here.
1225 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1226 void tcpAcceptorThread(void* p);
// Only declared when HAVE_DNS_OVER_HTTPS is defined.
1227 #ifdef HAVE_DNS_OVER_HTTPS
1228 void dohThread(ClientState* cs);
1229 #endif /* HAVE_DNS_OVER_HTTPS */
// Tracking of Lua side effects. Going by the comments on each declaration the
// state has three values: indeterminate, "no side effect" and "side effect".
1231 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1232 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1233 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1234 void resetLuaSideEffect(); // reset to indeterminate state
// Declarations only; the definitions live elsewhere.
// NOTE(review): the semantics are not visible in this header; the notes below
// describe only what the signatures show.
// Takes a response buffer and the expected qname/qtype/qclass and remote
// address; consumed is an output parameter (non-const reference).
1236 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
// The response buffer, its length and its size are passed as pointers, so the
// callee is able to modify them.
1237 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
// ruleresult and drop are output parameters (non-const references).
1238 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1240 bool checkQueryHeaders(const struct dnsheader* dh);
// DNSCrypt contexts and query handling.
1242 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1243 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
// Returns an optional byte vector; len and dnsCryptQuery are taken by
// non-const reference.
1244 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
// Takes the question and an EDNS option code (presumably adds an XPF record to
// the query - confirm against the definition).
1246 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
// Returns a 16-bit DNS ID (random, going by the name).
1248 uint16_t getRandomDNSID();
1250 #include "dnsdist-snmp.hh"
// SNMP-related globals, defined elsewhere.
1252 extern bool g_snmpEnabled;
1253 extern bool g_snmpTrapsEnabled;
// Raw pointer to the SNMP agent; ownership is not visible in this header.
1254 extern DNSDistSNMPAgent* g_snmpAgent;
1255 extern bool g_addEDNSToSelfGeneratedResponses;
// Set of capability names (presumably those kept when privileges are dropped -
// confirm against the definition).
1257 extern std::set<std::string> g_capabilitiesToRetain;
// Size limits. Being static const at namespace scope in a header, each
// translation unit that includes this file gets its own copy.
1258 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1259 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
// Possible outcomes of processQuery(). Going by the enumerator names: drop the
// query, send an answer directly, or pass the query to a backend.
1261 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
// Declarations only; the definitions live elsewhere.
// selectedBackend is an output parameter (non-const reference); presumably it
// is filled in when the result is PassToBackend - confirm.
1262 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
// Conversions between IDState and the DNSQuestion / DNSResponse objects.
// setIDStateFromDNSQuestion takes qname as an rvalue reference, so callers
// must pass a temporary or use std::move.
1264 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1265 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
// Backend socket helpers. healthCheck defaults to false.
1267 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1268 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);