2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "ext/luawrapper/include/LuaContext.hpp"
32 #include <unordered_map>
34 #include <boost/variant.hpp>
36 #include "bpf-filter.hh"
37 #include "capabilities.hh"
38 #include "circular_buffer.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
44 #include "ednsoptions.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
53 void carbonDumpThread();
54 uint64_t uptimeOfProcess(const std::string& str);
56 extern uint16_t g_ECSSourcePrefixV4;
57 extern uint16_t g_ECSSourcePrefixV6;
58 extern bool g_ECSOverride;
60 typedef std::unordered_map<string, string> QTag;
/* Builds a question from data the caller has already parsed.
   The name, address, header and query-time pointers are stored exactly as
   passed in; this constructor copies none of the pointed-to objects.
   `rem` is dereferenced in the initializer list, so it must be non-null. */
64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
/* ecsPrefixLength starts from the global default matching the remote address
   family (g_ECSSourcePrefixV4 for AF_INET, g_ECSSourcePrefixV6 otherwise) and
   ecsOverride starts from g_ECSOverride. */
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
74 boost::optional<boost::uuids::uuid> uniqueId;
77 boost::optional<Netmask> subnet;
78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
80 const DNSName* qname{nullptr};
81 const ComboAddress* local{nullptr};
82 const ComboAddress* remote{nullptr};
83 std::shared_ptr<QTag> qTag{nullptr};
84 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
85 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
86 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
87 struct dnsheader* dh{nullptr};
88 const struct timespec* queryTime{nullptr};
89 struct DOHUnit* du{nullptr};
91 unsigned int consumed{0};
93 boost::optional<uint32_t> tempFailureTTL;
94 uint32_t cacheKeyNoECS;
97 const uint16_t qclass;
99 uint16_t ecsPrefixLength;
101 uint8_t ednsRCode{0};
103 bool skipCache{false};
108 bool ecsAdded{false};
109 bool ednsAdded{false};
110 bool useZeroScope{false};
111 bool dnssecOK{false};
114 struct DNSResponse : DNSQuestion
116 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
117 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
118 DNSResponse(const DNSResponse&) = delete;
119 DNSResponse& operator=(const DNSResponse&) = delete;
120 DNSResponse(DNSResponse&&) = default;
123 /* so what could you do:
126 provide actual answer,
127 allow & and stop processing,
129 modify header: (servfail|refused|notimp), set TC=1,
135 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
/* Returns a short human-readable description of `action`
   (for example "Send NXDomain" for Action::Nxdomain). */
136 static std::string typeToString(const Action& action)
141 case Action::Nxdomain:
142 return "Send NXDomain";
143 case Action::Refused:
144 return "Send Refused";
146 return "Spoof an answer";
149 case Action::HeaderModify:
150 return "Modify the header";
152 return "Route to a pool";
155 case Action::Truncate:
156 return "Truncate over UDP";
157 case Action::ServFail:
158 return "Send ServFail";
162 case Action::NoRecurse:
169 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
173 virtual string toString() const = 0;
174 virtual std::map<string, double> getStats() const
180 class DNSResponseAction
183 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
184 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
185 virtual ~DNSResponseAction()
188 virtual string toString() const = 0;
193 DynBlock(): action(DNSAction::Action::None), warning(false)
197 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
/* Copy constructor. The plain members are copied in the initializer list;
   `blocks` is a std::atomic, which cannot be copy-constructed, so its current
   value is read from rhs and written with an explicit store(). */
201 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
203 blocks.store(rhs.blocks);
/* Copy assignment. */
206 DynBlock& operator=(const DynBlock& rhs)
/* std::atomic is not copy-assignable from another atomic: transfer the counter value explicitly */
212 blocks.store(rhs.blocks);
218 struct timespec until;
220 DNSAction::Action action;
221 mutable std::atomic<unsigned int> blocks;
225 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
227 extern vector<pair<struct timeval, std::string> > g_confDelta;
229 extern uint64_t getLatencyCount(const std::string&);
233 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
235 stat_t servfailResponses{0};
237 stat_t frontendNXDomain{0};
238 stat_t frontendServFail{0};
239 stat_t frontendNoError{0};
240 stat_t nonCompliantQueries{0};
241 stat_t nonCompliantResponses{0};
243 stat_t emptyQueries{0};
245 stat_t dynBlocked{0};
247 stat_t ruleNXDomain{0};
248 stat_t ruleRefused{0};
249 stat_t ruleServFail{0};
250 stat_t selfAnswered{0};
251 stat_t downstreamTimeouts{0};
252 stat_t downstreamSendErrors{0};
256 stat_t cacheMisses{0};
257 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
258 stat_t securityStatus{0};
260 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
261 typedef std::function<uint64_t(const std::string&)> statfunction_t;
262 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
263 std::vector<std::pair<std::string, entry_t>> entries{
264 {"responses", &responses},
265 {"servfail-responses", &servfailResponses},
266 {"queries", &queries},
267 {"frontend-nxdomain", &frontendNXDomain},
268 {"frontend-servfail", &frontendServFail},
269 {"frontend-noerror", &frontendNoError},
270 {"acl-drops", &aclDrops},
271 {"rule-drop", &ruleDrop},
272 {"rule-nxdomain", &ruleNXDomain},
273 {"rule-refused", &ruleRefused},
274 {"rule-servfail", &ruleServFail},
275 {"self-answered", &selfAnswered},
276 {"downstream-timeouts", &downstreamTimeouts},
277 {"downstream-send-errors", &downstreamSendErrors},
278 {"trunc-failures", &truncFail},
279 {"no-policy", &noPolicy},
280 {"latency0-1", &latency0_1},
281 {"latency1-10", &latency1_10},
282 {"latency10-50", &latency10_50},
283 {"latency50-100", &latency50_100},
284 {"latency100-1000", &latency100_1000},
285 {"latency-slow", &latencySlow},
286 {"latency-avg100", &latencyAvg100},
287 {"latency-avg1000", &latencyAvg1000},
288 {"latency-avg10000", &latencyAvg10000},
289 {"latency-avg1000000", &latencyAvg1000000},
290 {"uptime", uptimeOfProcess},
291 {"real-memory-usage", getRealMemoryUsage},
292 {"special-memory-usage", getSpecialMemoryUsage},
293 {"noncompliant-queries", &nonCompliantQueries},
294 {"noncompliant-responses", &nonCompliantResponses},
295 {"rdqueries", &rdQueries},
296 {"empty-queries", &emptyQueries},
297 {"cache-hits", &cacheHits},
298 {"cache-misses", &cacheMisses},
299 {"cpu-user-msec", getCPUTimeUser},
300 {"cpu-sys-msec", getCPUTimeSystem},
301 {"fd-usage", getOpenFileDescriptors},
302 {"dyn-blocked", &dynBlocked},
303 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
304 {"security-status", &securityStatus},
306 {"latency-sum", &latencySum},
307 {"latency-count", getLatencyCount},
311 // Metric types for Prometheus
312 enum class PrometheusMetricType: int {
317 // Keeps additional information about metrics
318 struct MetricDefinition {
319 MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
322 MetricDefinition() = default;
324 // Metric description
325 std::string description;
326 // Metric type for Prometheus
327 PrometheusMetricType prometheusType;
330 struct MetricDefinitionStorage {
331 // Return metric definition by name
332 bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
333 auto metricDetailsIter = metrics.find(metricName);
335 if (metricDetailsIter == metrics.end()) {
339 metric = metricDetailsIter->second;
343 // Return string representation of Prometheus metric type
344 std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
345 switch (metricType) {
346 case PrometheusMetricType::counter:
349 case PrometheusMetricType::gauge:
358 std::map<std::string, MetricDefinition> metrics = {
359 { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
360 { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
361 { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
362 { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
363 { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
364 { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
365 { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
366 { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
367 { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
368 { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
369 { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
370 { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
371 { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
372 { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
373 { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
374 { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
375 { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
376 { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
377 { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
378 { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
379 { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
380 { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
381 { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
382 { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
383 { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
384 { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
385 { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
386 { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
387 { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
388 { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
389 { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
390 { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
391 { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
392 { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
393 { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
394 { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
395 { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
396 { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
397 { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
398 { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
402 extern MetricDefinitionStorage g_metricDefinitions;
403 extern struct DNSDistStats g_stats;
404 void doLatencyStats(double udiff);
409 StopWatch(bool realTime=false): d_needRealTime(realTime)
412 struct timespec d_start{0,0};
413 bool d_needRealTime{false};
416 if(gettime(&d_start, d_needRealTime) < 0)
417 unixDie("Getting timestamp");
421 void set(const struct timespec& from) {
/* Time elapsed between d_start and the current timestamp, in microseconds:
   whole seconds are scaled by 1e6 and nanoseconds are divided by 1e3.
   d_start is left untouched (const member function).
   unixDie() is called when gettime() reports a failure. */
425 double udiff() const {
427 if(gettime(&now, d_needRealTime) < 0)
428 unixDie("Getting timestamp");
430 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
/* Computes the time elapsed since d_start in microseconds, with the same
   formula as udiff(). unixDie() is called when gettime() reports a failure.
   NOTE(review): going by the name, d_start is presumably reset to the new
   timestamp before returning; that statement is not visible in this excerpt. */
433 double udiffAndSet() {
435 if(gettime(&now, d_needRealTime) < 0)
436 unixDie("Getting timestamp");
438 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
445 class BasicQPSLimiter
452 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
/* Rate check for `rate` queries per second with a ceiling of `burst`.
   d_tokens is credited with `rate` tokens per second elapsed since the previous
   call, then compared against `burst` and against 1.0.
   Declared const, but it updates the mutable members d_prev and d_tokens. */
457 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
/* delta is the elapsed time in microseconds */
459 auto delta = d_prev.udiffAndSet();
461 if(delta > 0.0) // time, frequently, does go backwards..
/* delta/1000000.0 converts microseconds to seconds */
462 d_tokens += 1.0 * rate * (delta/1000000.0);
464 if(d_tokens > burst) {
469 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
477 bool seenSince(const struct timespec& cutOff) const
479 return cutOff < d_prev.d_start;
483 mutable StopWatch d_prev;
484 mutable double d_tokens;
487 class QPSLimiter : public BasicQPSLimiter
490 QPSLimiter(): BasicQPSLimiter()
494 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
499 unsigned int getRate() const
501 return d_passthrough ? 0 : d_rate;
504 int getPassed() const
509 int getBlocked() const
514 bool check() const // this is not quite fair
520 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
531 mutable unsigned int d_passed{0};
532 mutable unsigned int d_blocked{0};
534 unsigned int d_burst;
535 bool d_passthrough{true};
542 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
/* Copy constructor. Copies origRemote, origDest, age, the usage indicator,
   origFD, origID, delayMsec and tempFailureTTL from orig.
   NOTE(review): in the lines visible here no other member (e.g. sentTime,
   subnet, qTag) is copied — confirm that this partial copy is intended. */
543 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
/* std::atomic cannot be copy-constructed: read the value and store it explicitly */
545 usageIndicator.store(orig.usageIndicator.load());
546 origFD = orig.origFD;
547 origID = orig.origID;
548 delayMsec = orig.delayMsec;
549 tempFailureTTL = orig.tempFailureTTL;
552 static const int64_t unusedIndicator = -1;
/* True when the supplied indicator value differs from unusedIndicator (-1).
   This static overload tests the value passed in; the parameter shadows the
   usageIndicator member, which is not read here. */
554 static bool isInUse(int64_t usageIndicator)
556 return usageIndicator != unusedIndicator;
561 return usageIndicator != unusedIndicator;
564 /* return true if the value has been successfully replaced meaning that
565 no-one updated the usage indicator in the meantime */
566 bool tryMarkUnused(int64_t expectedUsageIndicator)
568 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
571 /* mark as used no matter what, return true if the state was in use before */
574 auto currentGeneration = generation++;
575 return markAsUsed(currentGeneration);
578 /* mark as used no matter what, return true if the state was in use before */
579 bool markAsUsed(int64_t currentGeneration)
581 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
582 return oldUsage != unusedIndicator;
585 /* We use this value to detect whether this state is in use.
586 For performance reasons we don't want to use a lock here, but that means
587 we need to be very careful when modifying this value. Modifications happen
589 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
590 then picking one of the states associated to this backend (via the idOffset).
591 Most of the time this state should not be in use and usageIndicator is -1, but we
592 might not yet have received a response for the query previously associated to this
593 state, meaning that we will 'reuse' this state and erase the existing state.
594 If we ever receive a response for this state, it will be discarded. This is
595 mostly fine for UDP except that we still need to be careful in order not to mess up
596 the 'outstanding' counters, which should only be increased when we are picking
597 an empty state, and not when reusing one ;
598 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
599 be freed, as well as objects internal to libh2o.
600 - one of the UDP receiver threads receiving a response from a backend, picking
601 the corresponding state and sending the response to the client ;
602 - the 'healthcheck' thread scanning the states to actively discover timeouts,
603 mostly to keep some counters like the 'outstanding' one sane.
604 We previously based that logic on the origFD (FD on which the query was received,
605 and therefore from where the response should be sent) but this suffered from an
606 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
607 same value since we only have so many incoming sockets:
608 - 1/ 'client' thread gets a query and sets origFD to its FD, say 5 ;
609 - 2/ 'receiver' thread gets a response, reads the value of origFD as 5, checks that the qname,
610 qtype and qclass match
611 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
612 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
613 5, except it's not the same 5 anymore and it overrides a fresh state.
614 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
615 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
616 when the state is unused and the value of our counter otherwise.
618 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
619 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
620 ComboAddress origRemote; // 28
621 ComboAddress origDest; // 28
622 StopWatch sentTime; // 16
624 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
626 boost::optional<boost::uuids::uuid> uniqueId;
628 boost::optional<Netmask> subnet{boost::none};
629 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
630 std::shared_ptr<QTag> qTag{nullptr};
631 const ClientState* cs{nullptr};
632 DOHUnit* du{nullptr};
633 uint32_t cacheKey; // 4
634 uint32_t cacheKeyNoECS; // 4
637 uint16_t qclass; // 2
638 uint16_t origID; // 2
639 uint16_t origFlags; // 2
642 boost::optional<uint32_t> tempFailureTTL;
643 bool ednsAdded{false};
644 bool ecsAdded{false};
645 bool skipCache{false};
646 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
647 bool dnssecOK{false};
651 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
652 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
656 pthread_rwlock_init(&queryLock, nullptr);
658 QueryCountRecords records;
659 QueryCountFilter filter;
660 pthread_rwlock_t queryLock;
664 extern QueryCount g_qcount;
668 ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort)
674 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
675 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
676 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
677 std::string interface;
678 std::atomic<uint64_t> queries{0};
679 mutable std::atomic<uint64_t> responses{0};
680 std::atomic<uint64_t> tcpDiedReadingQuery{0};
681 std::atomic<uint64_t> tcpDiedSendingResponse{0};
682 std::atomic<uint64_t> tcpGaveUp{0};
683 std::atomic<uint64_t> tcpClientTimeouts{0};
684 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
685 std::atomic<uint64_t> tcpCurrentConnections{0};
686 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
687 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
688 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
689 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
690 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
691 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
692 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
693 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
694 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
695 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
697 std::atomic<double> tcpAvgConnectionDuration{0.0};
700 int fastOpenQueueSize{0};
/* Returns udpFD when it is set (i.e. not -1), and tcpFD otherwise. */
706 int getSocket() const
708 return udpFD != -1 ? udpFD : tcpFD;
723 return tlsFrontend != nullptr || dohFrontend != nullptr;
726 std::string getType() const
728 std::string result = udpFD != -1 ? "UDP" : "TCP";
731 result += " (DNS over HTTPS)";
733 else if (tlsFrontend) {
734 result += " (DNS over TLS)";
736 else if (dnscryptCtx) {
737 result += " (DNSCrypt)";
744 shared_ptr<BPFFilter> d_filter;
749 d_filter->removeSocket(getSocket());
754 void attachFilter(shared_ptr<BPFFilter> bpf)
758 bpf->addSocket(getSocket());
761 #endif /* HAVE_EBPF */
/* Folds one new sample (nbQueries, durationMs) into the two running averages:
   each new average is 99% of the previous value plus 1% of the sample.
   Both averages are std::atomic<double>; each statement is an atomic load
   followed by a separate atomic store, not a single read-modify-write. */
763 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
765 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
766 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
770 class TCPClientCollection {
771 std::vector<int> d_tcpclientthreads;
772 std::atomic<uint64_t> d_numthreads{0};
773 std::atomic<uint64_t> d_pos{0};
774 std::atomic<uint64_t> d_queued{0};
775 const uint64_t d_maxthreads{0};
778 const bool d_useSinglePipe;
/* Reserves room for maxThreads entries in d_tcpclientthreads.
   When useSinglePipe is set, additionally creates one pipe (d_singlePipe) and
   switches both of its ends to non-blocking mode.
   Throws std::runtime_error if the pipe cannot be created or cannot be made
   non-blocking; in the latter case both pipe descriptors are closed first. */
781 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
784 d_tcpclientthreads.reserve(maxThreads);
786 if (d_useSinglePipe) {
787 if (pipe(d_singlePipe) < 0) {
789 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
/* read end */
792 if (!setNonBlocking(d_singlePipe[0])) {
794 close(d_singlePipe[0]);
795 close(d_singlePipe[1]);
796 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
/* write end */
799 if (!setNonBlocking(d_singlePipe[1])) {
801 close(d_singlePipe[0]);
802 close(d_singlePipe[1]);
803 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
809 uint64_t pos = d_pos++;
811 return d_tcpclientthreads[pos % d_numthreads];
813 bool hasReachedMaxThreads() const
815 return d_numthreads >= d_maxthreads;
817 uint64_t getThreadsCount() const
821 uint64_t getQueuedCount() const
825 void decrementQueuedCount()
829 void addTCPClientThread();
832 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
834 struct DownstreamState
836 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
838 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets);
839 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1) {}
842 for (auto& fd : sockets) {
849 boost::uuids::uuid id;
850 std::set<unsigned int> hashes;
851 mutable pthread_rwlock_t d_lock;
852 std::vector<int> sockets;
853 const std::string sourceItfName;
854 std::mutex socketsLock;
855 std::mutex connectLock;
856 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
858 const ComboAddress remote;
860 vector<IDState> idStates;
861 const ComboAddress sourceAddr;
862 checkfunc_t checkFunction;
863 DNSName checkName{"a.root-servers.net."};
864 QType checkType{QType::A};
865 uint16_t checkClass{QClass::IN};
866 std::atomic<uint64_t> idOffset{0};
867 std::atomic<uint64_t> sendErrors{0};
868 std::atomic<uint64_t> outstanding{0};
869 std::atomic<uint64_t> reuseds{0};
870 std::atomic<uint64_t> queries{0};
871 std::atomic<uint64_t> responses{0};
873 std::atomic<uint64_t> sendErrors{0};
874 std::atomic<uint64_t> reuseds{0};
875 std::atomic<uint64_t> queries{0};
877 std::atomic<uint64_t> tcpDiedSendingQuery{0};
878 std::atomic<uint64_t> tcpDiedReadingResponse{0};
879 std::atomic<uint64_t> tcpGaveUp{0};
880 std::atomic<uint64_t> tcpReadTimeouts{0};
881 std::atomic<uint64_t> tcpWriteTimeouts{0};
882 std::atomic<uint64_t> tcpCurrentConnections{0};
883 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
885 std::atomic<double> tcpAvgConnectionDuration{0.0};
887 size_t socketsOffset{0};
888 double queryLoad{0.0};
889 double dropRate{0.0};
890 double latencyUsec{0.0};
893 int tcpConnectTimeout{5};
894 int tcpRecvTimeout{30};
895 int tcpSendTimeout{30};
896 unsigned int checkInterval{1};
897 unsigned int lastCheck{0};
898 const unsigned int sourceItf{0};
900 uint16_t xpfRRCode{0};
901 uint16_t checkTimeout{1000}; /* in milliseconds */
902 uint8_t currentCheckFailures{0};
903 uint8_t consecutiveSuccessfulChecks{0};
904 uint8_t maxCheckFailures{1};
905 uint8_t minRiseSuccesses{1};
908 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
909 bool mustResolve{false};
910 bool upStatus{false};
913 bool disableZeroScope{false};
914 std::atomic<bool> connected{false};
915 std::atomic_flag threadStarted;
916 bool tcpFastOpen{false};
917 bool ipBindAddrNoPort{true};
921 if(availability == Availability::Down)
923 if(availability == Availability::Up)
927 void setUp() { availability = Availability::Up; }
928 void setDown() { availability = Availability::Down; }
929 void setAuto() { availability = Availability::Auto; }
930 string getName() const {
932 return remote.toStringWithPort();
936 string getNameWithAddr() const {
938 return remote.toStringWithPort();
940 return name + " (" + remote.toStringWithPort()+ ")";
942 string getStatus() const
945 if(availability == DownstreamState::Availability::Up)
947 else if(availability == DownstreamState::Availability::Down)
950 status = (upStatus ? "up" : "down");
955 void setId(const boost::uuids::uuid& newId);
956 void setWeight(int newWeight);
/* Folds one new sample (nbQueries, durationMs) into the two running averages:
   each new average is 99% of the previous value plus 1% of the sample.
   Both averages are std::atomic<double>; each statement is an atomic load
   followed by a separate atomic store, not a single read-modify-write. */
958 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
960 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
961 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
964 using servers_t =vector<std::shared_ptr<DownstreamState>>;
966 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
968 void responderThread(std::shared_ptr<DownstreamState> state);
969 extern std::mutex g_luamutex;
970 extern LuaContext g_lua;
971 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
979 virtual bool matches(const DNSQuestion* dq) const =0;
980 virtual string toString() const = 0;
981 mutable std::atomic<uint64_t> d_matches{0};
984 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
985 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
992 std::string toString() const {
993 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
1001 pthread_rwlock_init(&d_lock, nullptr);
1004 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1011 void setECS(bool useECS)
1016 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1017 std::shared_ptr<ServerPolicy> policy{nullptr};
1019 size_t countServers(bool upOnly)
1022 ReadLock rl(&d_lock);
1023 for (const auto& server : d_servers) {
1024 if (!upOnly || std::get<1>(server)->isUp() ) {
1031 NumberedVector<shared_ptr<DownstreamState>> getServers()
1033 NumberedVector<shared_ptr<DownstreamState>> result;
1035 ReadLock rl(&d_lock);
1041 void addServer(shared_ptr<DownstreamState>& server)
1043 WriteLock wl(&d_lock);
1044 unsigned int count = (unsigned int) d_servers.size();
1045 d_servers.push_back(make_pair(++count, server));
1046 /* we need to reorder based on the server 'order' */
1047 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1048 return a.second->order < b.second->order;
1050 /* and now we need to renumber for Lua (custom policies) */
1052 for (auto& serv : d_servers) {
1057 void removeServer(shared_ptr<DownstreamState>& server)
1059 WriteLock wl(&d_lock);
1062 for (auto it = d_servers.begin(); it != d_servers.end();) {
1064 /* we need to renumber the servers placed
1065 after the removed one, for Lua (custom policies) */
1069 else if (it->second == server) {
1070 it = d_servers.erase(it);
1080 NumberedVector<shared_ptr<DownstreamState>> d_servers;
1081 pthread_rwlock_t d_lock;
1082 bool d_useECS{false};
/* Pools indexed by name. */
1084 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Pool management helpers, declared here and defined elsewhere. Each one takes
   the pool map to operate on and the name of the target pool.
   NOTE(review): whether a missing pool is created on demand is not visible
   from these declarations -- confirm in the implementation. */
1085 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1086 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1087 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
/* Fields of a configuration record.
   NOTE(review): the enclosing struct header is not visible in this excerpt;
   presumably this is CarbonConfig, which g_carbon holds a vector of -- confirm. */
/* address of the remote server */
1091 ComboAddress server;
1092 std::string namespace_name;
1093 std::string ourname;
1094 std::string instance_name;
/* NOTE(review): the unit of the interval is not visible here -- presumably seconds, confirm */
1095 unsigned int interval;
/* Flag values for the EDNS header. */
1098 enum ednsHeaderFlags {
1099 EDNS_HEADER_FLAG_NONE = 0,
/* 32768 == 0x8000, the top bit of a 16-bit flags field; RFC 3225 assigns that
   bit to "DO" (DNSSEC OK) */
1100 EDNS_HEADER_FLAG_DO = 32768
/* Pairs a rule with the query action attached to it, plus the identifiers
   used to refer to the pair. */
1103 struct DNSDistRuleAction
1105 std::shared_ptr<DNSRule> d_rule;
1106 std::shared_ptr<DNSAction> d_action;
/* unique identifier of this rule/action pair */
1107 boost::uuids::uuid d_id;
/* NOTE(review): presumably a monotonically increasing counter assigned when the pair is created -- confirm */
1108 uint64_t d_creationOrder;
/* Same layout as DNSDistRuleAction, but the action is a DNSResponseAction. */
1111 struct DNSDistResponseRuleAction
1113 std::shared_ptr<DNSRule> d_rule;
1114 std::shared_ptr<DNSResponseAction> d_action;
/* unique identifier of this rule/action pair */
1115 boost::uuids::uuid d_id;
/* NOTE(review): presumably a monotonically increasing counter assigned when the pair is created -- confirm */
1116 uint64_t d_creationOrder;
/* Global state, declared here and defined elsewhere. Everything wrapped in a
   GlobalStateHolder is read through a LocalStateHolder obtained with
   getLocal() (see LocalHolders). */
/* dynamic blocks indexed by name suffix */
1119 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1120 extern DNSAction::Action g_dynBlockAction;
1122 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1123 extern GlobalStateHolder<ServerPolicy> g_policy;
1124 extern GlobalStateHolder<servers_t> g_dstates;
1125 extern GlobalStateHolder<pools_t> g_pools;
/* rule/action lists: one for queries, three for responses */
1126 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1127 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1128 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1129 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1130 extern GlobalStateHolder<NetmaskGroup> g_ACL;
/* Global settings, declared here and defined elsewhere. Unlike the
   GlobalStateHolder objects, most of these are plain variables; only the ones
   declared std::atomic carry any synchronisation of their own. */
1132 extern ComboAddress g_serverControl; // not changed during runtime
1134 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1135 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1136 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1137 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1138 extern bool g_truncateTC;
1139 extern bool g_fixupCase;
/* timeouts; NOTE(review): units are not visible here -- presumably seconds, confirm */
1140 extern int g_tcpRecvTimeout;
1141 extern int g_tcpSendTimeout;
1142 extern int g_udpTimeout;
1143 extern uint16_t g_maxOutstanding;
1144 extern std::atomic<bool> g_configurationDone;
/* TCP limits */
1145 extern uint64_t g_maxTCPClientThreads;
1146 extern uint64_t g_maxTCPQueuedConnections;
1147 extern size_t g_maxTCPQueriesPerConn;
1148 extern size_t g_maxTCPConnectionDuration;
1149 extern size_t g_maxTCPConnectionsPerClient;
/* packet cache maintenance */
1150 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1151 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1152 extern bool g_verboseHealthChecks;
1153 extern uint32_t g_staleCacheEntriesTTL;
1154 extern bool g_apiReadWrite;
1155 extern std::string g_apiConfigDirectory;
1156 extern bool g_servFailOnNoPolicy;
1157 extern uint32_t g_hashperturb;
1158 extern bool g_useTCPSinglePipe;
1159 extern uint16_t g_downstreamTCPCleanupInterval;
1160 extern size_t g_udpVectorSize;
1161 extern bool g_preserveTrailingData;
1162 extern bool g_allowEmptyResponse;
1163 extern bool g_roundrobinFailOnNoServer;
/* eBPF filter globals. The #endif below closes a HAVE_EBPF conditional whose
   opening directive is not visible in this excerpt. */
1166 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1167 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1168 #endif /* HAVE_EBPF */
/* Bundles one LocalStateHolder per piece of global state. The constructor
   initialises every member with getLocal() from the matching global holder;
   the initialiser list follows the member declaration order below. */
1172 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
/* local view of g_ACL */
1176 LocalStateHolder<NetmaskGroup> acl;
/* local view of g_policy */
1177 LocalStateHolder<ServerPolicy> policy;
/* local view of g_rulactions */
1178 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
/* local view of g_cachehitresprulactions */
1179 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
/* local view of g_selfansweredresprulactions */
1180 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
/* local view of g_dstates */
1181 LocalStateHolder<servers_t> servers;
/* local view of g_dynblockNMG */
1182 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
/* local view of g_dynblockSMT */
1183 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
/* local view of g_pools */
1184 LocalStateHolder<pools_t> pools;
/* Declarations only; the implementations live elsewhere. */
/* thread entry point taking a file descriptor and the local address */
1189 void controlThread(int fd, ComboAddress local);
/* returns a list of callables; NOTE(review): presumably `config` is the path of the
   configuration file and the callables are deferred start-up tasks -- confirm */
1190 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
/* pool lookup on a const map; NOTE(review): behaviour for an unknown pool name is
   not visible here -- confirm */
1191 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
/* takes the map by non-const reference, so it may insert into it */
1192 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1193 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
/* Server selection functions sharing a single signature: each takes the
   candidate servers and the question being processed, and returns the selected
   backend as a shared pointer.
   NOTE(review): the names suggest first-available, least-outstanding, weighted
   random, weighted hashed, consistent hashed and round-robin selection, and
   that a null result means "no server" -- confirm against the implementations. */
1195 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1197 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1198 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1199 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1200 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1201 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
/* Settings of the built-in web server. Only part of the struct is visible in
   this excerpt. */
1203 struct WebserverConfig
1205 std::string password;
/* optional map of header name to header value; unset when no custom headers are configured */
1207 boost::optional<std::map<std::string, std::string> > customHeaders;
/* Setters for the web server settings; the optional parameters are taken by
   value, so callers may pass boost::none. Declarations only. */
1211 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1212 void setWebserverPassword(const std::string& password);
1213 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
/* Thread entry points, defined elsewhere. */
1215 void dnsdistWebserverThread(int sock, const ComboAddress& local);
/* NOTE(review): the concrete type behind `p` is not visible here -- confirm before casting */
1216 void tcpAcceptorThread(void* p);
/* only declared when DNS over HTTPS support is compiled in */
1217 #ifdef HAVE_DNS_OVER_HTTPS
1218 void dohThread(ClientState* cs);
1219 #endif /* HAVE_DNS_OVER_HTTPS */
/* Tracking of whether Lua code had side effects. Per the per-function notes
   below the state is three-valued: indeterminate, "no side effect", or
   "side effect". */
1221 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1222 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1223 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1224 void resetLuaSideEffect(); // reset to indeterminate state
/* Query and response processing helpers; declarations only. */
/* `consumed` is an output parameter (non-const reference); the response buffer is read-only */
1226 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
/* response, responseLen and responseSize are passed by pointer, so the callee can replace
   the buffer and update its length and capacity */
1227 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
/* `drop` is an output parameter (non-const reference) */
1228 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1230 bool checkQueryHeaders(const struct dnsheader* dh);
/* DNSCrypt support */
1232 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1233 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
/* returns an optional byte vector; NOTE(review): presumably a response to send back
   directly when set -- confirm */
1234 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1236 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1238 uint16_t getRandomDNSID();
1240 #include "dnsdist-snmp.hh"
/* SNMP-related globals, defined elsewhere. */
1242 extern bool g_snmpEnabled;
1243 extern bool g_snmpTrapsEnabled;
/* raw pointer to the SNMP agent; NOTE(review): presumably null when SNMP is not enabled -- confirm */
1244 extern DNSDistSNMPAgent* g_snmpAgent;
1245 extern bool g_addEDNSToSelfGeneratedResponses;
/* set of capability names */
1247 extern std::set<std::string> g_capabilitiesToRetain;
/* Size limits shared by every translation unit that includes this header.
   They are declared constexpr so that they are guaranteed to be initialised at
   compile time and can be used wherever a constant expression is required
   (array bounds, static_assert, template arguments). `static` keeps internal
   linkage, so each translation unit gets its own copy and no out-of-line
   definition is needed. */
static constexpr uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
static constexpr size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
/* Outcome of processQuery(): the three enumerators are Drop, SendAnswer and
   PassToBackend.
   NOTE(review): presumably "discard the query", "answer the client directly"
   and "forward to the selected backend" respectively -- confirm. */
1251 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* `selectedBackend` is an output parameter (non-const reference) */
1252 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
/* Conversions between the stored per-query state (IDState) and the question/response objects. */
1254 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
/* qname is taken by rvalue reference, so the caller gives up its DNSName */
1255 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
/* Backend I/O helpers. */
1257 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
/* healthCheck defaults to false */
1258 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);