]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #7908 from omoerbeek/rec-4.1.14-changelog
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/circular_buffer.hpp>
35 #include <boost/variant.hpp>
36
37 #include "bpf-filter.hh"
38 #include "capabilities.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52
53 void carbonDumpThread();
54 uint64_t uptimeOfProcess(const std::string& str);
55
56 extern uint16_t g_ECSSourcePrefixV4;
57 extern uint16_t g_ECSSourcePrefixV6;
58 extern bool g_ECSOverride;
59
60 typedef std::unordered_map<string, string> QTag;
61
62 struct DNSQuestion
63 {
64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
67 origFlags = *flags;
68 }
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
72
73 #ifdef HAVE_PROTOBUF
74 boost::optional<boost::uuids::uuid> uniqueId;
75 #endif
76 Netmask ecs;
77 boost::optional<Netmask> subnet;
78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
79 const DNSName* qname{nullptr};
80 const ComboAddress* local{nullptr};
81 const ComboAddress* remote{nullptr};
82 std::shared_ptr<QTag> qTag{nullptr};
83 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
84 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
85 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
86 struct dnsheader* dh{nullptr};
87 const struct timespec* queryTime{nullptr};
88 struct DOHUnit* du{nullptr};
89 size_t size;
90 unsigned int consumed{0};
91 int delayMsec{0};
92 boost::optional<uint32_t> tempFailureTTL;
93 uint32_t cacheKeyNoECS;
94 uint32_t cacheKey;
95 const uint16_t qtype;
96 const uint16_t qclass;
97 uint16_t len;
98 uint16_t ecsPrefixLength;
99 uint16_t origFlags;
100 uint8_t ednsRCode{0};
101 const bool tcp;
102 bool skipCache{false};
103 bool ecsOverride;
104 bool useECS{true};
105 bool addXPF{true};
106 bool ecsSet{false};
107 bool ecsAdded{false};
108 bool ednsAdded{false};
109 bool useZeroScope{false};
110 bool dnssecOK{false};
111 };
112
113 struct DNSResponse : DNSQuestion
114 {
115 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
116 DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
117 DNSResponse(const DNSResponse&) = delete;
118 DNSResponse& operator=(const DNSResponse&) = delete;
119 DNSResponse(DNSResponse&&) = default;
120 };
121
122 /* so what could you do:
123 drop,
124 fake up nxdomain,
125 provide actual answer,
126 allow & and stop processing,
127 continue processing,
128 modify header: (servfail|refused|notimp), set TC=1,
129 send to pool */
130
131 class DNSAction
132 {
133 public:
134 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
135 static std::string typeToString(const Action& action)
136 {
137 switch(action) {
138 case Action::Drop:
139 return "Drop";
140 case Action::Nxdomain:
141 return "Send NXDomain";
142 case Action::Refused:
143 return "Send Refused";
144 case Action::Spoof:
145 return "Spoof an answer";
146 case Action::Allow:
147 return "Allow";
148 case Action::HeaderModify:
149 return "Modify the header";
150 case Action::Pool:
151 return "Route to a pool";
152 case Action::Delay:
153 return "Delay";
154 case Action::Truncate:
155 return "Truncate over UDP";
156 case Action::ServFail:
157 return "Send ServFail";
158 case Action::None:
159 case Action::NoOp:
160 return "Do nothing";
161 case Action::NoRecurse:
162 return "Set rd=0";
163 }
164
165 return "Unknown";
166 }
167
168 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
169 virtual ~DNSAction()
170 {
171 }
172 virtual string toString() const = 0;
173 virtual std::map<string, double> getStats() const
174 {
175 return {{}};
176 }
177 };
178
179 class DNSResponseAction
180 {
181 public:
182 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
183 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
184 virtual ~DNSResponseAction()
185 {
186 }
187 virtual string toString() const = 0;
188 };
189
190 struct DynBlock
191 {
192 DynBlock(): action(DNSAction::Action::None), warning(false)
193 {
194 }
195
196 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
197 {
198 }
199
200 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
201 {
202 blocks.store(rhs.blocks);
203 }
204
205 DynBlock& operator=(const DynBlock& rhs)
206 {
207 reason=rhs.reason;
208 until=rhs.until;
209 domain=rhs.domain;
210 action=rhs.action;
211 blocks.store(rhs.blocks);
212 warning=rhs.warning;
213 return *this;
214 }
215
216 string reason;
217 struct timespec until;
218 DNSName domain;
219 DNSAction::Action action;
220 mutable std::atomic<unsigned int> blocks;
221 bool warning;
222 };
223
224 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
225
226 extern vector<pair<struct timeval, std::string> > g_confDelta;
227
228 extern uint64_t getLatencyCount(const std::string&);
229
230 struct DNSDistStats
231 {
232 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
233 stat_t responses{0};
234 stat_t servfailResponses{0};
235 stat_t queries{0};
236 stat_t frontendNXDomain{0};
237 stat_t frontendServFail{0};
238 stat_t frontendNoError{0};
239 stat_t nonCompliantQueries{0};
240 stat_t nonCompliantResponses{0};
241 stat_t rdQueries{0};
242 stat_t emptyQueries{0};
243 stat_t aclDrops{0};
244 stat_t dynBlocked{0};
245 stat_t ruleDrop{0};
246 stat_t ruleNXDomain{0};
247 stat_t ruleRefused{0};
248 stat_t ruleServFail{0};
249 stat_t selfAnswered{0};
250 stat_t downstreamTimeouts{0};
251 stat_t downstreamSendErrors{0};
252 stat_t truncFail{0};
253 stat_t noPolicy{0};
254 stat_t cacheHits{0};
255 stat_t cacheMisses{0};
256 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
257 stat_t securityStatus{0};
258
259 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
260 typedef std::function<uint64_t(const std::string&)> statfunction_t;
261 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
262 std::vector<std::pair<std::string, entry_t>> entries{
263 {"responses", &responses},
264 {"servfail-responses", &servfailResponses},
265 {"queries", &queries},
266 {"frontend-nxdomain", &frontendNXDomain},
267 {"frontend-servfail", &frontendServFail},
268 {"frontend-noerror", &frontendNoError},
269 {"acl-drops", &aclDrops},
270 {"rule-drop", &ruleDrop},
271 {"rule-nxdomain", &ruleNXDomain},
272 {"rule-refused", &ruleRefused},
273 {"rule-servfail", &ruleServFail},
274 {"self-answered", &selfAnswered},
275 {"downstream-timeouts", &downstreamTimeouts},
276 {"downstream-send-errors", &downstreamSendErrors},
277 {"trunc-failures", &truncFail},
278 {"no-policy", &noPolicy},
279 {"latency0-1", &latency0_1},
280 {"latency1-10", &latency1_10},
281 {"latency10-50", &latency10_50},
282 {"latency50-100", &latency50_100},
283 {"latency100-1000", &latency100_1000},
284 {"latency-slow", &latencySlow},
285 {"latency-avg100", &latencyAvg100},
286 {"latency-avg1000", &latencyAvg1000},
287 {"latency-avg10000", &latencyAvg10000},
288 {"latency-avg1000000", &latencyAvg1000000},
289 {"uptime", uptimeOfProcess},
290 {"real-memory-usage", getRealMemoryUsage},
291 {"special-memory-usage", getSpecialMemoryUsage},
292 {"noncompliant-queries", &nonCompliantQueries},
293 {"noncompliant-responses", &nonCompliantResponses},
294 {"rdqueries", &rdQueries},
295 {"empty-queries", &emptyQueries},
296 {"cache-hits", &cacheHits},
297 {"cache-misses", &cacheMisses},
298 {"cpu-user-msec", getCPUTimeUser},
299 {"cpu-sys-msec", getCPUTimeSystem},
300 {"fd-usage", getOpenFileDescriptors},
301 {"dyn-blocked", &dynBlocked},
302 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
303 {"security-status", &securityStatus},
304 // Latency histogram
305 {"latency-sum", &latencySum},
306 {"latency-count", getLatencyCount},
307 };
308 };
309
// Kinds of metrics exported to Prometheus.
// Numbering starts at 1, so no enumerator has the value 0.
enum class PrometheusMetricType : int
{
  counter = 1,
  gauge // 2
};
315
316 // Keeps additional information about metrics
317 struct MetricDefinition {
318 MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
319 }
320
321 MetricDefinition() = default;
322
323 // Metric description
324 std::string description;
325 // Metric type for Prometheus
326 PrometheusMetricType prometheusType;
327 };
328
329 struct MetricDefinitionStorage {
330 // Return metric definition by name
331 bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
332 auto metricDetailsIter = metrics.find(metricName);
333
334 if (metricDetailsIter == metrics.end()) {
335 return false;
336 }
337
338 metric = metricDetailsIter->second;
339 return true;
340 };
341
342 // Return string representation of Prometheus metric type
343 std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
344 switch (metricType) {
345 case PrometheusMetricType::counter:
346 return "counter";
347 break;
348 case PrometheusMetricType::gauge:
349 return "gauge";
350 break;
351 default:
352 return "";
353 break;
354 }
355 };
356
357 std::map<std::string, MetricDefinition> metrics = {
358 { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
359 { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
360 { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
361 { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
362 { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
363 { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
364 { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
365 { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
366 { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
367 { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
368 { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
369 { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
370 { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
371 { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
372 { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
373 { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
374 { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
375 { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
376 { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
377 { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
378 { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
379 { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
380 { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
381 { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
382 { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
383 { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
384 { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
385 { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
386 { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
387 { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
388 { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
389 { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
390 { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
391 { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
392 { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
393 { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
394 { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
395 { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
396 { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
397 { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
398 // Latency histogram
399 { "latency-sum", MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")},
400 { "latency-count", MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")},
401 };
402 };
403
404 extern MetricDefinitionStorage g_metricDefinitions;
405 extern struct DNSDistStats g_stats;
406 void doLatencyStats(double udiff);
407
408
409 struct StopWatch
410 {
411 StopWatch(bool realTime=false): d_needRealTime(realTime)
412 {
413 }
414 struct timespec d_start{0,0};
415 bool d_needRealTime{false};
416
417 void start() {
418 if(gettime(&d_start, d_needRealTime) < 0)
419 unixDie("Getting timestamp");
420
421 }
422
423 void set(const struct timespec& from) {
424 d_start = from;
425 }
426
427 double udiff() const {
428 struct timespec now;
429 if(gettime(&now, d_needRealTime) < 0)
430 unixDie("Getting timestamp");
431
432 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
433 }
434
435 double udiffAndSet() {
436 struct timespec now;
437 if(gettime(&now, d_needRealTime) < 0)
438 unixDie("Getting timestamp");
439
440 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
441 d_start = now;
442 return ret;
443 }
444
445 };
446
447 class BasicQPSLimiter
448 {
449 public:
450 BasicQPSLimiter()
451 {
452 }
453
454 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
455 {
456 d_prev.start();
457 }
458
459 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
460 {
461 auto delta = d_prev.udiffAndSet();
462
463 if(delta > 0.0) // time, frequently, does go backwards..
464 d_tokens += 1.0 * rate * (delta/1000000.0);
465
466 if(d_tokens > burst) {
467 d_tokens = burst;
468 }
469
470 bool ret=false;
471 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
472 ret=true;
473 --d_tokens;
474 }
475
476 return ret;
477 }
478
479 bool seenSince(const struct timespec& cutOff) const
480 {
481 return cutOff < d_prev.d_start;
482 }
483
484 protected:
485 mutable StopWatch d_prev;
486 mutable double d_tokens;
487 };
488
489 class QPSLimiter : public BasicQPSLimiter
490 {
491 public:
492 QPSLimiter(): BasicQPSLimiter()
493 {
494 }
495
496 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
497 {
498 d_prev.start();
499 }
500
501 unsigned int getRate() const
502 {
503 return d_passthrough ? 0 : d_rate;
504 }
505
506 int getPassed() const
507 {
508 return d_passed;
509 }
510
511 int getBlocked() const
512 {
513 return d_blocked;
514 }
515
516 bool check() const // this is not quite fair
517 {
518 if (d_passthrough) {
519 return true;
520 }
521
522 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
523 if (ret) {
524 d_passed++;
525 }
526 else {
527 d_blocked++;
528 }
529
530 return ret;
531 }
532 private:
533 mutable unsigned int d_passed{0};
534 mutable unsigned int d_blocked{0};
535 unsigned int d_rate;
536 unsigned int d_burst;
537 bool d_passthrough{true};
538 };
539
540 struct ClientState;
541
542 struct IDState
543 {
544 IDState() : origFD(-1), sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
545 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
546 {
547 origFD.store(orig.origFD.load());
548 origID = orig.origID;
549 delayMsec = orig.delayMsec;
550 tempFailureTTL = orig.tempFailureTTL;
551 }
552
553 std::atomic<int> origFD; // set to <0 to indicate this state is empty // 4
554
555 ComboAddress origRemote; // 28
556 ComboAddress origDest; // 28
557 StopWatch sentTime; // 16
558 DNSName qname; // 80
559 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
560 #ifdef HAVE_PROTOBUF
561 boost::optional<boost::uuids::uuid> uniqueId;
562 #endif
563 boost::optional<Netmask> subnet{boost::none};
564 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
565 std::shared_ptr<QTag> qTag{nullptr};
566 const ClientState* cs{nullptr};
567 DOHUnit* du{nullptr};
568 uint32_t cacheKey; // 4
569 uint32_t cacheKeyNoECS; // 4
570 uint16_t age; // 4
571 uint16_t qtype; // 2
572 uint16_t qclass; // 2
573 uint16_t origID; // 2
574 uint16_t origFlags; // 2
575 int delayMsec;
576 boost::optional<uint32_t> tempFailureTTL;
577 bool ednsAdded{false};
578 bool ecsAdded{false};
579 bool skipCache{false};
580 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
581 bool dnssecOK{false};
582 bool useZeroScope;
583 };
584
585 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
586 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
587 struct QueryCount {
588 QueryCount()
589 {
590 pthread_rwlock_init(&queryLock, nullptr);
591 }
592 QueryCountRecords records;
593 QueryCountFilter filter;
594 pthread_rwlock_t queryLock;
595 bool enabled{false};
596 };
597
598 extern QueryCount g_qcount;
599
600 struct ClientState
601 {
602 ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort)
603 {
604 }
605
606 std::set<int> cpus;
607 ComboAddress local;
608 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
609 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
610 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
611 std::string interface;
612 std::atomic<uint64_t> queries{0};
613 std::atomic<uint64_t> tcpDiedReadingQuery{0};
614 std::atomic<uint64_t> tcpDiedSendingResponse{0};
615 std::atomic<uint64_t> tcpGaveUp{0};
616 std::atomic<uint64_t> tcpClientTimeouts{0};
617 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
618 std::atomic<uint64_t> tcpCurrentConnections{0};
619 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
620 /* in ms */
621 std::atomic<double> tcpAvgConnectionDuration{0.0};
622 int udpFD{-1};
623 int tcpFD{-1};
624 int fastOpenQueueSize{0};
625 bool muted{false};
626 bool tcp;
627 bool reuseport;
628 bool ready{false};
629
630 int getSocket() const
631 {
632 return udpFD != -1 ? udpFD : tcpFD;
633 }
634
635 std::string getType() const
636 {
637 std::string result = udpFD != -1 ? "UDP" : "TCP";
638
639 if (dohFrontend) {
640 result += " (DNS over HTTPS)";
641 }
642 else if (tlsFrontend) {
643 result += " (DNS over TLS)";
644 }
645 else if (dnscryptCtx) {
646 result += " (DNSCrypt)";
647 }
648
649 return result;
650 }
651
652 #ifdef HAVE_EBPF
653 shared_ptr<BPFFilter> d_filter;
654
655 void detachFilter()
656 {
657 if (d_filter) {
658 d_filter->removeSocket(getSocket());
659 d_filter = nullptr;
660 }
661 }
662
663 void attachFilter(shared_ptr<BPFFilter> bpf)
664 {
665 detachFilter();
666
667 bpf->addSocket(getSocket());
668 d_filter = bpf;
669 }
670 #endif /* HAVE_EBPF */
671
672 void updateTCPMetrics(size_t queries, uint64_t durationMs)
673 {
674 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
675 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
676 }
677 };
678
679 class TCPClientCollection {
680 std::vector<int> d_tcpclientthreads;
681 std::atomic<uint64_t> d_numthreads{0};
682 std::atomic<uint64_t> d_pos{0};
683 std::atomic<uint64_t> d_queued{0};
684 const uint64_t d_maxthreads{0};
685 std::mutex d_mutex;
686 int d_singlePipe[2];
687 const bool d_useSinglePipe;
688 public:
689
690 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
691
692 {
693 d_tcpclientthreads.reserve(maxThreads);
694
695 if (d_useSinglePipe) {
696 if (pipe(d_singlePipe) < 0) {
697 throw std::runtime_error("Error creating the TCP single communication pipe: " + string(strerror(errno)));
698 }
699
700 if (!setNonBlocking(d_singlePipe[0])) {
701 int err = errno;
702 close(d_singlePipe[0]);
703 close(d_singlePipe[1]);
704 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
705 }
706
707 if (!setNonBlocking(d_singlePipe[1])) {
708 int err = errno;
709 close(d_singlePipe[0]);
710 close(d_singlePipe[1]);
711 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
712 }
713 }
714 }
715 int getThread()
716 {
717 uint64_t pos = d_pos++;
718 ++d_queued;
719 return d_tcpclientthreads[pos % d_numthreads];
720 }
721 bool hasReachedMaxThreads() const
722 {
723 return d_numthreads >= d_maxthreads;
724 }
725 uint64_t getThreadsCount() const
726 {
727 return d_numthreads;
728 }
729 uint64_t getQueuedCount() const
730 {
731 return d_queued;
732 }
733 void decrementQueuedCount()
734 {
735 --d_queued;
736 }
737 void addTCPClientThread();
738 };
739
740 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
741
742 struct DownstreamState
743 {
744 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
745
746 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
747 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
748 ~DownstreamState()
749 {
750 for (auto& fd : sockets) {
751 if (fd >= 0) {
752 close(fd);
753 fd = -1;
754 }
755 }
756 }
757 boost::uuids::uuid id;
758 std::set<unsigned int> hashes;
759 mutable pthread_rwlock_t d_lock;
760 std::vector<int> sockets;
761 std::mutex socketsLock;
762 std::mutex connectLock;
763 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
764 std::thread tid;
765 const ComboAddress remote;
766 QPSLimiter qps;
767 vector<IDState> idStates;
768 const ComboAddress sourceAddr;
769 checkfunc_t checkFunction;
770 DNSName checkName{"a.root-servers.net."};
771 QType checkType{QType::A};
772 uint16_t checkClass{QClass::IN};
773 std::atomic<uint64_t> idOffset{0};
774 std::atomic<uint64_t> sendErrors{0};
775 std::atomic<uint64_t> outstanding{0};
776 std::atomic<uint64_t> reuseds{0};
777 std::atomic<uint64_t> queries{0};
778 struct {
779 std::atomic<uint64_t> sendErrors{0};
780 std::atomic<uint64_t> reuseds{0};
781 std::atomic<uint64_t> queries{0};
782 } prev;
783 std::atomic<uint64_t> tcpDiedSendingQuery{0};
784 std::atomic<uint64_t> tcpDiedReadingResponse{0};
785 std::atomic<uint64_t> tcpGaveUp{0};
786 std::atomic<uint64_t> tcpReadTimeouts{0};
787 std::atomic<uint64_t> tcpWriteTimeouts{0};
788 std::atomic<uint64_t> tcpCurrentConnections{0};
789 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
790 /* in ms */
791 std::atomic<double> tcpAvgConnectionDuration{0.0};
792 string name;
793 size_t socketsOffset{0};
794 double queryLoad{0.0};
795 double dropRate{0.0};
796 double latencyUsec{0.0};
797 int order{1};
798 int weight{1};
799 int tcpConnectTimeout{5};
800 int tcpRecvTimeout{30};
801 int tcpSendTimeout{30};
802 unsigned int checkInterval{1};
803 unsigned int lastCheck{0};
804 const unsigned int sourceItf{0};
805 uint16_t retries{5};
806 uint16_t xpfRRCode{0};
807 uint16_t checkTimeout{1000}; /* in milliseconds */
808 uint8_t currentCheckFailures{0};
809 uint8_t consecutiveSuccessfulChecks{0};
810 uint8_t maxCheckFailures{1};
811 uint8_t minRiseSuccesses{1};
812 StopWatch sw;
813 set<string> pools;
814 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
815 bool mustResolve{false};
816 bool upStatus{false};
817 bool useECS{false};
818 bool setCD{false};
819 bool disableZeroScope{false};
820 std::atomic<bool> connected{false};
821 std::atomic_flag threadStarted;
822 bool tcpFastOpen{false};
823 bool ipBindAddrNoPort{true};
824
825 bool isUp() const
826 {
827 if(availability == Availability::Down)
828 return false;
829 if(availability == Availability::Up)
830 return true;
831 return upStatus;
832 }
833 void setUp() { availability = Availability::Up; }
834 void setDown() { availability = Availability::Down; }
835 void setAuto() { availability = Availability::Auto; }
836 string getName() const {
837 if (name.empty()) {
838 return remote.toStringWithPort();
839 }
840 return name;
841 }
842 string getNameWithAddr() const {
843 if (name.empty()) {
844 return remote.toStringWithPort();
845 }
846 return name + " (" + remote.toStringWithPort()+ ")";
847 }
848 string getStatus() const
849 {
850 string status;
851 if(availability == DownstreamState::Availability::Up)
852 status = "UP";
853 else if(availability == DownstreamState::Availability::Down)
854 status = "DOWN";
855 else
856 status = (upStatus ? "up" : "down");
857 return status;
858 }
859 bool reconnect();
860 void hash();
861 void setId(const boost::uuids::uuid& newId);
862 void setWeight(int newWeight);
863
864 void updateTCPMetrics(size_t queries, uint64_t durationMs)
865 {
866 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
867 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
868 }
869 };
870 using servers_t =vector<std::shared_ptr<DownstreamState>>;
871
872 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
873
874 void responderThread(std::shared_ptr<DownstreamState> state);
875 extern std::mutex g_luamutex;
876 extern LuaContext g_lua;
877 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
878
879 class DNSRule
880 {
881 public:
882 virtual ~DNSRule ()
883 {
884 }
885 virtual bool matches(const DNSQuestion* dq) const =0;
886 virtual string toString() const = 0;
887 mutable std::atomic<uint64_t> d_matches{0};
888 };
889
890 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
891 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
892
893 struct ServerPolicy
894 {
895 string name;
896 policyfunc_t policy;
897 bool isLua;
898 std::string toString() const {
899 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
900 }
901 };
902
903 struct ServerPool
904 {
905 ServerPool()
906 {
907 pthread_rwlock_init(&d_lock, nullptr);
908 }
909
910 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
911
912 bool getECS() const
913 {
914 return d_useECS;
915 }
916
917 void setECS(bool useECS)
918 {
919 d_useECS = useECS;
920 }
921
922 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
923 std::shared_ptr<ServerPolicy> policy{nullptr};
924
925 size_t countServers(bool upOnly)
926 {
927 size_t count = 0;
928 ReadLock rl(&d_lock);
929 for (const auto& server : d_servers) {
930 if (!upOnly || std::get<1>(server)->isUp() ) {
931 count++;
932 }
933 }
934 return count;
935 }
936
937 NumberedVector<shared_ptr<DownstreamState>> getServers()
938 {
939 NumberedVector<shared_ptr<DownstreamState>> result;
940 {
941 ReadLock rl(&d_lock);
942 result = d_servers;
943 }
944 return result;
945 }
946
947 void addServer(shared_ptr<DownstreamState>& server)
948 {
949 WriteLock wl(&d_lock);
950 unsigned int count = (unsigned int) d_servers.size();
951 d_servers.push_back(make_pair(++count, server));
952 /* we need to reorder based on the server 'order' */
953 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
954 return a.second->order < b.second->order;
955 });
956 /* and now we need to renumber for Lua (custom policies) */
957 size_t idx = 1;
958 for (auto& serv : d_servers) {
959 serv.first = idx++;
960 }
961 }
962
963 void removeServer(shared_ptr<DownstreamState>& server)
964 {
965 WriteLock wl(&d_lock);
966 size_t idx = 1;
967 bool found = false;
968 for (auto it = d_servers.begin(); it != d_servers.end();) {
969 if (found) {
970 /* we need to renumber the servers placed
971 after the removed one, for Lua (custom policies) */
972 it->first = idx++;
973 it++;
974 }
975 else if (it->second == server) {
976 it = d_servers.erase(it);
977 found = true;
978 } else {
979 idx++;
980 it++;
981 }
982 }
983 }
984
985 private:
986 NumberedVector<shared_ptr<DownstreamState>> d_servers;
987 pthread_rwlock_t d_lock;
988 bool d_useECS{false};
989 };
990 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
991 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
992 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
993 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
994
995 struct CarbonConfig
996 {
997 ComboAddress server;
998 std::string namespace_name;
999 std::string ourname;
1000 std::string instance_name;
1001 unsigned int interval;
1002 };
1003
/* Flag values for the EDNS header. */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0,
  EDNS_HEADER_FLAG_DO = 32768 // 0x8000, i.e. bit 15; presumably the EDNS 'DO' bit
};
1008
1009 struct DNSDistRuleAction
1010 {
1011 std::shared_ptr<DNSRule> d_rule;
1012 std::shared_ptr<DNSAction> d_action;
1013 boost::uuids::uuid d_id;
1014 uint64_t d_creationOrder;
1015 };
1016
1017 struct DNSDistResponseRuleAction
1018 {
1019 std::shared_ptr<DNSRule> d_rule;
1020 std::shared_ptr<DNSResponseAction> d_action;
1021 boost::uuids::uuid d_id;
1022 uint64_t d_creationOrder;
1023 };
1024
1025 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1026 extern DNSAction::Action g_dynBlockAction;
1027
1028 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1029 extern GlobalStateHolder<ServerPolicy> g_policy;
1030 extern GlobalStateHolder<servers_t> g_dstates;
1031 extern GlobalStateHolder<pools_t> g_pools;
1032 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1033 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1034 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1035 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1036 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1037
1038 extern ComboAddress g_serverControl; // not changed during runtime
1039
1040 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1041 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1042 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1043 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1044 extern bool g_truncateTC;
1045 extern bool g_fixupCase;
1046 extern int g_tcpRecvTimeout;
1047 extern int g_tcpSendTimeout;
1048 extern int g_udpTimeout;
1049 extern uint16_t g_maxOutstanding;
1050 extern std::atomic<bool> g_configurationDone;
1051 extern uint64_t g_maxTCPClientThreads;
1052 extern uint64_t g_maxTCPQueuedConnections;
1053 extern size_t g_maxTCPQueriesPerConn;
1054 extern size_t g_maxTCPConnectionDuration;
1055 extern size_t g_maxTCPConnectionsPerClient;
1056 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1057 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1058 extern bool g_verboseHealthChecks;
1059 extern uint32_t g_staleCacheEntriesTTL;
1060 extern bool g_apiReadWrite;
1061 extern std::string g_apiConfigDirectory;
1062 extern bool g_servFailOnNoPolicy;
1063 extern uint32_t g_hashperturb;
1064 extern bool g_useTCPSinglePipe;
1065 extern uint16_t g_downstreamTCPCleanupInterval;
1066 extern size_t g_udpVectorSize;
1067 extern bool g_preserveTrailingData;
1068 extern bool g_allowEmptyResponse;
1069 extern bool g_roundrobinFailOnNoServer;
1070
#ifdef HAVE_EBPF
/* BPF filter handles, only declared when HAVE_EBPF is defined. */
extern std::shared_ptr<BPFFilter> g_defaultBPFFilter;
extern std::vector<std::shared_ptr<DynBPFFilter>> g_dynBPFFilters;
#endif /* HAVE_EBPF */
1075
1076 struct LocalHolders
1077 {
1078 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1079 {
1080 }
1081
1082 LocalStateHolder<NetmaskGroup> acl;
1083 LocalStateHolder<ServerPolicy> policy;
1084 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1085 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1086 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1087 LocalStateHolder<servers_t> servers;
1088 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1089 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1090 LocalStateHolder<pools_t> pools;
1091 };
1092
1093 struct dnsheader;
1094
1095 void controlThread(int fd, ComboAddress local);
1096 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1097 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1098 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1099 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1100
1101 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1102
1103 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1104 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1105 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1106 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1107 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1108
1109 struct WebserverConfig
1110 {
1111 std::string password;
1112 std::string apiKey;
1113 boost::optional<std::map<std::string, std::string> > customHeaders;
1114 std::mutex lock;
1115 };
1116
1117 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1118 void setWebserverPassword(const std::string& password);
1119 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1120
1121 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1122 void tcpAcceptorThread(void* p);
1123 #ifdef HAVE_DNS_OVER_HTTPS
1124 void dohThread(ClientState* cs);
1125 #endif /* HAVE_DNS_OVER_HTTPS */
1126
/* Tracking of Lua side effects. */

/* if nothing has been declared, set that there are no side effects */
void setLuaNoSideEffect();
/* set to report a side effect, cancelling all _no_ side effect calls */
void setLuaSideEffect();
/* set if there were only explicit declarations of _no_ side effect */
bool getLuaNoSideEffect();
/* reset to indeterminate state */
void resetLuaSideEffect();
1131
1132 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1133 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1134
1135 bool checkQueryHeaders(const struct dnsheader* dh);
1136
1137 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1138 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1139 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1140
1141 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1142
1143 uint16_t getRandomDNSID();
1144
1145 #include "dnsdist-snmp.hh"
1146
1147 extern bool g_snmpEnabled;
1148 extern bool g_snmpTrapsEnabled;
1149 extern DNSDistSNMPAgent* g_snmpAgent;
1150 extern bool g_addEDNSToSelfGeneratedResponses;
1151
/* Going by the name, the size of the buffer used for incoming UDP data
   (presumably in bytes). One copy per translation unit, as it is static. */
static const size_t s_udpIncomingBufferSize = 1500;
1153
/* Result type returned by processQuery(). */
enum class ProcessQueryResult
{
  Drop,
  SendAnswer,
  PassToBackend
};
1155 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1156
1157 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1158 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1159
1160 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1161 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);