]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #7695 from rgacogne/dnsdist-roundrobin-all-servers-down
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/circular_buffer.hpp>
35 #include <boost/variant.hpp>
36
37 #include "bpf-filter.hh"
38 #include "capabilities.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
42 #include "dnsname.hh"
43 #include "ednsoptions.hh"
44 #include "gettime.hh"
45 #include "iputils.hh"
46 #include "misc.hh"
47 #include "mplexer.hh"
48 #include "sholder.hh"
49 #include "tcpiohandler.hh"
50 #include "uuid-utils.hh"
51
52 void carbonDumpThread();
53 uint64_t uptimeOfProcess(const std::string& str);
54
55 extern uint16_t g_ECSSourcePrefixV4;
56 extern uint16_t g_ECSSourcePrefixV6;
57 extern bool g_ECSOverride;
58
59 typedef std::unordered_map<string, string> QTag;
60
61 struct DNSQuestion
62 {
63 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
64 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
65 const uint16_t* flags = getFlagsFromDNSHeader(dh);
66 origFlags = *flags;
67 }
68 DNSQuestion(const DNSQuestion&) = delete;
69 DNSQuestion& operator=(const DNSQuestion&) = delete;
70 DNSQuestion(DNSQuestion&&) = default;
71
72 #ifdef HAVE_PROTOBUF
73 boost::optional<boost::uuids::uuid> uniqueId;
74 #endif
75 Netmask ecs;
76 boost::optional<Netmask> subnet;
77 const DNSName* qname{nullptr};
78 const ComboAddress* local{nullptr};
79 const ComboAddress* remote{nullptr};
80 std::shared_ptr<QTag> qTag{nullptr};
81 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
82 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
83 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
84 struct dnsheader* dh{nullptr};
85 const struct timespec* queryTime{nullptr};
86 size_t size;
87 unsigned int consumed{0};
88 int delayMsec{0};
89 boost::optional<uint32_t> tempFailureTTL;
90 uint32_t cacheKeyNoECS;
91 uint32_t cacheKey;
92 const uint16_t qtype;
93 const uint16_t qclass;
94 uint16_t len;
95 uint16_t ecsPrefixLength;
96 uint16_t origFlags;
97 uint8_t ednsRCode{0};
98 const bool tcp;
99 bool skipCache{false};
100 bool ecsOverride;
101 bool useECS{true};
102 bool addXPF{true};
103 bool ecsSet{false};
104 bool ecsAdded{false};
105 bool ednsAdded{false};
106 bool useZeroScope{false};
107 bool dnssecOK{false};
108 };
109
110 struct DNSResponse : DNSQuestion
111 {
112 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
113 DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
114 DNSResponse(const DNSResponse&) = delete;
115 DNSResponse& operator=(const DNSResponse&) = delete;
116 DNSResponse(DNSResponse&&) = default;
117 };
118
119 /* so what could you do:
120 drop,
121 fake up nxdomain,
122 provide actual answer,
123 allow & and stop processing,
124 continue processing,
125 modify header: (servfail|refused|notimp), set TC=1,
126 send to pool */
127
128 class DNSAction
129 {
130 public:
131 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
132 static std::string typeToString(const Action& action)
133 {
134 switch(action) {
135 case Action::Drop:
136 return "Drop";
137 case Action::Nxdomain:
138 return "Send NXDomain";
139 case Action::Refused:
140 return "Send Refused";
141 case Action::Spoof:
142 return "Spoof an answer";
143 case Action::Allow:
144 return "Allow";
145 case Action::HeaderModify:
146 return "Modify the header";
147 case Action::Pool:
148 return "Route to a pool";
149 case Action::Delay:
150 return "Delay";
151 case Action::Truncate:
152 return "Truncate over UDP";
153 case Action::ServFail:
154 return "Send ServFail";
155 case Action::None:
156 case Action::NoOp:
157 return "Do nothing";
158 case Action::NoRecurse:
159 return "Set rd=0";
160 }
161
162 return "Unknown";
163 }
164
165 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
166 virtual ~DNSAction()
167 {
168 }
169 virtual string toString() const = 0;
170 virtual std::map<string, double> getStats() const
171 {
172 return {{}};
173 }
174 };
175
176 class DNSResponseAction
177 {
178 public:
179 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
180 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
181 virtual ~DNSResponseAction()
182 {
183 }
184 virtual string toString() const = 0;
185 };
186
187 struct DynBlock
188 {
189 DynBlock(): action(DNSAction::Action::None), warning(false)
190 {
191 }
192
193 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
194 {
195 }
196
197 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
198 {
199 blocks.store(rhs.blocks);
200 }
201
202 DynBlock& operator=(const DynBlock& rhs)
203 {
204 reason=rhs.reason;
205 until=rhs.until;
206 domain=rhs.domain;
207 action=rhs.action;
208 blocks.store(rhs.blocks);
209 warning=rhs.warning;
210 return *this;
211 }
212
213 string reason;
214 struct timespec until;
215 DNSName domain;
216 DNSAction::Action action;
217 mutable std::atomic<unsigned int> blocks;
218 bool warning;
219 };
220
221 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
222
223 extern vector<pair<struct timeval, std::string> > g_confDelta;
224
225 struct DNSDistStats
226 {
227 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
228 stat_t responses{0};
229 stat_t servfailResponses{0};
230 stat_t queries{0};
231 stat_t frontendNXDomain{0};
232 stat_t frontendServFail{0};
233 stat_t frontendNoError{0};
234 stat_t nonCompliantQueries{0};
235 stat_t nonCompliantResponses{0};
236 stat_t rdQueries{0};
237 stat_t emptyQueries{0};
238 stat_t aclDrops{0};
239 stat_t dynBlocked{0};
240 stat_t ruleDrop{0};
241 stat_t ruleNXDomain{0};
242 stat_t ruleRefused{0};
243 stat_t ruleServFail{0};
244 stat_t selfAnswered{0};
245 stat_t downstreamTimeouts{0};
246 stat_t downstreamSendErrors{0};
247 stat_t truncFail{0};
248 stat_t noPolicy{0};
249 stat_t cacheHits{0};
250 stat_t cacheMisses{0};
251 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0};
252 stat_t securityStatus{0};
253
254 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
255 typedef std::function<uint64_t(const std::string&)> statfunction_t;
256 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
257 std::vector<std::pair<std::string, entry_t>> entries{
258 {"responses", &responses},
259 {"servfail-responses", &servfailResponses},
260 {"queries", &queries},
261 {"frontend-nxdomain", &frontendNXDomain},
262 {"frontend-servfail", &frontendServFail},
263 {"frontend-noerror", &frontendNoError},
264 {"acl-drops", &aclDrops},
265 {"rule-drop", &ruleDrop},
266 {"rule-nxdomain", &ruleNXDomain},
267 {"rule-refused", &ruleRefused},
268 {"rule-servfail", &ruleServFail},
269 {"self-answered", &selfAnswered},
270 {"downstream-timeouts", &downstreamTimeouts},
271 {"downstream-send-errors", &downstreamSendErrors},
272 {"trunc-failures", &truncFail},
273 {"no-policy", &noPolicy},
274 {"latency0-1", &latency0_1},
275 {"latency1-10", &latency1_10},
276 {"latency10-50", &latency10_50},
277 {"latency50-100", &latency50_100},
278 {"latency100-1000", &latency100_1000},
279 {"latency-slow", &latencySlow},
280 {"latency-avg100", &latencyAvg100},
281 {"latency-avg1000", &latencyAvg1000},
282 {"latency-avg10000", &latencyAvg10000},
283 {"latency-avg1000000", &latencyAvg1000000},
284 {"uptime", uptimeOfProcess},
285 {"real-memory-usage", getRealMemoryUsage},
286 {"special-memory-usage", getSpecialMemoryUsage},
287 {"noncompliant-queries", &nonCompliantQueries},
288 {"noncompliant-responses", &nonCompliantResponses},
289 {"rdqueries", &rdQueries},
290 {"empty-queries", &emptyQueries},
291 {"cache-hits", &cacheHits},
292 {"cache-misses", &cacheMisses},
293 {"cpu-user-msec", getCPUTimeUser},
294 {"cpu-sys-msec", getCPUTimeSystem},
295 {"fd-usage", getOpenFileDescriptors},
296 {"dyn-blocked", &dynBlocked},
297 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
298 {"security-status", &securityStatus}
299 };
300 };
301
// Metric types for Prometheus
enum class PrometheusMetricType: int {
  counter = 1,
  gauge = 2
};

// Keeps additional information about metrics
struct MetricDefinition {
  MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
  }

  MetricDefinition() = default;

  // Metric description
  std::string description;
  // Metric type for Prometheus. Initialized so that a default-constructed
  // definition never carries an indeterminate enumerator.
  PrometheusMetricType prometheusType{PrometheusMetricType::counter};
};

// Read-only catalogue describing the exported metrics.
struct MetricDefinitionStorage {
  // Looks up a metric definition by name.
  // Returns true and copies the definition into `metric` when the name is
  // known; returns false and leaves `metric` untouched otherwise.
  bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) const {
    const auto metricDetailsIter = metrics.find(metricName);

    if (metricDetailsIter == metrics.end()) {
      return false;
    }

    metric = metricDetailsIter->second;
    return true;
  }

  // Returns the string representation of a Prometheus metric type, or an
  // empty string for a value that is not a known enumerator.
  std::string getPrometheusStringMetricType(PrometheusMetricType metricType) const {
    switch (metricType) {
    case PrometheusMetricType::counter:
      return "counter";
    case PrometheusMetricType::gauge:
      return "gauge";
    default:
      return "";
    }
  }

  std::map<std::string, MetricDefinition> metrics = {
    { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
    { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
    { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
    { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
    { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
    { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
    { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
    { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
    { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
    { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
    { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
    { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
    { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
    { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
    { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
    { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
    { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
    { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
    { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
    { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
    { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
    { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
    { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
    { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
    { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
    { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
    { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
    { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
    { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
    { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
    { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
    { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
    { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
    { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
    { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
    { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
    { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
    { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
    { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
    { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
  };
};

extern MetricDefinitionStorage g_metricDefinitions;
394 extern struct DNSDistStats g_stats;
395 void doLatencyStats(double udiff);
396
397
398 struct StopWatch
399 {
400 StopWatch(bool realTime=false): d_needRealTime(realTime)
401 {
402 }
403 struct timespec d_start{0,0};
404 bool d_needRealTime{false};
405
406 void start() {
407 if(gettime(&d_start, d_needRealTime) < 0)
408 unixDie("Getting timestamp");
409
410 }
411
412 void set(const struct timespec& from) {
413 d_start = from;
414 }
415
416 double udiff() const {
417 struct timespec now;
418 if(gettime(&now, d_needRealTime) < 0)
419 unixDie("Getting timestamp");
420
421 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
422 }
423
424 double udiffAndSet() {
425 struct timespec now;
426 if(gettime(&now, d_needRealTime) < 0)
427 unixDie("Getting timestamp");
428
429 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
430 d_start = now;
431 return ret;
432 }
433
434 };
435
436 class BasicQPSLimiter
437 {
438 public:
439 BasicQPSLimiter()
440 {
441 }
442
443 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
444 {
445 d_prev.start();
446 }
447
448 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
449 {
450 auto delta = d_prev.udiffAndSet();
451
452 if(delta > 0.0) // time, frequently, does go backwards..
453 d_tokens += 1.0 * rate * (delta/1000000.0);
454
455 if(d_tokens > burst) {
456 d_tokens = burst;
457 }
458
459 bool ret=false;
460 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
461 ret=true;
462 --d_tokens;
463 }
464
465 return ret;
466 }
467
468 bool seenSince(const struct timespec& cutOff) const
469 {
470 return cutOff < d_prev.d_start;
471 }
472
473 protected:
474 mutable StopWatch d_prev;
475 mutable double d_tokens;
476 };
477
478 class QPSLimiter : public BasicQPSLimiter
479 {
480 public:
481 QPSLimiter(): BasicQPSLimiter()
482 {
483 }
484
485 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
486 {
487 d_prev.start();
488 }
489
490 unsigned int getRate() const
491 {
492 return d_passthrough ? 0 : d_rate;
493 }
494
495 int getPassed() const
496 {
497 return d_passed;
498 }
499
500 int getBlocked() const
501 {
502 return d_blocked;
503 }
504
505 bool check() const // this is not quite fair
506 {
507 if (d_passthrough) {
508 return true;
509 }
510
511 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
512 if (ret) {
513 d_passed++;
514 }
515 else {
516 d_blocked++;
517 }
518
519 return ret;
520 }
521 private:
522 mutable unsigned int d_passed{0};
523 mutable unsigned int d_blocked{0};
524 unsigned int d_rate;
525 unsigned int d_burst;
526 bool d_passthrough{true};
527 };
528
529 struct ClientState;
530
531 struct IDState
532 {
533 IDState() : origFD(-1), sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
534 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
535 {
536 origFD.store(orig.origFD.load());
537 origID = orig.origID;
538 delayMsec = orig.delayMsec;
539 tempFailureTTL = orig.tempFailureTTL;
540 }
541
542 std::atomic<int> origFD; // set to <0 to indicate this state is empty // 4
543
544 ComboAddress origRemote; // 28
545 ComboAddress origDest; // 28
546 StopWatch sentTime; // 16
547 DNSName qname; // 80
548 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
549 #ifdef HAVE_PROTOBUF
550 boost::optional<boost::uuids::uuid> uniqueId;
551 #endif
552 boost::optional<Netmask> subnet{boost::none};
553 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
554 std::shared_ptr<QTag> qTag{nullptr};
555 const ClientState* cs{nullptr};
556 uint32_t cacheKey; // 4
557 uint32_t cacheKeyNoECS; // 4
558 uint16_t age; // 4
559 uint16_t qtype; // 2
560 uint16_t qclass; // 2
561 uint16_t origID; // 2
562 uint16_t origFlags; // 2
563 int delayMsec;
564 boost::optional<uint32_t> tempFailureTTL;
565 bool ednsAdded{false};
566 bool ecsAdded{false};
567 bool skipCache{false};
568 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
569 bool dnssecOK{false};
570 bool useZeroScope;
571 };
572
573 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
574 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
575 struct QueryCount {
576 QueryCount()
577 {
578 pthread_rwlock_init(&queryLock, nullptr);
579 }
580 QueryCountRecords records;
581 QueryCountFilter filter;
582 pthread_rwlock_t queryLock;
583 bool enabled{false};
584 };
585
586 extern QueryCount g_qcount;
587
588 struct ClientState
589 {
590 std::set<int> cpus;
591 ComboAddress local;
592 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
593 shared_ptr<TLSFrontend> tlsFrontend;
594 std::atomic<uint64_t> queries{0};
595 std::atomic<uint64_t> tcpDiedReadingQuery{0};
596 std::atomic<uint64_t> tcpDiedSendingResponse{0};
597 std::atomic<uint64_t> tcpGaveUp{0};
598 std::atomic<uint64_t> tcpClientTimeouts{0};
599 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
600 std::atomic<uint64_t> tcpCurrentConnections{0};
601 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
602 /* in ms */
603 std::atomic<double> tcpAvgConnectionDuration{0.0};
604 int udpFD{-1};
605 int tcpFD{-1};
606 bool muted{false};
607
608 int getSocket() const
609 {
610 return udpFD != -1 ? udpFD : tcpFD;
611 }
612
613 std::string getType() const
614 {
615 std::string result = udpFD != -1 ? "UDP" : "TCP";
616
617 if (tlsFrontend) {
618 result += " (DNS over TLS)";
619 }
620 else if (dnscryptCtx) {
621 result += " (DNSCrypt)";
622 }
623
624 return result;
625 }
626
627 #ifdef HAVE_EBPF
628 shared_ptr<BPFFilter> d_filter;
629
630 void detachFilter()
631 {
632 if (d_filter) {
633 d_filter->removeSocket(getSocket());
634 d_filter = nullptr;
635 }
636 }
637
638 void attachFilter(shared_ptr<BPFFilter> bpf)
639 {
640 detachFilter();
641
642 bpf->addSocket(getSocket());
643 d_filter = bpf;
644 }
645 #endif /* HAVE_EBPF */
646
647 void updateTCPMetrics(size_t queries, uint64_t durationMs)
648 {
649 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
650 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
651 }
652 };
653
654 class TCPClientCollection {
655 std::vector<int> d_tcpclientthreads;
656 std::atomic<uint64_t> d_numthreads{0};
657 std::atomic<uint64_t> d_pos{0};
658 std::atomic<uint64_t> d_queued{0};
659 const uint64_t d_maxthreads{0};
660 std::mutex d_mutex;
661 int d_singlePipe[2];
662 const bool d_useSinglePipe;
663 public:
664
665 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
666
667 {
668 d_tcpclientthreads.reserve(maxThreads);
669
670 if (d_useSinglePipe) {
671 if (pipe(d_singlePipe) < 0) {
672 throw std::runtime_error("Error creating the TCP single communication pipe: " + string(strerror(errno)));
673 }
674
675 if (!setNonBlocking(d_singlePipe[0])) {
676 int err = errno;
677 close(d_singlePipe[0]);
678 close(d_singlePipe[1]);
679 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
680 }
681
682 if (!setNonBlocking(d_singlePipe[1])) {
683 int err = errno;
684 close(d_singlePipe[0]);
685 close(d_singlePipe[1]);
686 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
687 }
688 }
689 }
690 int getThread()
691 {
692 uint64_t pos = d_pos++;
693 ++d_queued;
694 return d_tcpclientthreads[pos % d_numthreads];
695 }
696 bool hasReachedMaxThreads() const
697 {
698 return d_numthreads >= d_maxthreads;
699 }
700 uint64_t getThreadsCount() const
701 {
702 return d_numthreads;
703 }
704 uint64_t getQueuedCount() const
705 {
706 return d_queued;
707 }
708 void decrementQueuedCount()
709 {
710 --d_queued;
711 }
712 void addTCPClientThread();
713 };
714
715 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
716
717 struct DownstreamState
718 {
719 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
720
721 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
722 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
723 ~DownstreamState()
724 {
725 for (auto& fd : sockets) {
726 if (fd >= 0) {
727 close(fd);
728 fd = -1;
729 }
730 }
731 }
732 boost::uuids::uuid id;
733 std::set<unsigned int> hashes;
734 mutable pthread_rwlock_t d_lock;
735 std::vector<int> sockets;
736 std::mutex socketsLock;
737 std::mutex connectLock;
738 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
739 std::thread tid;
740 const ComboAddress remote;
741 QPSLimiter qps;
742 vector<IDState> idStates;
743 const ComboAddress sourceAddr;
744 checkfunc_t checkFunction;
745 DNSName checkName{"a.root-servers.net."};
746 QType checkType{QType::A};
747 uint16_t checkClass{QClass::IN};
748 std::atomic<uint64_t> idOffset{0};
749 std::atomic<uint64_t> sendErrors{0};
750 std::atomic<uint64_t> outstanding{0};
751 std::atomic<uint64_t> reuseds{0};
752 std::atomic<uint64_t> queries{0};
753 struct {
754 std::atomic<uint64_t> sendErrors{0};
755 std::atomic<uint64_t> reuseds{0};
756 std::atomic<uint64_t> queries{0};
757 } prev;
758 std::atomic<uint64_t> tcpDiedSendingQuery{0};
759 std::atomic<uint64_t> tcpDiedReadingResponse{0};
760 std::atomic<uint64_t> tcpGaveUp{0};
761 std::atomic<uint64_t> tcpReadTimeouts{0};
762 std::atomic<uint64_t> tcpWriteTimeouts{0};
763 std::atomic<uint64_t> tcpCurrentConnections{0};
764 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
765 /* in ms */
766 std::atomic<double> tcpAvgConnectionDuration{0.0};
767 string name;
768 size_t socketsOffset{0};
769 double queryLoad{0.0};
770 double dropRate{0.0};
771 double latencyUsec{0.0};
772 int order{1};
773 int weight{1};
774 int tcpConnectTimeout{5};
775 int tcpRecvTimeout{30};
776 int tcpSendTimeout{30};
777 unsigned int checkInterval{1};
778 unsigned int lastCheck{0};
779 const unsigned int sourceItf{0};
780 uint16_t retries{5};
781 uint16_t xpfRRCode{0};
782 uint16_t checkTimeout{1000}; /* in milliseconds */
783 uint8_t currentCheckFailures{0};
784 uint8_t consecutiveSuccessfulChecks{0};
785 uint8_t maxCheckFailures{1};
786 uint8_t minRiseSuccesses{1};
787 StopWatch sw;
788 set<string> pools;
789 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
790 bool mustResolve{false};
791 bool upStatus{false};
792 bool useECS{false};
793 bool setCD{false};
794 bool disableZeroScope{false};
795 std::atomic<bool> connected{false};
796 std::atomic_flag threadStarted;
797 bool tcpFastOpen{false};
798 bool ipBindAddrNoPort{true};
799
800 bool isUp() const
801 {
802 if(availability == Availability::Down)
803 return false;
804 if(availability == Availability::Up)
805 return true;
806 return upStatus;
807 }
808 void setUp() { availability = Availability::Up; }
809 void setDown() { availability = Availability::Down; }
810 void setAuto() { availability = Availability::Auto; }
811 string getName() const {
812 if (name.empty()) {
813 return remote.toStringWithPort();
814 }
815 return name;
816 }
817 string getNameWithAddr() const {
818 if (name.empty()) {
819 return remote.toStringWithPort();
820 }
821 return name + " (" + remote.toStringWithPort()+ ")";
822 }
823 string getStatus() const
824 {
825 string status;
826 if(availability == DownstreamState::Availability::Up)
827 status = "UP";
828 else if(availability == DownstreamState::Availability::Down)
829 status = "DOWN";
830 else
831 status = (upStatus ? "up" : "down");
832 return status;
833 }
834 bool reconnect();
835 void hash();
836 void setId(const boost::uuids::uuid& newId);
837 void setWeight(int newWeight);
838
839 void updateTCPMetrics(size_t queries, uint64_t durationMs)
840 {
841 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
842 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
843 }
844 };
845 using servers_t =vector<std::shared_ptr<DownstreamState>>;
846
847 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
848
849 void responderThread(std::shared_ptr<DownstreamState> state);
850 extern std::mutex g_luamutex;
851 extern LuaContext g_lua;
852 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
853
854 class DNSRule
855 {
856 public:
857 virtual ~DNSRule ()
858 {
859 }
860 virtual bool matches(const DNSQuestion* dq) const =0;
861 virtual string toString() const = 0;
862 mutable std::atomic<uint64_t> d_matches{0};
863 };
864
865 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
866 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
867
868 struct ServerPolicy
869 {
870 string name;
871 policyfunc_t policy;
872 bool isLua;
873 std::string toString() const {
874 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
875 }
876 };
877
878 struct ServerPool
879 {
880 ServerPool()
881 {
882 pthread_rwlock_init(&d_lock, nullptr);
883 }
884
885 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
886
887 bool getECS() const
888 {
889 return d_useECS;
890 }
891
892 void setECS(bool useECS)
893 {
894 d_useECS = useECS;
895 }
896
897 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
898 std::shared_ptr<ServerPolicy> policy{nullptr};
899
900 size_t countServers(bool upOnly)
901 {
902 size_t count = 0;
903 ReadLock rl(&d_lock);
904 for (const auto& server : d_servers) {
905 if (!upOnly || std::get<1>(server)->isUp() ) {
906 count++;
907 }
908 }
909 return count;
910 }
911
912 NumberedVector<shared_ptr<DownstreamState>> getServers()
913 {
914 NumberedVector<shared_ptr<DownstreamState>> result;
915 {
916 ReadLock rl(&d_lock);
917 result = d_servers;
918 }
919 return result;
920 }
921
922 void addServer(shared_ptr<DownstreamState>& server)
923 {
924 WriteLock wl(&d_lock);
925 unsigned int count = (unsigned int) d_servers.size();
926 d_servers.push_back(make_pair(++count, server));
927 /* we need to reorder based on the server 'order' */
928 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
929 return a.second->order < b.second->order;
930 });
931 /* and now we need to renumber for Lua (custom policies) */
932 size_t idx = 1;
933 for (auto& serv : d_servers) {
934 serv.first = idx++;
935 }
936 }
937
938 void removeServer(shared_ptr<DownstreamState>& server)
939 {
940 WriteLock wl(&d_lock);
941 size_t idx = 1;
942 bool found = false;
943 for (auto it = d_servers.begin(); it != d_servers.end();) {
944 if (found) {
945 /* we need to renumber the servers placed
946 after the removed one, for Lua (custom policies) */
947 it->first = idx++;
948 it++;
949 }
950 else if (it->second == server) {
951 it = d_servers.erase(it);
952 found = true;
953 } else {
954 idx++;
955 it++;
956 }
957 }
958 }
959
960 private:
/* Numbered list of the servers belonging to this pool. */
961 NumberedVector<shared_ptr<DownstreamState>> d_servers;
/* Read/write lock taken by the methods of this class that access d_servers. */
962 pthread_rwlock_t d_lock;
/* Flag returned by getECS() and updated by setECS(); false by default. */
963 bool d_useECS{false};
964 };
/* Maps a pool name to a shared ServerPool. */
965 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Helpers operating on a pool map and a pool name; only declared here. */
966 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
967 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
968 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
969
/* Carbon-related settings; a vector of these is held in g_carbon.
   NOTE(review): none of the members has a default initializer, so
   'interval' is indeterminate unless whoever creates the object sets it. */
970 struct CarbonConfig
971 {
/* address of the Carbon server */
972 ComboAddress server;
973 std::string namespace_name;
974 std::string ourname;
975 std::string instance_name;
/* NOTE(review): unit is not visible from here, presumably seconds - confirm */
976 unsigned int interval;
977 };
978
/* Flag values for the EDNS header. The values are bit masks, written
   in hexadecimal: DO is 0x8000 (32768 in decimal). */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
983
/* Bundles a DNSRule with a DNSAction, along with a UUID and a
   creation-order number. Vectors of these are held in g_rulactions. */
984 struct DNSDistRuleAction
985 {
986 std::shared_ptr<DNSRule> d_rule;
987 std::shared_ptr<DNSAction> d_action;
988 boost::uuids::uuid d_id;
/* no default initializer: has to be set by whoever creates the object */
989 uint64_t d_creationOrder;
990 };
991
/* Same layout as DNSDistRuleAction, except that the action is a
   DNSResponseAction. Vectors of these are held in g_resprulactions,
   g_cachehitresprulactions and g_selfansweredresprulactions. */
992 struct DNSDistResponseRuleAction
993 {
994 std::shared_ptr<DNSRule> d_rule;
995 std::shared_ptr<DNSResponseAction> d_action;
996 boost::uuids::uuid d_id;
/* no default initializer: has to be set by whoever creates the object */
997 uint64_t d_creationOrder;
998 };
999
/* Global state, declared here and defined in another translation unit.
   Most of these are wrapped in a GlobalStateHolder. */
1000 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1001 extern DNSAction::Action g_dynBlockAction;
1002
1003 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1004 extern GlobalStateHolder<ServerPolicy> g_policy;
1005 extern GlobalStateHolder<servers_t> g_dstates;
1006 extern GlobalStateHolder<pools_t> g_pools;
/* rule/action lists: one for DNSDistRuleAction, three for DNSDistResponseRuleAction */
1007 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1008 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1009 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1010 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1011 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1012
/* Plain (non GlobalStateHolder) globals, declared here and defined in
   another translation unit. Only g_configurationDone, g_cacheCleaningDelay
   and g_cacheCleaningPercentage are std::atomic. */
1013 extern ComboAddress g_serverControl; // not changed during runtime
1014
1015 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1016 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
/* raw, non-owning-looking pointers - NOTE(review): ownership is not visible from here */
1017 extern vector<ClientState*> g_frontends;
1018 extern bool g_truncateTC;
1019 extern bool g_fixupCase;
/* NOTE(review): units of the three timeouts below are not visible from here */
1020 extern int g_tcpRecvTimeout;
1021 extern int g_tcpSendTimeout;
1022 extern int g_udpTimeout;
1023 extern uint16_t g_maxOutstanding;
1024 extern std::atomic<bool> g_configurationDone;
1025 extern uint64_t g_maxTCPClientThreads;
1026 extern uint64_t g_maxTCPQueuedConnections;
1027 extern size_t g_maxTCPQueriesPerConn;
1028 extern size_t g_maxTCPConnectionDuration;
1029 extern size_t g_maxTCPConnectionsPerClient;
1030 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1031 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1032 extern bool g_verboseHealthChecks;
1033 extern uint32_t g_staleCacheEntriesTTL;
1034 extern bool g_apiReadWrite;
1035 extern std::string g_apiConfigDirectory;
1036 extern bool g_servFailOnNoPolicy;
1037 extern uint32_t g_hashperturb;
1038 extern bool g_useTCPSinglePipe;
1039 extern uint16_t g_downstreamTCPCleanupInterval;
1040 extern size_t g_udpVectorSize;
1041 extern bool g_preserveTrailingData;
1042 extern bool g_allowEmptyResponse;
1043 extern bool g_roundrobinFailOnNoServer;
1044
/* These two globals are only declared when HAVE_EBPF is defined. */
1045 #ifdef HAVE_EBPF
1046 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1047 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1048 #endif /* HAVE_EBPF */
1049
1050 struct LocalHolders
1051 {
1052 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1053 {
1054 }
1055
1056 LocalStateHolder<NetmaskGroup> acl;
1057 LocalStateHolder<ServerPolicy> policy;
1058 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1059 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1060 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1061 LocalStateHolder<servers_t> servers;
1062 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1063 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1064 LocalStateHolder<pools_t> pools;
1065 };
1066
/* Forward declaration, so that pointers to dnsheader can be used in the
   declarations of this header. */
1067 struct dnsheader;
1068
1069 void controlThread(int fd, ComboAddress local);
/* takes a configuration string and returns a vector of callables */
1070 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
/* Pool lookup helpers; note that getPool() takes the pool map as const
   while createPoolIfNotExists() takes it as a mutable reference. */
1071 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1072 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1073 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1074
/* The functions below share one signature: they receive a numbered server
   list and a DNSQuestion pointer, and return a shared DownstreamState.
   NOTE(review): presumably the built-in server selection policies - confirm. */
1075 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1076
1077 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1078 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1079 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1080 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1081 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1082
/* Settings of the web server: a password, an API key and an optional
   map of custom headers, plus a mutex. */
1083 struct WebserverConfig
1084 {
1085 std::string password;
1086 std::string apiKey;
/* header name to header value; may be unset (boost::optional) */
1087 boost::optional<std::map<std::string, std::string> > customHeaders;
/* NOTE(review): presumably guards the members above - confirm against the users of this struct */
1088 std::mutex lock;
1089 };
1090
/* Setters for the web server settings; the API key and the custom headers
   are passed as boost::optional, by value. */
1091 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1092 void setWebserverPassword(const std::string& password);
1093 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1094
1095 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1096 void tcpAcceptorThread(void* p);
1097
/* Tracking of the side effects of Lua calls. */
1098 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1099 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1100 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1101 void resetLuaSideEffect(); // reset to indeterminate state
1102
/* 'consumed' is an output parameter (non-const reference). */
1103 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
/* The response buffer, its length and its size are passed by pointer, so
   the callee is able to update them. */
1104 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1105
1106 bool checkQueryHeaders(const struct dnsheader* dh);
1107
/* DNSCrypt: the list of local DNSCrypt binds and the query handling helpers. */
1108 extern std::vector<std::tuple<ComboAddress, std::shared_ptr<DNSCryptContext>, bool, int, std::string, std::set<int> > > g_dnsCryptLocals;
1109 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1110 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1111
1112 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1113
1114 uint16_t getRandomDNSID();
1115
1116 #include "dnsdist-snmp.hh"
1117
/* SNMP-related globals, declared here and defined elsewhere.
   g_snmpAgent is a raw pointer. */
1118 extern bool g_snmpEnabled;
1119 extern bool g_snmpTrapsEnabled;
1120 extern DNSDistSNMPAgent* g_snmpAgent;
1121 extern bool g_addEDNSToSelfGeneratedResponses;
1122
/* 'static const' at namespace scope in a header: every translation unit
   including this file gets its own copy of the constant. */
1123 static const size_t s_udpIncomingBufferSize{1500};
1124
/* The three possible results of processQuery(). */
1125 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* 'selectedBackend' is passed by non-const reference, so processQuery() is able to set it. */
1126 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1127
1128 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
/* 'qname' is taken by rvalue reference: the caller gives it up. */
1129 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);