]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
dnsdist: add ability to update webserver credentials
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/circular_buffer.hpp>
35 #include <boost/variant.hpp>
36
37 #include "bpf-filter.hh"
38 #include "dnscrypt.hh"
39 #include "dnsdist-cache.hh"
40 #include "dnsdist-dynbpf.hh"
41 #include "dnsname.hh"
42 #include "ednsoptions.hh"
43 #include "gettime.hh"
44 #include "iputils.hh"
45 #include "misc.hh"
46 #include "mplexer.hh"
47 #include "sholder.hh"
48 #include "tcpiohandler.hh"
49
50 #include <boost/uuid/uuid.hpp>
51 #include <boost/uuid/uuid_generators.hpp>
52 #include <boost/uuid/uuid_io.hpp>
53
54 void* carbonDumpThread();
55 uint64_t uptimeOfProcess(const std::string& str);
56
57 extern uint16_t g_ECSSourcePrefixV4;
58 extern uint16_t g_ECSSourcePrefixV6;
59 extern bool g_ECSOverride;
60
61 extern thread_local boost::uuids::random_generator t_uuidGenerator;
62
63 typedef std::unordered_map<string, string> QTag;
64
65 struct DNSQuestion
66 {
67 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
68 qname(name), qtype(type), qclass(class_), local(lc), remote(rem), dh(header), size(bufferSize), consumed(consumed_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tempFailureTTL(boost::none), tcp(isTcp), queryTime(queryTime_), ecsOverride(g_ECSOverride) { }
69
70 #ifdef HAVE_PROTOBUF
71 boost::optional<boost::uuids::uuid> uniqueId;
72 #endif
73 Netmask ecs;
74 const DNSName* qname;
75 const uint16_t qtype;
76 const uint16_t qclass;
77 const ComboAddress* local;
78 const ComboAddress* remote;
79 std::shared_ptr<QTag> qTag{nullptr};
80 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
81 struct dnsheader* dh;
82 size_t size;
83 unsigned int consumed{0};
84 uint16_t len;
85 uint16_t ecsPrefixLength;
86 boost::optional<uint32_t> tempFailureTTL;
87 const bool tcp;
88 const struct timespec* queryTime;
89 bool skipCache{false};
90 bool ecsOverride;
91 bool useECS{true};
92 bool addXPF{true};
93 bool ecsSet{false};
94 };
95
96 struct DNSResponse : DNSQuestion
97 {
98 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
99 DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
100 };
101
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header: (servfail|refused|notimp), set TC=1,
   send to pool */
110
111 class DNSAction
112 {
113 public:
114 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp };
115 static std::string typeToString(const Action& action)
116 {
117 switch(action) {
118 case Action::Drop:
119 return "Drop";
120 case Action::Nxdomain:
121 return "Send NXDomain";
122 case Action::Refused:
123 return "Send Refused";
124 case Action::Spoof:
125 return "Spoof an answer";
126 case Action::Allow:
127 return "Allow";
128 case Action::HeaderModify:
129 return "Modify the header";
130 case Action::Pool:
131 return "Route to a pool";
132 case Action::Delay:
133 return "Delay";
134 case Action::Truncate:
135 return "Truncate over UDP";
136 case Action::ServFail:
137 return "Send ServFail";
138 case Action::None:
139 case Action::NoOp:
140 return "Do nothing";
141 }
142
143 return "Unknown";
144 }
145
146 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
147 virtual ~DNSAction()
148 {
149 }
150 virtual string toString() const = 0;
151 virtual std::map<string, double> getStats() const
152 {
153 return {{}};
154 }
155 };
156
157 class DNSResponseAction
158 {
159 public:
160 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
161 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
162 virtual ~DNSResponseAction()
163 {
164 }
165 virtual string toString() const = 0;
166 };
167
168 struct DynBlock
169 {
170 DynBlock(): action(DNSAction::Action::None), warning(false)
171 {
172 }
173
174 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
175 {
176 }
177
178 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
179 {
180 blocks.store(rhs.blocks);
181 }
182
183 DynBlock& operator=(const DynBlock& rhs)
184 {
185 reason=rhs.reason;
186 until=rhs.until;
187 domain=rhs.domain;
188 action=rhs.action;
189 blocks.store(rhs.blocks);
190 warning=rhs.warning;
191 return *this;
192 }
193
194 string reason;
195 struct timespec until;
196 DNSName domain;
197 DNSAction::Action action;
198 mutable std::atomic<unsigned int> blocks;
199 bool warning;
200 };
201
202 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
203
204 extern vector<pair<struct timeval, std::string> > g_confDelta;
205
206 struct DNSDistStats
207 {
208 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
209 stat_t responses{0};
210 stat_t servfailResponses{0};
211 stat_t queries{0};
212 stat_t nonCompliantQueries{0};
213 stat_t nonCompliantResponses{0};
214 stat_t rdQueries{0};
215 stat_t emptyQueries{0};
216 stat_t aclDrops{0};
217 stat_t dynBlocked{0};
218 stat_t ruleDrop{0};
219 stat_t ruleNXDomain{0};
220 stat_t ruleRefused{0};
221 stat_t ruleServFail{0};
222 stat_t selfAnswered{0};
223 stat_t downstreamTimeouts{0};
224 stat_t downstreamSendErrors{0};
225 stat_t truncFail{0};
226 stat_t noPolicy{0};
227 stat_t cacheHits{0};
228 stat_t cacheMisses{0};
229 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0};
230
231 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
232 typedef std::function<uint64_t(const std::string&)> statfunction_t;
233 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
234 std::vector<std::pair<std::string, entry_t>> entries{
235 {"responses", &responses},
236 {"servfail-responses", &servfailResponses},
237 {"queries", &queries},
238 {"acl-drops", &aclDrops},
239 {"rule-drop", &ruleDrop},
240 {"rule-nxdomain", &ruleNXDomain},
241 {"rule-refused", &ruleRefused},
242 {"rule-servfail", &ruleServFail},
243 {"self-answered", &selfAnswered},
244 {"downstream-timeouts", &downstreamTimeouts},
245 {"downstream-send-errors", &downstreamSendErrors},
246 {"trunc-failures", &truncFail},
247 {"no-policy", &noPolicy},
248 {"latency0-1", &latency0_1},
249 {"latency1-10", &latency1_10},
250 {"latency10-50", &latency10_50},
251 {"latency50-100", &latency50_100},
252 {"latency100-1000", &latency100_1000},
253 {"latency-slow", &latencySlow},
254 {"latency-avg100", &latencyAvg100},
255 {"latency-avg1000", &latencyAvg1000},
256 {"latency-avg10000", &latencyAvg10000},
257 {"latency-avg1000000", &latencyAvg1000000},
258 {"uptime", uptimeOfProcess},
259 {"real-memory-usage", getRealMemoryUsage},
260 {"noncompliant-queries", &nonCompliantQueries},
261 {"noncompliant-responses", &nonCompliantResponses},
262 {"rdqueries", &rdQueries},
263 {"empty-queries", &emptyQueries},
264 {"cache-hits", &cacheHits},
265 {"cache-misses", &cacheMisses},
266 {"cpu-user-msec", getCPUTimeUser},
267 {"cpu-sys-msec", getCPUTimeSystem},
268 {"fd-usage", getOpenFileDescriptors},
269 {"dyn-blocked", &dynBlocked},
270 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }}
271 };
272 };
273
// Metric types for Prometheus
enum class PrometheusMetricType: int {
  counter = 1,
  gauge = 2
};

// Keeps additional information about metrics
struct MetricDefinition {
  MetricDefinition(PrometheusMetricType prometheusType_, const std::string& description_):
    description(description_), prometheusType(prometheusType_)
  {
  }

  MetricDefinition() = default;

  // Metric description
  std::string description;
  // Metric type for Prometheus. It has an explicit default so that a
  // default-constructed definition never holds an indeterminate value.
  PrometheusMetricType prometheusType{PrometheusMetricType::counter};
};
294
295 struct MetricDefinitionStorage {
296 // Return metric definition by name
297 bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
298 auto metricDetailsIter = metrics.find(metricName);
299
300 if (metricDetailsIter == metrics.end()) {
301 return false;
302 }
303
304 metric = metricDetailsIter->second;
305 return true;
306 };
307
308 // Return string representation of Prometheus metric type
309 std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
310 switch (metricType) {
311 case PrometheusMetricType::counter:
312 return "counter";
313 break;
314 case PrometheusMetricType::gauge:
315 return "gauge";
316 break;
317 default:
318 return "";
319 break;
320 }
321 };
322
323 std::map<std::string, MetricDefinition> metrics = {
324 { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
325 { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
326 { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
327 { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
328 { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
329 { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
330 { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
331 { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
332 { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
333 { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
334 { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
335 { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
336 { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
337 { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
338 { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
339 { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
340 { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
341 { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
342 { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
343 { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
344 { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
345 { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
346 { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
347 { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
348 { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
349 { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
350 { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
351 { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
352 { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
353 { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
354 { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
355 { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
356 { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
357 { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
358 { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
359 { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
360 };
361 };
362
363 extern MetricDefinitionStorage g_metricDefinitions;
364 extern struct DNSDistStats g_stats;
365 void doLatencyStats(double udiff);
366
367
368 struct StopWatch
369 {
370 StopWatch(bool realTime=false): d_needRealTime(realTime)
371 {
372 }
373 struct timespec d_start{0,0};
374 bool d_needRealTime{false};
375
376 void start() {
377 if(gettime(&d_start, d_needRealTime) < 0)
378 unixDie("Getting timestamp");
379
380 }
381
382 void set(const struct timespec& from) {
383 d_start = from;
384 }
385
386 double udiff() const {
387 struct timespec now;
388 if(gettime(&now, d_needRealTime) < 0)
389 unixDie("Getting timestamp");
390
391 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
392 }
393
394 double udiffAndSet() {
395 struct timespec now;
396 if(gettime(&now, d_needRealTime) < 0)
397 unixDie("Getting timestamp");
398
399 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
400 d_start = now;
401 return ret;
402 }
403
404 };
405
406 class BasicQPSLimiter
407 {
408 public:
409 BasicQPSLimiter()
410 {
411 }
412
413 BasicQPSLimiter(unsigned int rate, unsigned int burst): d_tokens(burst)
414 {
415 d_prev.start();
416 }
417
418 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
419 {
420 auto delta = d_prev.udiffAndSet();
421
422 d_tokens += 1.0 * rate * (delta/1000000.0);
423
424 if(d_tokens > burst) {
425 d_tokens = burst;
426 }
427
428 bool ret=false;
429 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
430 ret=true;
431 --d_tokens;
432 }
433
434 return ret;
435 }
436
437 bool seenSince(const struct timespec& cutOff) const
438 {
439 return cutOff < d_prev.d_start;
440 }
441
442 protected:
443 mutable StopWatch d_prev;
444 mutable double d_tokens;
445 };
446
447 class QPSLimiter : public BasicQPSLimiter
448 {
449 public:
450 QPSLimiter(): BasicQPSLimiter()
451 {
452 }
453
454 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(rate, burst), d_rate(rate), d_burst(burst), d_passthrough(false)
455 {
456 d_prev.start();
457 }
458
459 unsigned int getRate() const
460 {
461 return d_passthrough ? 0 : d_rate;
462 }
463
464 int getPassed() const
465 {
466 return d_passed;
467 }
468
469 int getBlocked() const
470 {
471 return d_blocked;
472 }
473
474 bool check() const // this is not quite fair
475 {
476 if (d_passthrough) {
477 return true;
478 }
479
480 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
481 if (ret) {
482 d_passed++;
483 }
484 else {
485 d_blocked++;
486 }
487
488 return ret;
489 }
490 private:
491 mutable unsigned int d_passed{0};
492 mutable unsigned int d_blocked{0};
493 unsigned int d_rate;
494 unsigned int d_burst;
495 bool d_passthrough{true};
496 };
497
498 struct ClientState;
499
500 struct IDState
501 {
502 IDState() : origFD(-1), sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
503 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
504 {
505 origFD.store(orig.origFD.load());
506 origID = orig.origID;
507 delayMsec = orig.delayMsec;
508 tempFailureTTL = orig.tempFailureTTL;
509 }
510
511 std::atomic<int> origFD; // set to <0 to indicate this state is empty // 4
512
513 ComboAddress origRemote; // 28
514 ComboAddress origDest; // 28
515 StopWatch sentTime; // 16
516 DNSName qname; // 80
517 #ifdef HAVE_DNSCRYPT
518 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
519 #endif
520 #ifdef HAVE_PROTOBUF
521 boost::optional<boost::uuids::uuid> uniqueId;
522 #endif
523 boost::optional<Netmask> subnet{boost::none};
524 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
525 std::shared_ptr<QTag> qTag{nullptr};
526 const ClientState* cs{nullptr};
527 uint32_t cacheKey; // 8
528 uint16_t age; // 4
529 uint16_t qtype; // 2
530 uint16_t qclass; // 2
531 uint16_t origID; // 2
532 uint16_t origFlags; // 2
533 int delayMsec;
534 boost::optional<uint32_t> tempFailureTTL;
535 bool ednsAdded{false};
536 bool ecsAdded{false};
537 bool skipCache{false};
538 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
539 };
540
541 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
542 typedef std::function<std::tuple<bool, string>(DNSQuestion dq)> QueryCountFilter;
543 struct QueryCount {
544 QueryCount()
545 {
546 pthread_rwlock_init(&queryLock, nullptr);
547 }
548 QueryCountRecords records;
549 QueryCountFilter filter;
550 pthread_rwlock_t queryLock;
551 bool enabled{false};
552 };
553
554 extern QueryCount g_qcount;
555
556 struct ClientState
557 {
558 std::set<int> cpus;
559 ComboAddress local;
560 #ifdef HAVE_DNSCRYPT
561 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
562 #endif
563 shared_ptr<TLSFrontend> tlsFrontend;
564 std::atomic<uint64_t> queries{0};
565 int udpFD{-1};
566 int tcpFD{-1};
567 bool muted{false};
568
569 int getSocket() const
570 {
571 return udpFD != -1 ? udpFD : tcpFD;
572 }
573
574 #ifdef HAVE_EBPF
575 shared_ptr<BPFFilter> d_filter;
576
577 void detachFilter()
578 {
579 if (d_filter) {
580 d_filter->removeSocket(getSocket());
581 d_filter = nullptr;
582 }
583 }
584
585 void attachFilter(shared_ptr<BPFFilter> bpf)
586 {
587 detachFilter();
588
589 bpf->addSocket(getSocket());
590 d_filter = bpf;
591 }
592 #endif /* HAVE_EBPF */
593 };
594
595 class TCPClientCollection {
596 std::vector<int> d_tcpclientthreads;
597 std::atomic<uint64_t> d_numthreads{0};
598 std::atomic<uint64_t> d_pos{0};
599 std::atomic<uint64_t> d_queued{0};
600 const uint64_t d_maxthreads{0};
601 std::mutex d_mutex;
602 int d_singlePipe[2];
603 const bool d_useSinglePipe;
604 public:
605
606 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
607
608 {
609 d_tcpclientthreads.reserve(maxThreads);
610
611 if (d_useSinglePipe) {
612 if (pipe(d_singlePipe) < 0) {
613 throw std::runtime_error("Error creating the TCP single communication pipe: " + string(strerror(errno)));
614 }
615 if (!setNonBlocking(d_singlePipe[1])) {
616 int err = errno;
617 close(d_singlePipe[0]);
618 close(d_singlePipe[1]);
619 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
620 }
621 }
622 }
623 int getThread()
624 {
625 uint64_t pos = d_pos++;
626 ++d_queued;
627 return d_tcpclientthreads[pos % d_numthreads];
628 }
629 bool hasReachedMaxThreads() const
630 {
631 return d_numthreads >= d_maxthreads;
632 }
633 uint64_t getThreadsCount() const
634 {
635 return d_numthreads;
636 }
637 uint64_t getQueuedCount() const
638 {
639 return d_queued;
640 }
641 void decrementQueuedCount()
642 {
643 --d_queued;
644 }
645 void addTCPClientThread();
646 };
647
648 extern std::shared_ptr<TCPClientCollection> g_tcpclientthreads;
649
650 struct DownstreamState
651 {
652 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
653
654 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
655 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
656 ~DownstreamState()
657 {
658 for (auto& fd : sockets) {
659 if (fd >= 0) {
660 close(fd);
661 fd = -1;
662 }
663 }
664 }
665 boost::uuids::uuid id;
666 std::set<unsigned int> hashes;
667 mutable pthread_rwlock_t d_lock;
668 std::vector<int> sockets;
669 std::mutex socketsLock;
670 std::mutex connectLock;
671 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
672 std::thread tid;
673 const ComboAddress remote;
674 QPSLimiter qps;
675 vector<IDState> idStates;
676 const ComboAddress sourceAddr;
677 checkfunc_t checkFunction;
678 DNSName checkName{"a.root-servers.net."};
679 QType checkType{QType::A};
680 uint16_t checkClass{QClass::IN};
681 std::atomic<uint64_t> idOffset{0};
682 std::atomic<uint64_t> sendErrors{0};
683 std::atomic<uint64_t> outstanding{0};
684 std::atomic<uint64_t> reuseds{0};
685 std::atomic<uint64_t> queries{0};
686 struct {
687 std::atomic<uint64_t> sendErrors{0};
688 std::atomic<uint64_t> reuseds{0};
689 std::atomic<uint64_t> queries{0};
690 } prev;
691 string name;
692 size_t socketsOffset{0};
693 double queryLoad{0.0};
694 double dropRate{0.0};
695 double latencyUsec{0.0};
696 int order{1};
697 int weight{1};
698 int tcpConnectTimeout{5};
699 int tcpRecvTimeout{30};
700 int tcpSendTimeout{30};
701 const unsigned int sourceItf{0};
702 uint16_t retries{5};
703 uint16_t xpfRRCode{0};
704 uint8_t currentCheckFailures{0};
705 uint8_t maxCheckFailures{1};
706 StopWatch sw;
707 set<string> pools;
708 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
709 bool mustResolve{false};
710 bool upStatus{false};
711 bool useECS{false};
712 bool setCD{false};
713 std::atomic<bool> connected{false};
714 std::atomic_flag threadStarted;
715 bool tcpFastOpen{false};
716 bool ipBindAddrNoPort{true};
717
718 bool isUp() const
719 {
720 if(availability == Availability::Down)
721 return false;
722 if(availability == Availability::Up)
723 return true;
724 return upStatus;
725 }
726 void setUp() { availability = Availability::Up; }
727 void setDown() { availability = Availability::Down; }
728 void setAuto() { availability = Availability::Auto; }
729 string getName() const {
730 if (name.empty()) {
731 return remote.toStringWithPort();
732 }
733 return name;
734 }
735 string getNameWithAddr() const {
736 if (name.empty()) {
737 return remote.toStringWithPort();
738 }
739 return name + " (" + remote.toStringWithPort()+ ")";
740 }
741 string getStatus() const
742 {
743 string status;
744 if(availability == DownstreamState::Availability::Up)
745 status = "UP";
746 else if(availability == DownstreamState::Availability::Down)
747 status = "DOWN";
748 else
749 status = (upStatus ? "up" : "down");
750 return status;
751 }
752 bool reconnect();
753 void hash();
754 void setId(const boost::uuids::uuid& newId);
755 void setWeight(int newWeight);
756 };
757 using servers_t =vector<std::shared_ptr<DownstreamState>>;
758
759 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
760
761 void* responderThread(std::shared_ptr<DownstreamState> state);
762 extern std::mutex g_luamutex;
763 extern LuaContext g_lua;
764 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
765
766 class DNSRule
767 {
768 public:
769 virtual ~DNSRule ()
770 {
771 }
772 virtual bool matches(const DNSQuestion* dq) const =0;
773 virtual string toString() const = 0;
774 mutable std::atomic<uint64_t> d_matches{0};
775 };
776
777 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
778 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
779
780 struct ServerPolicy
781 {
782 string name;
783 policyfunc_t policy;
784 bool isLua;
785 std::string toString() const {
786 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
787 }
788 };
789
790 struct ServerPool
791 {
792 ServerPool()
793 {
794 pthread_rwlock_init(&d_lock, nullptr);
795 }
796
797 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
798
799 bool getECS() const
800 {
801 return d_useECS;
802 }
803
804 void setECS(bool useECS)
805 {
806 d_useECS = useECS;
807 }
808
809 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
810 std::shared_ptr<ServerPolicy> policy{nullptr};
811
812 size_t countServers(bool upOnly)
813 {
814 size_t count = 0;
815 ReadLock rl(&d_lock);
816 for (const auto& server : d_servers) {
817 if (!upOnly || std::get<1>(server)->isUp() ) {
818 count++;
819 };
820 };
821 return count;
822 }
823
824 NumberedVector<shared_ptr<DownstreamState>> getServers()
825 {
826 NumberedVector<shared_ptr<DownstreamState>> result;
827 {
828 ReadLock rl(&d_lock);
829 result = d_servers;
830 }
831 return result;
832 }
833
834 void addServer(shared_ptr<DownstreamState>& server)
835 {
836 WriteLock wl(&d_lock);
837 unsigned int count = (unsigned int) d_servers.size();
838 d_servers.push_back(make_pair(++count, server));
839 /* we need to reorder based on the server 'order' */
840 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
841 return a.second->order < b.second->order;
842 });
843 /* and now we need to renumber for Lua (custom policies) */
844 size_t idx = 1;
845 for (auto& serv : d_servers) {
846 serv.first = idx++;
847 }
848 }
849
850 void removeServer(shared_ptr<DownstreamState>& server)
851 {
852 WriteLock wl(&d_lock);
853 size_t idx = 1;
854 bool found = false;
855 for (auto it = d_servers.begin(); it != d_servers.end();) {
856 if (found) {
857 /* we need to renumber the servers placed
858 after the removed one, for Lua (custom policies) */
859 it->first = idx++;
860 it++;
861 }
862 else if (it->second == server) {
863 it = d_servers.erase(it);
864 found = true;
865 } else {
866 idx++;
867 it++;
868 }
869 }
870 }
871
872 private:
873 NumberedVector<shared_ptr<DownstreamState>> d_servers;
874 pthread_rwlock_t d_lock;
875 bool d_useECS{false};
876 };
877 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
878 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
879 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
880 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
881
882 struct CarbonConfig
883 {
884 ComboAddress server;
885 std::string ourname;
886 unsigned int interval;
887 };
888
/* EDNS header flags; DO is the most significant bit of a 16-bit value. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
893
894 struct DNSDistRuleAction
895 {
896 std::shared_ptr<DNSRule> d_rule;
897 std::shared_ptr<DNSAction> d_action;
898 boost::uuids::uuid d_id;
899 };
900
901 struct DNSDistResponseRuleAction
902 {
903 std::shared_ptr<DNSRule> d_rule;
904 std::shared_ptr<DNSResponseAction> d_action;
905 boost::uuids::uuid d_id;
906 };
907
908 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
909 extern DNSAction::Action g_dynBlockAction;
910
911 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
912 extern GlobalStateHolder<ServerPolicy> g_policy;
913 extern GlobalStateHolder<servers_t> g_dstates;
914 extern GlobalStateHolder<pools_t> g_pools;
915 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
916 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
917 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
918 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
919 extern GlobalStateHolder<NetmaskGroup> g_ACL;
920
921 extern ComboAddress g_serverControl; // not changed during runtime
922
923 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
924 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
925 extern vector<ClientState*> g_frontends;
926 extern bool g_truncateTC;
927 extern bool g_fixupCase;
928 extern int g_tcpRecvTimeout;
929 extern int g_tcpSendTimeout;
930 extern int g_udpTimeout;
931 extern uint16_t g_maxOutstanding;
932 extern std::atomic<bool> g_configurationDone;
933 extern uint64_t g_maxTCPClientThreads;
934 extern uint64_t g_maxTCPQueuedConnections;
935 extern size_t g_maxTCPQueriesPerConn;
936 extern size_t g_maxTCPConnectionDuration;
937 extern size_t g_maxTCPConnectionsPerClient;
938 extern std::atomic<uint16_t> g_cacheCleaningDelay;
939 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
940 extern bool g_verboseHealthChecks;
941 extern uint32_t g_staleCacheEntriesTTL;
942 extern bool g_apiReadWrite;
943 extern std::string g_apiConfigDirectory;
944 extern bool g_servFailOnNoPolicy;
945 extern uint32_t g_hashperturb;
946 extern bool g_useTCPSinglePipe;
947 extern std::atomic<uint16_t> g_downstreamTCPCleanupInterval;
948 extern size_t g_udpVectorSize;
949
950 #ifdef HAVE_EBPF
951 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
952 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
953 #endif /* HAVE_EBPF */
954
955 struct LocalHolders
956 {
957 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
958 {
959 }
960
961 LocalStateHolder<NetmaskGroup> acl;
962 LocalStateHolder<ServerPolicy> policy;
963 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
964 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
965 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
966 LocalStateHolder<servers_t> servers;
967 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
968 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
969 LocalStateHolder<pools_t> pools;
970 };
971
// Forward declaration: the prototypes further down in this part of the header
// only use dnsheader through pointers, so the full definition is not needed.
972 struct dnsheader;
973
974 void controlThread(int fd, ComboAddress local);
// Returns a vector of callables taking no arguments.
// NOTE(review): when and by whom they are run is not visible here.
975 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
// getPool takes the pools by const reference; createPoolIfNotExists takes them
// by non-const reference and may therefore modify the container.
976 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
977 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
978 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
979
// The six functions below share one signature: they receive the candidate
// servers and a pointer to the question and return a DownstreamState.
// NOTE(review): presumably each implements the selection strategy its name
// suggests, and may return an empty pointer -- confirm in the definitions.
980 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
981
982 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
983 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
984 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
985 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
986 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
987
// Web server settings: a password, an API key and an optional map of custom
// headers, stored next to a mutex.
// The std::mutex member makes this struct neither copyable nor movable.
988 struct WebserverConfig
989 {
990 std::string password;
991 std::string apiKey;
// Optional: may hold no map at all, which is distinct from an empty map.
992 boost::optional<std::map<std::string, std::string> > customHeaders;
// NOTE(review): presumably guards the fields above against concurrent
// access -- confirm in the code that reads and writes this struct.
993 std::mutex lock;
994 };
995
// apiKey and customHeaders are optional and are passed by value.
// NOTE(review): presumably stores the values in a WebserverConfig -- confirm
// in the definition.
996 void setWebserverConfig(const std::string& password, const boost::optional<std::string> apiKey, const boost::optional<std::map<std::string, std::string> > customHeaders);
997 void dnsdistWebserverThread(int sock, const ComboAddress& local);
// Counterparts on a file descriptor: getMsgLen32 writes a 32-bit length through
// the len pointer, putMsgLen32 takes the length by value. Both return bool.
// NOTE(review): byte order and the meaning of a false return are not visible here.
998 bool getMsgLen32(int fd, uint32_t* len);
999 bool putMsgLen32(int fd, uint32_t len);
// void*(void*) signature, i.e. usable as a pthread-style thread entry point.
1000 void* tcpAcceptorThread(void* p);
1001
1002 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1003 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1004 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1005 void resetLuaSideEffect(); // reset to indeterminate state
1006
// consumed is an output parameter (non-const reference).
1007 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
// poolname is passed by non-const reference and delayMsec by pointer, so both
// can be written by the callee.
1008 bool processQuery(LocalHolders& holders, DNSQuestion& dq, string& poolname, int* delayMsec, const struct timespec& now);
1009 bool processResponse(LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, int* delayMsec);
1010 bool fixUpQueryTurnedResponse(DNSQuestion& dq, const uint16_t origFlags);
// Takes the response buffer, its length and its size by pointer, so the callee
// can update or replace all three.
// NOTE(review): how rewrittenResponse relates to *response is not visible here.
1011 bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom);
1012 void restoreFlags(struct dnsheader* dh, uint16_t origFlags);
1013 bool checkQueryHeaders(const struct dnsheader* dh);
1014
// Everything up to the matching #endif is only declared when HAVE_DNSCRYPT is
// defined.
1015 #ifdef HAVE_DNSCRYPT
// NOTE(review): the tuple fields are unnamed; check the code that fills
// g_dnsCryptLocals for what the bool, int, string and set<int> stand for.
1016 extern std::vector<std::tuple<ComboAddress, std::shared_ptr<DNSCryptContext>, bool, int, std::string, std::set<int> > > g_dnsCryptLocals;
1017
// responseLen is passed by pointer so the callee can update the length.
1018 bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DNSCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy);
// decryptedQueryLen and response are outputs.
// NOTE(review): the meaning of the int return value is not visible here.
1019 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1020 #endif
1021
// Takes the question by non-const reference together with an option code.
// NOTE(review): presumably appends an XPF record to the query -- confirm in
// the definition.
1022 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1023
// Included here, after the declarations above, rather than at the top of the file.
// NOTE(review): presumably because it depends on those declarations -- confirm.
1024 #include "dnsdist-snmp.hh"
1025
1026 extern bool g_snmpEnabled;
1027 extern bool g_snmpTrapsEnabled;
// Raw pointer; ownership and whether it may be null are not visible here.
1028 extern DNSDistSNMPAgent* g_snmpAgent;
1029 extern bool g_addEDNSToSelfGeneratedResponses;
1030
// Constant fixed at 1500. Being static const at namespace scope it has internal
// linkage, so every translation unit including this header gets its own copy.
// NOTE(review): presumably the size in bytes of the incoming UDP buffer -- confirm.
1031 static const size_t s_udpIncomingBufferSize{1500};