]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Add functions to retrieve 'IO wait' and 'steal' metrics on Linux
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "bpf-filter.hh"
37 #include "capabilities.hh"
38 #include "circular_buffer.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52
53 void carbonDumpThread();
54 uint64_t uptimeOfProcess(const std::string& str);
55
56 extern uint16_t g_ECSSourcePrefixV4;
57 extern uint16_t g_ECSSourcePrefixV6;
58 extern bool g_ECSOverride;
59
/* Free-form key/value tags that can be attached to a query */
using QTag = std::unordered_map<std::string, std::string>;
61
62 struct DNSQuestion
63 {
64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
67 origFlags = *flags;
68 }
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
72
73 #ifdef HAVE_PROTOBUF
74 boost::optional<boost::uuids::uuid> uniqueId;
75 #endif
76 Netmask ecs;
77 boost::optional<Netmask> subnet;
78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
79 std::string poolname;
80 const DNSName* qname{nullptr};
81 const ComboAddress* local{nullptr};
82 const ComboAddress* remote{nullptr};
83 std::shared_ptr<QTag> qTag{nullptr};
84 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
85 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
86 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
87 struct dnsheader* dh{nullptr};
88 const struct timespec* queryTime{nullptr};
89 struct DOHUnit* du{nullptr};
90 size_t size;
91 unsigned int consumed{0};
92 int delayMsec{0};
93 boost::optional<uint32_t> tempFailureTTL;
94 uint32_t cacheKeyNoECS;
95 uint32_t cacheKey;
96 const uint16_t qtype;
97 const uint16_t qclass;
98 uint16_t len;
99 uint16_t ecsPrefixLength;
100 uint16_t origFlags;
101 uint8_t ednsRCode{0};
102 const bool tcp;
103 bool skipCache{false};
104 bool ecsOverride;
105 bool useECS{true};
106 bool addXPF{true};
107 bool ecsSet{false};
108 bool ecsAdded{false};
109 bool ednsAdded{false};
110 bool useZeroScope{false};
111 bool dnssecOK{false};
112 };
113
114 struct DNSResponse : DNSQuestion
115 {
116 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
117 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
118 DNSResponse(const DNSResponse&) = delete;
119 DNSResponse& operator=(const DNSResponse&) = delete;
120 DNSResponse(DNSResponse&&) = default;
121 };
122
123 /* so what could you do:
124 drop,
125 fake up nxdomain,
126 provide actual answer,
127 allow & and stop processing,
128 continue processing,
129 modify header: (servfail|refused|notimp), set TC=1,
130 send to pool */
131
132 class DNSAction
133 {
134 public:
135 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
136 static std::string typeToString(const Action& action)
137 {
138 switch(action) {
139 case Action::Drop:
140 return "Drop";
141 case Action::Nxdomain:
142 return "Send NXDomain";
143 case Action::Refused:
144 return "Send Refused";
145 case Action::Spoof:
146 return "Spoof an answer";
147 case Action::Allow:
148 return "Allow";
149 case Action::HeaderModify:
150 return "Modify the header";
151 case Action::Pool:
152 return "Route to a pool";
153 case Action::Delay:
154 return "Delay";
155 case Action::Truncate:
156 return "Truncate over UDP";
157 case Action::ServFail:
158 return "Send ServFail";
159 case Action::None:
160 case Action::NoOp:
161 return "Do nothing";
162 case Action::NoRecurse:
163 return "Set rd=0";
164 }
165
166 return "Unknown";
167 }
168
169 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
170 virtual ~DNSAction()
171 {
172 }
173 virtual string toString() const = 0;
174 virtual std::map<string, double> getStats() const
175 {
176 return {{}};
177 }
178 };
179
180 class DNSResponseAction
181 {
182 public:
183 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
184 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
185 virtual ~DNSResponseAction()
186 {
187 }
188 virtual string toString() const = 0;
189 };
190
191 struct DynBlock
192 {
193 DynBlock(): action(DNSAction::Action::None), warning(false)
194 {
195 }
196
197 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
198 {
199 }
200
201 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
202 {
203 blocks.store(rhs.blocks);
204 }
205
206 DynBlock& operator=(const DynBlock& rhs)
207 {
208 reason=rhs.reason;
209 until=rhs.until;
210 domain=rhs.domain;
211 action=rhs.action;
212 blocks.store(rhs.blocks);
213 warning=rhs.warning;
214 return *this;
215 }
216
217 string reason;
218 struct timespec until;
219 DNSName domain;
220 DNSAction::Action action;
221 mutable std::atomic<unsigned int> blocks;
222 bool warning;
223 };
224
225 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
226
227 extern vector<pair<struct timeval, std::string> > g_confDelta;
228
229 extern uint64_t getLatencyCount(const std::string&);
230
231 struct DNSDistStats
232 {
233 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
234 stat_t responses{0};
235 stat_t servfailResponses{0};
236 stat_t queries{0};
237 stat_t frontendNXDomain{0};
238 stat_t frontendServFail{0};
239 stat_t frontendNoError{0};
240 stat_t nonCompliantQueries{0};
241 stat_t nonCompliantResponses{0};
242 stat_t rdQueries{0};
243 stat_t emptyQueries{0};
244 stat_t aclDrops{0};
245 stat_t dynBlocked{0};
246 stat_t ruleDrop{0};
247 stat_t ruleNXDomain{0};
248 stat_t ruleRefused{0};
249 stat_t ruleServFail{0};
250 stat_t selfAnswered{0};
251 stat_t downstreamTimeouts{0};
252 stat_t downstreamSendErrors{0};
253 stat_t truncFail{0};
254 stat_t noPolicy{0};
255 stat_t cacheHits{0};
256 stat_t cacheMisses{0};
257 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
258 stat_t securityStatus{0};
259
260 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
261 typedef std::function<uint64_t(const std::string&)> statfunction_t;
262 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
263 std::vector<std::pair<std::string, entry_t>> entries{
264 {"responses", &responses},
265 {"servfail-responses", &servfailResponses},
266 {"queries", &queries},
267 {"frontend-nxdomain", &frontendNXDomain},
268 {"frontend-servfail", &frontendServFail},
269 {"frontend-noerror", &frontendNoError},
270 {"acl-drops", &aclDrops},
271 {"rule-drop", &ruleDrop},
272 {"rule-nxdomain", &ruleNXDomain},
273 {"rule-refused", &ruleRefused},
274 {"rule-servfail", &ruleServFail},
275 {"self-answered", &selfAnswered},
276 {"downstream-timeouts", &downstreamTimeouts},
277 {"downstream-send-errors", &downstreamSendErrors},
278 {"trunc-failures", &truncFail},
279 {"no-policy", &noPolicy},
280 {"latency0-1", &latency0_1},
281 {"latency1-10", &latency1_10},
282 {"latency10-50", &latency10_50},
283 {"latency50-100", &latency50_100},
284 {"latency100-1000", &latency100_1000},
285 {"latency-slow", &latencySlow},
286 {"latency-avg100", &latencyAvg100},
287 {"latency-avg1000", &latencyAvg1000},
288 {"latency-avg10000", &latencyAvg10000},
289 {"latency-avg1000000", &latencyAvg1000000},
290 {"uptime", uptimeOfProcess},
291 {"real-memory-usage", getRealMemoryUsage},
292 {"special-memory-usage", getSpecialMemoryUsage},
293 {"noncompliant-queries", &nonCompliantQueries},
294 {"noncompliant-responses", &nonCompliantResponses},
295 {"rdqueries", &rdQueries},
296 {"empty-queries", &emptyQueries},
297 {"cache-hits", &cacheHits},
298 {"cache-misses", &cacheMisses},
299 {"cpu-user-msec", getCPUTimeUser},
300 {"cpu-sys-msec", getCPUTimeSystem},
301 {"fd-usage", getOpenFileDescriptors},
302 {"dyn-blocked", &dynBlocked},
303 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
304 {"security-status", &securityStatus},
305 // Latency histogram
306 {"latency-sum", &latencySum},
307 {"latency-count", getLatencyCount},
308 };
309 };
310
// Metric types for Prometheus
enum class PrometheusMetricType: int {
  counter = 1,
  gauge = 2
};

// Keeps additional information about metrics
struct MetricDefinition {
  MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
  }

  MetricDefinition() = default;

  // Metric description
  std::string description;
  // Metric type for Prometheus, given a defined value so that a
  // default-constructed definition can be copied and read safely
  PrometheusMetricType prometheusType{PrometheusMetricType::counter};
};

// Descriptions and Prometheus types of the exported metrics, indexed by metric name
struct MetricDefinitionStorage {
  // Look a metric up by name.
  // Returns true and fills 'metric' when the name is known, returns false
  // and leaves 'metric' untouched otherwise.
  bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) const {
    const auto metricDetailsIter = metrics.find(metricName);

    if (metricDetailsIter == metrics.end()) {
      return false;
    }

    metric = metricDetailsIter->second;
    return true;
  }

  // Return string representation of Prometheus metric type,
  // an empty string for a value that is not a known type
  std::string getPrometheusStringMetricType(PrometheusMetricType metricType) const {
    switch (metricType) {
    case PrometheusMetricType::counter:
      return "counter";
    case PrometheusMetricType::gauge:
      return "gauge";
    default:
      return "";
    }
  }

  std::map<std::string, MetricDefinition> metrics = {
    { "responses",              MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
    { "servfail-responses",     MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
    { "queries",                MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
    { "frontend-nxdomain",      MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
    { "frontend-servfail",      MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
    { "frontend-noerror",       MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
    { "acl-drops",              MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
    { "rule-drop",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
    { "rule-nxdomain",          MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
    { "rule-refused",           MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
    // answers generated by a rule are returned, not received: same wording as the two entries above
    { "rule-servfail",          MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers returned because of a rule")},
    { "self-answered",          MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
    { "downstream-timeouts",    MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
    { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
    { "trunc-failures",         MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
    { "no-policy",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
    { "latency0-1",             MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
    { "latency1-10",            MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
    { "latency10-50",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
    { "latency50-100",          MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
    { "latency100-1000",        MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
    { "latency-slow",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
    { "latency-avg100",         MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 100 packets")},
    { "latency-avg1000",        MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000 packets")},
    { "latency-avg10000",       MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 10000 packets")},
    { "latency-avg1000000",     MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000000 packets")},
    { "uptime",                 MetricDefinition(PrometheusMetricType::gauge,   "Uptime of the dnsdist process in seconds")},
    { "real-memory-usage",      MetricDefinition(PrometheusMetricType::gauge,   "Current memory usage in bytes")},
    { "noncompliant-queries",   MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
    { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
    { "rdqueries",              MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
    { "empty-queries",          MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
    { "cache-hits",             MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
    { "cache-misses",           MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
    { "cpu-user-msec",          MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
    { "cpu-sys-msec",           MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
    { "fd-usage",               MetricDefinition(PrometheusMetricType::gauge,   "Number of currently used file descriptors")},
    { "dyn-blocked",            MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
    { "dyn-block-nmg-size",     MetricDefinition(PrometheusMetricType::gauge,   "Number of dynamic blocks entries") },
    { "security-status",        MetricDefinition(PrometheusMetricType::gauge,   "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
  };
};
401
402 extern MetricDefinitionStorage g_metricDefinitions;
403 extern struct DNSDistStats g_stats;
404 void doLatencyStats(double udiff);
405
406
407 struct StopWatch
408 {
409 StopWatch(bool realTime=false): d_needRealTime(realTime)
410 {
411 }
412 struct timespec d_start{0,0};
413 bool d_needRealTime{false};
414
415 void start() {
416 if(gettime(&d_start, d_needRealTime) < 0)
417 unixDie("Getting timestamp");
418
419 }
420
421 void set(const struct timespec& from) {
422 d_start = from;
423 }
424
425 double udiff() const {
426 struct timespec now;
427 if(gettime(&now, d_needRealTime) < 0)
428 unixDie("Getting timestamp");
429
430 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
431 }
432
433 double udiffAndSet() {
434 struct timespec now;
435 if(gettime(&now, d_needRealTime) < 0)
436 unixDie("Getting timestamp");
437
438 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
439 d_start = now;
440 return ret;
441 }
442
443 };
444
445 class BasicQPSLimiter
446 {
447 public:
448 BasicQPSLimiter()
449 {
450 }
451
452 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
453 {
454 d_prev.start();
455 }
456
457 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
458 {
459 auto delta = d_prev.udiffAndSet();
460
461 if(delta > 0.0) // time, frequently, does go backwards..
462 d_tokens += 1.0 * rate * (delta/1000000.0);
463
464 if(d_tokens > burst) {
465 d_tokens = burst;
466 }
467
468 bool ret=false;
469 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
470 ret=true;
471 --d_tokens;
472 }
473
474 return ret;
475 }
476
477 bool seenSince(const struct timespec& cutOff) const
478 {
479 return cutOff < d_prev.d_start;
480 }
481
482 protected:
483 mutable StopWatch d_prev;
484 mutable double d_tokens;
485 };
486
487 class QPSLimiter : public BasicQPSLimiter
488 {
489 public:
490 QPSLimiter(): BasicQPSLimiter()
491 {
492 }
493
494 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
495 {
496 d_prev.start();
497 }
498
499 unsigned int getRate() const
500 {
501 return d_passthrough ? 0 : d_rate;
502 }
503
504 int getPassed() const
505 {
506 return d_passed;
507 }
508
509 int getBlocked() const
510 {
511 return d_blocked;
512 }
513
514 bool check() const // this is not quite fair
515 {
516 if (d_passthrough) {
517 return true;
518 }
519
520 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
521 if (ret) {
522 d_passed++;
523 }
524 else {
525 d_blocked++;
526 }
527
528 return ret;
529 }
530 private:
531 mutable unsigned int d_passed{0};
532 mutable unsigned int d_blocked{0};
533 unsigned int d_rate;
534 unsigned int d_burst;
535 bool d_passthrough{true};
536 };
537
538 struct ClientState;
539
540 struct IDState
541 {
542 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
543 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
544 {
545 usageIndicator.store(orig.usageIndicator.load());
546 origFD = orig.origFD;
547 origID = orig.origID;
548 delayMsec = orig.delayMsec;
549 tempFailureTTL = orig.tempFailureTTL;
550 }
551
552 static const int64_t unusedIndicator = -1;
553
554 static bool isInUse(int64_t usageIndicator)
555 {
556 return usageIndicator != unusedIndicator;
557 }
558
559 bool isInUse() const
560 {
561 return usageIndicator != unusedIndicator;
562 }
563
564 /* return true if the value has been successfully replaced meaning that
565 no-one updated the usage indicator in the meantime */
566 bool tryMarkUnused(int64_t expectedUsageIndicator)
567 {
568 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
569 }
570
571 /* mark as unused no matter what, return true if the state was in use before */
572 bool markAsUsed()
573 {
574 auto currentGeneration = generation++;
575 return markAsUsed(currentGeneration);
576 }
577
578 /* mark as unused no matter what, return true if the state was in use before */
579 bool markAsUsed(int64_t currentGeneration)
580 {
581 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
582 return oldUsage != unusedIndicator;
583 }
584
585 /* We use this value to detect whether this state is in use.
586 For performance reasons we don't want to use a lock here, but that means
587 we need to be very careful when modifying this value. Modifications happen
588 from:
589 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
590 then picking one of the states associated to this backend (via the idOffset).
591 Most of the time this state should not be in use and usageIndicator is -1, but we
592 might not yet have received a response for the query previously associated to this
593 state, meaning that we will 'reuse' this state and erase the existing state.
594 If we ever receive a response for this state, it will be discarded. This is
595 mostly fine for UDP except that we still need to be careful in order to miss
596 the 'outstanding' counters, which should only be increased when we are picking
597 an empty state, and not when reusing ;
598 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
599 be freed, as well as internal objects internals to libh2o.
600 - one of the UDP receiver threads receiving a response from a backend, picking
601 the corresponding state and sending the response to the client ;
602 - the 'healthcheck' thread scanning the states to actively discover timeouts,
603 mostly to keep some counters like the 'outstanding' one sane.
604 We previously based that logic on the origFD (FD on which the query was received,
605 and therefore from where the response should be sent) but this suffered from an
606 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
607 same value since we only have so much incoming sockets:
608 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
609 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
610 qtype and qclass match
611 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
612 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
613 5, except it's not the same 5 anymore and it overrides a fresh state.
614 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
615 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
616 when the state is unused and the value of our counter otherwise.
617 */
618 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
619 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
620 ComboAddress origRemote; // 28
621 ComboAddress origDest; // 28
622 StopWatch sentTime; // 16
623 DNSName qname; // 80
624 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
625 #ifdef HAVE_PROTOBUF
626 boost::optional<boost::uuids::uuid> uniqueId;
627 #endif
628 boost::optional<Netmask> subnet{boost::none};
629 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
630 std::shared_ptr<QTag> qTag{nullptr};
631 const ClientState* cs{nullptr};
632 DOHUnit* du{nullptr};
633 uint32_t cacheKey; // 4
634 uint32_t cacheKeyNoECS; // 4
635 uint16_t age; // 4
636 uint16_t qtype; // 2
637 uint16_t qclass; // 2
638 uint16_t origID; // 2
639 uint16_t origFlags; // 2
640 int origFD{-1};
641 int delayMsec;
642 boost::optional<uint32_t> tempFailureTTL;
643 bool ednsAdded{false};
644 bool ecsAdded{false};
645 bool skipCache{false};
646 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
647 bool dnssecOK{false};
648 bool useZeroScope;
649 };
650
651 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
652 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
653 struct QueryCount {
654 QueryCount()
655 {
656 pthread_rwlock_init(&queryLock, nullptr);
657 }
658 ~QueryCount()
659 {
660 pthread_rwlock_destroy(&queryLock);
661 }
662 QueryCountRecords records;
663 QueryCountFilter filter;
664 pthread_rwlock_t queryLock;
665 bool enabled{false};
666 };
667
668 extern QueryCount g_qcount;
669
670 struct ClientState
671 {
672 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
673 {
674 }
675
676 std::set<int> cpus;
677 ComboAddress local;
678 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
679 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
680 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
681 std::string interface;
682 std::atomic<uint64_t> queries{0};
683 mutable std::atomic<uint64_t> responses{0};
684 std::atomic<uint64_t> tcpDiedReadingQuery{0};
685 std::atomic<uint64_t> tcpDiedSendingResponse{0};
686 std::atomic<uint64_t> tcpGaveUp{0};
687 std::atomic<uint64_t> tcpClientTimeouts{0};
688 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
689 std::atomic<uint64_t> tcpCurrentConnections{0};
690 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
691 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
692 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
693 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
694 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
695 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
696 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
697 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
698 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
699 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
700 /* in ms */
701 std::atomic<double> tcpAvgConnectionDuration{0.0};
702 int udpFD{-1};
703 int tcpFD{-1};
704 int fastOpenQueueSize{0};
705 bool muted{false};
706 bool tcp;
707 bool reuseport;
708 bool ready{false};
709
710 int getSocket() const
711 {
712 return udpFD != -1 ? udpFD : tcpFD;
713 }
714
715 bool isUDP() const
716 {
717 return udpFD != -1;
718 }
719
720 bool isTCP() const
721 {
722 return udpFD == -1;
723 }
724
725 bool hasTLS() const
726 {
727 return tlsFrontend != nullptr || dohFrontend != nullptr;
728 }
729
730 std::string getType() const
731 {
732 std::string result = udpFD != -1 ? "UDP" : "TCP";
733
734 if (dohFrontend) {
735 result += " (DNS over HTTPS)";
736 }
737 else if (tlsFrontend) {
738 result += " (DNS over TLS)";
739 }
740 else if (dnscryptCtx) {
741 result += " (DNSCrypt)";
742 }
743
744 return result;
745 }
746
747 #ifdef HAVE_EBPF
748 shared_ptr<BPFFilter> d_filter;
749
750 void detachFilter()
751 {
752 if (d_filter) {
753 d_filter->removeSocket(getSocket());
754 d_filter = nullptr;
755 }
756 }
757
758 void attachFilter(shared_ptr<BPFFilter> bpf)
759 {
760 detachFilter();
761
762 bpf->addSocket(getSocket());
763 d_filter = bpf;
764 }
765 #endif /* HAVE_EBPF */
766
767 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
768 {
769 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
770 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
771 }
772 };
773
774 class TCPClientCollection {
775 std::vector<int> d_tcpclientthreads;
776 std::atomic<uint64_t> d_numthreads{0};
777 std::atomic<uint64_t> d_pos{0};
778 std::atomic<uint64_t> d_queued{0};
779 const uint64_t d_maxthreads{0};
780 std::mutex d_mutex;
781 int d_singlePipe[2];
782 const bool d_useSinglePipe;
783 public:
784
785 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
786
787 {
788 d_tcpclientthreads.reserve(maxThreads);
789
790 if (d_useSinglePipe) {
791 if (pipe(d_singlePipe) < 0) {
792 int err = errno;
793 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
794 }
795
796 if (!setNonBlocking(d_singlePipe[0])) {
797 int err = errno;
798 close(d_singlePipe[0]);
799 close(d_singlePipe[1]);
800 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
801 }
802
803 if (!setNonBlocking(d_singlePipe[1])) {
804 int err = errno;
805 close(d_singlePipe[0]);
806 close(d_singlePipe[1]);
807 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
808 }
809 }
810 }
811 int getThread()
812 {
813 uint64_t pos = d_pos++;
814 ++d_queued;
815 return d_tcpclientthreads[pos % d_numthreads];
816 }
817 bool hasReachedMaxThreads() const
818 {
819 return d_numthreads >= d_maxthreads;
820 }
821 uint64_t getThreadsCount() const
822 {
823 return d_numthreads;
824 }
825 uint64_t getQueuedCount() const
826 {
827 return d_queued;
828 }
829 void decrementQueuedCount()
830 {
831 --d_queued;
832 }
833 void addTCPClientThread();
834 };
835
836 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
837
838 struct DownstreamState
839 {
840 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
841
842 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
843 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
844 ~DownstreamState()
845 {
846 for (auto& fd : sockets) {
847 if (fd >= 0) {
848 close(fd);
849 fd = -1;
850 }
851 }
852 pthread_rwlock_destroy(&d_lock);
853 }
854 boost::uuids::uuid id;
855 std::set<unsigned int> hashes;
856 mutable pthread_rwlock_t d_lock;
857 std::vector<int> sockets;
858 const std::string sourceItfName;
859 std::mutex socketsLock;
860 std::mutex connectLock;
861 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
862 std::thread tid;
863 const ComboAddress remote;
864 QPSLimiter qps;
865 vector<IDState> idStates;
866 const ComboAddress sourceAddr;
867 checkfunc_t checkFunction;
868 DNSName checkName{"a.root-servers.net."};
869 QType checkType{QType::A};
870 uint16_t checkClass{QClass::IN};
871 std::atomic<uint64_t> idOffset{0};
872 std::atomic<uint64_t> sendErrors{0};
873 std::atomic<uint64_t> outstanding{0};
874 std::atomic<uint64_t> reuseds{0};
875 std::atomic<uint64_t> queries{0};
876 std::atomic<uint64_t> responses{0};
877 struct {
878 std::atomic<uint64_t> sendErrors{0};
879 std::atomic<uint64_t> reuseds{0};
880 std::atomic<uint64_t> queries{0};
881 } prev;
882 std::atomic<uint64_t> tcpDiedSendingQuery{0};
883 std::atomic<uint64_t> tcpDiedReadingResponse{0};
884 std::atomic<uint64_t> tcpGaveUp{0};
885 std::atomic<uint64_t> tcpReadTimeouts{0};
886 std::atomic<uint64_t> tcpWriteTimeouts{0};
887 std::atomic<uint64_t> tcpCurrentConnections{0};
888 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
889 /* in ms */
890 std::atomic<double> tcpAvgConnectionDuration{0.0};
891 string name;
892 size_t socketsOffset{0};
893 double queryLoad{0.0};
894 double dropRate{0.0};
895 double latencyUsec{0.0};
896 int order{1};
897 int weight{1};
898 int tcpConnectTimeout{5};
899 int tcpRecvTimeout{30};
900 int tcpSendTimeout{30};
901 unsigned int checkInterval{1};
902 unsigned int lastCheck{0};
903 const unsigned int sourceItf{0};
904 uint16_t retries{5};
905 uint16_t xpfRRCode{0};
906 uint16_t checkTimeout{1000}; /* in milliseconds */
907 uint8_t currentCheckFailures{0};
908 uint8_t consecutiveSuccessfulChecks{0};
909 uint8_t maxCheckFailures{1};
910 uint8_t minRiseSuccesses{1};
911 StopWatch sw;
912 set<string> pools;
913 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
914 bool mustResolve{false};
915 bool upStatus{false};
916 bool useECS{false};
917 bool setCD{false};
918 bool disableZeroScope{false};
919 std::atomic<bool> connected{false};
920 std::atomic_flag threadStarted;
921 bool tcpFastOpen{false};
922 bool ipBindAddrNoPort{true};
923
924 bool isUp() const
925 {
926 if(availability == Availability::Down)
927 return false;
928 if(availability == Availability::Up)
929 return true;
930 return upStatus;
931 }
932 void setUp() { availability = Availability::Up; }
933 void setDown() { availability = Availability::Down; }
934 void setAuto() { availability = Availability::Auto; }
935 string getName() const {
936 if (name.empty()) {
937 return remote.toStringWithPort();
938 }
939 return name;
940 }
941 string getNameWithAddr() const {
942 if (name.empty()) {
943 return remote.toStringWithPort();
944 }
945 return name + " (" + remote.toStringWithPort()+ ")";
946 }
947 string getStatus() const
948 {
949 string status;
950 if(availability == DownstreamState::Availability::Up)
951 status = "UP";
952 else if(availability == DownstreamState::Availability::Down)
953 status = "DOWN";
954 else
955 status = (upStatus ? "up" : "down");
956 return status;
957 }
958 bool reconnect();
959 void hash();
960 void setId(const boost::uuids::uuid& newId);
961 void setWeight(int newWeight);
962
963 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
964 {
965 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
966 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
967 }
968 };
969 using servers_t =vector<std::shared_ptr<DownstreamState>>;
970
971 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
972
973 void responderThread(std::shared_ptr<DownstreamState> state);
974 extern std::mutex g_luamutex;
975 extern LuaContext g_lua;
976 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
977
978 class DNSRule
979 {
980 public:
981 virtual ~DNSRule ()
982 {
983 }
984 virtual bool matches(const DNSQuestion* dq) const =0;
985 virtual string toString() const = 0;
986 mutable std::atomic<uint64_t> d_matches{0};
987 };
988
989 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
990 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
991
992 struct ServerPolicy
993 {
994 string name;
995 policyfunc_t policy;
996 bool isLua;
997 std::string toString() const {
998 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
999 }
1000 };
1001
1002 struct ServerPool
1003 {
1004 ServerPool()
1005 {
1006 pthread_rwlock_init(&d_lock, nullptr);
1007 }
1008 ~ServerPool()
1009 {
1010 pthread_rwlock_destroy(&d_lock);
1011 }
1012
1013 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1014
1015 bool getECS() const
1016 {
1017 return d_useECS;
1018 }
1019
1020 void setECS(bool useECS)
1021 {
1022 d_useECS = useECS;
1023 }
1024
1025 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1026 std::shared_ptr<ServerPolicy> policy{nullptr};
1027
1028 size_t countServers(bool upOnly)
1029 {
1030 size_t count = 0;
1031 ReadLock rl(&d_lock);
1032 for (const auto& server : d_servers) {
1033 if (!upOnly || std::get<1>(server)->isUp() ) {
1034 count++;
1035 }
1036 }
1037 return count;
1038 }
1039
1040 NumberedVector<shared_ptr<DownstreamState>> getServers()
1041 {
1042 NumberedVector<shared_ptr<DownstreamState>> result;
1043 {
1044 ReadLock rl(&d_lock);
1045 result = d_servers;
1046 }
1047 return result;
1048 }
1049
1050 void addServer(shared_ptr<DownstreamState>& server)
1051 {
1052 WriteLock wl(&d_lock);
1053 unsigned int count = (unsigned int) d_servers.size();
1054 d_servers.push_back(make_pair(++count, server));
1055 /* we need to reorder based on the server 'order' */
1056 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1057 return a.second->order < b.second->order;
1058 });
1059 /* and now we need to renumber for Lua (custom policies) */
1060 size_t idx = 1;
1061 for (auto& serv : d_servers) {
1062 serv.first = idx++;
1063 }
1064 }
1065
1066 void removeServer(shared_ptr<DownstreamState>& server)
1067 {
1068 WriteLock wl(&d_lock);
1069 size_t idx = 1;
1070 bool found = false;
1071 for (auto it = d_servers.begin(); it != d_servers.end();) {
1072 if (found) {
1073 /* we need to renumber the servers placed
1074 after the removed one, for Lua (custom policies) */
1075 it->first = idx++;
1076 it++;
1077 }
1078 else if (it->second == server) {
1079 it = d_servers.erase(it);
1080 found = true;
1081 } else {
1082 idx++;
1083 it++;
1084 }
1085 }
1086 }
1087
1088 private:
1089 NumberedVector<shared_ptr<DownstreamState>> d_servers;
1090 pthread_rwlock_t d_lock;
1091 bool d_useECS{false};
1092 };
1093 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
1094 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1095 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1096 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1097
1098 struct CarbonConfig
1099 {
1100 ComboAddress server;
1101 std::string namespace_name;
1102 std::string ourname;
1103 std::string instance_name;
1104 unsigned int interval;
1105 };
1106
/* Flag values for the EDNS header. */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0x0000,
  /* 32768: the top bit of a 16-bit flags field; presumably the 'DO'
     (DNSSEC OK) bit of RFC 3225 */
  EDNS_HEADER_FLAG_DO = 0x8000
};
1111
1112 struct DNSDistRuleAction
1113 {
1114 std::shared_ptr<DNSRule> d_rule;
1115 std::shared_ptr<DNSAction> d_action;
1116 boost::uuids::uuid d_id;
1117 uint64_t d_creationOrder;
1118 };
1119
1120 struct DNSDistResponseRuleAction
1121 {
1122 std::shared_ptr<DNSRule> d_rule;
1123 std::shared_ptr<DNSResponseAction> d_action;
1124 boost::uuids::uuid d_id;
1125 uint64_t d_creationOrder;
1126 };
1127
1128 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1129 extern DNSAction::Action g_dynBlockAction;
1130
1131 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1132 extern GlobalStateHolder<ServerPolicy> g_policy;
1133 extern GlobalStateHolder<servers_t> g_dstates;
1134 extern GlobalStateHolder<pools_t> g_pools;
1135 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1136 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1137 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1138 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1139 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1140
1141 extern ComboAddress g_serverControl; // not changed during runtime
1142
1143 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1144 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1145 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1146 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1147 extern bool g_truncateTC;
1148 extern bool g_fixupCase;
1149 extern int g_tcpRecvTimeout;
1150 extern int g_tcpSendTimeout;
1151 extern int g_udpTimeout;
1152 extern uint16_t g_maxOutstanding;
1153 extern std::atomic<bool> g_configurationDone;
1154 extern uint64_t g_maxTCPClientThreads;
1155 extern uint64_t g_maxTCPQueuedConnections;
1156 extern size_t g_maxTCPQueriesPerConn;
1157 extern size_t g_maxTCPConnectionDuration;
1158 extern size_t g_maxTCPConnectionsPerClient;
1159 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1160 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1161 extern uint32_t g_staleCacheEntriesTTL;
1162 extern bool g_apiReadWrite;
1163 extern std::string g_apiConfigDirectory;
1164 extern bool g_servFailOnNoPolicy;
1165 extern uint32_t g_hashperturb;
1166 extern bool g_useTCPSinglePipe;
1167 extern uint16_t g_downstreamTCPCleanupInterval;
1168 extern size_t g_udpVectorSize;
1169 extern bool g_preserveTrailingData;
1170 extern bool g_allowEmptyResponse;
1171 extern bool g_roundrobinFailOnNoServer;
1172 extern double g_consistentHashBalancingFactor;
1173
1174 #ifdef HAVE_EBPF
1175 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1176 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1177 #endif /* HAVE_EBPF */
1178
1179 struct LocalHolders
1180 {
1181 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1182 {
1183 }
1184
1185 LocalStateHolder<NetmaskGroup> acl;
1186 LocalStateHolder<ServerPolicy> policy;
1187 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1188 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1189 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1190 LocalStateHolder<servers_t> servers;
1191 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1192 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1193 LocalStateHolder<pools_t> pools;
1194 };
1195
1196 struct dnsheader;
1197
1198 void controlThread(int fd, ComboAddress local);
1199 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1200 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1201 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1202
1203 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1204
1205 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1206 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1207 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1208 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1209 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1210
1211 struct WebserverConfig
1212 {
1213 std::string password;
1214 std::string apiKey;
1215 boost::optional<std::map<std::string, std::string> > customHeaders;
1216 std::mutex lock;
1217 };
1218
1219 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1220 void setWebserverPassword(const std::string& password);
1221 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1222
1223 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1224 void tcpAcceptorThread(void* p);
1225 #ifdef HAVE_DNS_OVER_HTTPS
1226 void dohThread(ClientState* cs);
1227 #endif /* HAVE_DNS_OVER_HTTPS */
1228
/* Tracking of the side effects of Lua calls */

// if nothing has been declared, set that there are no side effects
void setLuaNoSideEffect();
// set to report a side effect, cancelling all _no_ side effect calls
void setLuaSideEffect();
// set if there were only explicit declarations of _no_ side effect
bool getLuaNoSideEffect();
// reset to indeterminate state
void resetLuaSideEffect();
1233
1234 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1235 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1236 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1237
1238 bool checkQueryHeaders(const struct dnsheader* dh);
1239
1240 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1241 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1242 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1243
1244 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1245
1246 uint16_t getRandomDNSID();
1247
1248 #include "dnsdist-snmp.hh"
1249
1250 extern bool g_snmpEnabled;
1251 extern bool g_snmpTrapsEnabled;
1252 extern DNSDistSNMPAgent* g_snmpAgent;
1253 extern bool g_addEDNSToSelfGeneratedResponses;
1254
1255 extern std::set<std::string> g_capabilitiesToRetain;
/* Size limits, in bytes */
static constexpr uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
static constexpr size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
1258
1259 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
1260 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1261
1262 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1263 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1264
1265 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1266 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);