]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/dnsdist.hh
Merge pull request #7820 from pieterlexis/systemd-no-setuid
[thirdparty/pdns.git] / pdns / dnsdist.hh
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #pragma once
23 #include "config.h"
24 #include "ext/luawrapper/include/LuaContext.hpp"
25
26 #include <atomic>
27 #include <mutex>
28 #include <string>
29 #include <thread>
30 #include <time.h>
31 #include <unistd.h>
32 #include <unordered_map>
33
34 #include <boost/variant.hpp>
35
36 #include "bpf-filter.hh"
37 #include "capabilities.hh"
38 #include "circular_buffer.hh"
39 #include "dnscrypt.hh"
40 #include "dnsdist-cache.hh"
41 #include "dnsdist-dynbpf.hh"
42 #include "dnsname.hh"
43 #include "doh.hh"
44 #include "ednsoptions.hh"
45 #include "gettime.hh"
46 #include "iputils.hh"
47 #include "misc.hh"
48 #include "mplexer.hh"
49 #include "sholder.hh"
50 #include "tcpiohandler.hh"
51 #include "uuid-utils.hh"
52
53 void carbonDumpThread();
54 uint64_t uptimeOfProcess(const std::string& str);
55
56 extern uint16_t g_ECSSourcePrefixV4;
57 extern uint16_t g_ECSSourcePrefixV6;
58 extern bool g_ECSOverride;
59
60 typedef std::unordered_map<string, string> QTag;
61
62 struct DNSQuestion
63 {
64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
67 origFlags = *flags;
68 }
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
72
73 #ifdef HAVE_PROTOBUF
74 boost::optional<boost::uuids::uuid> uniqueId;
75 #endif
76 Netmask ecs;
77 boost::optional<Netmask> subnet;
78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
79 std::string poolname;
80 const DNSName* qname{nullptr};
81 const ComboAddress* local{nullptr};
82 const ComboAddress* remote{nullptr};
83 std::shared_ptr<QTag> qTag{nullptr};
84 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
85 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
86 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
87 struct dnsheader* dh{nullptr};
88 const struct timespec* queryTime{nullptr};
89 struct DOHUnit* du{nullptr};
90 size_t size;
91 unsigned int consumed{0};
92 int delayMsec{0};
93 boost::optional<uint32_t> tempFailureTTL;
94 uint32_t cacheKeyNoECS;
95 uint32_t cacheKey;
96 const uint16_t qtype;
97 const uint16_t qclass;
98 uint16_t len;
99 uint16_t ecsPrefixLength;
100 uint16_t origFlags;
101 uint8_t ednsRCode{0};
102 const bool tcp;
103 bool skipCache{false};
104 bool ecsOverride;
105 bool useECS{true};
106 bool addXPF{true};
107 bool ecsSet{false};
108 bool ecsAdded{false};
109 bool ednsAdded{false};
110 bool useZeroScope{false};
111 bool dnssecOK{false};
112 };
113
114 struct DNSResponse : DNSQuestion
115 {
116 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
117 DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
118 DNSResponse(const DNSResponse&) = delete;
119 DNSResponse& operator=(const DNSResponse&) = delete;
120 DNSResponse(DNSResponse&&) = default;
121 };
122
123 /* so what could you do:
124 drop,
125 fake up nxdomain,
126 provide actual answer,
127 allow & and stop processing,
128 continue processing,
129 modify header: (servfail|refused|notimp), set TC=1,
130 send to pool */
131
132 class DNSAction
133 {
134 public:
135 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };
136 static std::string typeToString(const Action& action)
137 {
138 switch(action) {
139 case Action::Drop:
140 return "Drop";
141 case Action::Nxdomain:
142 return "Send NXDomain";
143 case Action::Refused:
144 return "Send Refused";
145 case Action::Spoof:
146 return "Spoof an answer";
147 case Action::SpoofRaw:
148 return "Spoof an answer from raw bytes";
149 case Action::Allow:
150 return "Allow";
151 case Action::HeaderModify:
152 return "Modify the header";
153 case Action::Pool:
154 return "Route to a pool";
155 case Action::Delay:
156 return "Delay";
157 case Action::Truncate:
158 return "Truncate over UDP";
159 case Action::ServFail:
160 return "Send ServFail";
161 case Action::None:
162 case Action::NoOp:
163 return "Do nothing";
164 case Action::NoRecurse:
165 return "Set rd=0";
166 }
167
168 return "Unknown";
169 }
170
171 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
172 virtual ~DNSAction()
173 {
174 }
175 virtual string toString() const = 0;
176 virtual std::map<string, double> getStats() const
177 {
178 return {{}};
179 }
180 };
181
182 class DNSResponseAction
183 {
184 public:
185 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
186 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
187 virtual ~DNSResponseAction()
188 {
189 }
190 virtual string toString() const = 0;
191 };
192
193 struct DynBlock
194 {
195 DynBlock(): action(DNSAction::Action::None), warning(false)
196 {
197 }
198
199 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
200 {
201 }
202
203 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
204 {
205 blocks.store(rhs.blocks);
206 }
207
208 DynBlock& operator=(const DynBlock& rhs)
209 {
210 reason=rhs.reason;
211 until=rhs.until;
212 domain=rhs.domain;
213 action=rhs.action;
214 blocks.store(rhs.blocks);
215 warning=rhs.warning;
216 return *this;
217 }
218
219 string reason;
220 struct timespec until;
221 DNSName domain;
222 DNSAction::Action action;
223 mutable std::atomic<unsigned int> blocks;
224 bool warning;
225 };
226
227 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
228
229 extern vector<pair<struct timeval, std::string> > g_confDelta;
230
231 extern uint64_t getLatencyCount(const std::string&);
232
233 struct DNSDistStats
234 {
235 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
236 stat_t responses{0};
237 stat_t servfailResponses{0};
238 stat_t queries{0};
239 stat_t frontendNXDomain{0};
240 stat_t frontendServFail{0};
241 stat_t frontendNoError{0};
242 stat_t nonCompliantQueries{0};
243 stat_t nonCompliantResponses{0};
244 stat_t rdQueries{0};
245 stat_t emptyQueries{0};
246 stat_t aclDrops{0};
247 stat_t dynBlocked{0};
248 stat_t ruleDrop{0};
249 stat_t ruleNXDomain{0};
250 stat_t ruleRefused{0};
251 stat_t ruleServFail{0};
252 stat_t selfAnswered{0};
253 stat_t downstreamTimeouts{0};
254 stat_t downstreamSendErrors{0};
255 stat_t truncFail{0};
256 stat_t noPolicy{0};
257 stat_t cacheHits{0};
258 stat_t cacheMisses{0};
259 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
260 stat_t securityStatus{0};
261
262 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
263 typedef std::function<uint64_t(const std::string&)> statfunction_t;
264 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
265 std::vector<std::pair<std::string, entry_t>> entries{
266 {"responses", &responses},
267 {"servfail-responses", &servfailResponses},
268 {"queries", &queries},
269 {"frontend-nxdomain", &frontendNXDomain},
270 {"frontend-servfail", &frontendServFail},
271 {"frontend-noerror", &frontendNoError},
272 {"acl-drops", &aclDrops},
273 {"rule-drop", &ruleDrop},
274 {"rule-nxdomain", &ruleNXDomain},
275 {"rule-refused", &ruleRefused},
276 {"rule-servfail", &ruleServFail},
277 {"self-answered", &selfAnswered},
278 {"downstream-timeouts", &downstreamTimeouts},
279 {"downstream-send-errors", &downstreamSendErrors},
280 {"trunc-failures", &truncFail},
281 {"no-policy", &noPolicy},
282 {"latency0-1", &latency0_1},
283 {"latency1-10", &latency1_10},
284 {"latency10-50", &latency10_50},
285 {"latency50-100", &latency50_100},
286 {"latency100-1000", &latency100_1000},
287 {"latency-slow", &latencySlow},
288 {"latency-avg100", &latencyAvg100},
289 {"latency-avg1000", &latencyAvg1000},
290 {"latency-avg10000", &latencyAvg10000},
291 {"latency-avg1000000", &latencyAvg1000000},
292 {"uptime", uptimeOfProcess},
293 {"real-memory-usage", getRealMemoryUsage},
294 {"special-memory-usage", getSpecialMemoryUsage},
295 {"noncompliant-queries", &nonCompliantQueries},
296 {"noncompliant-responses", &nonCompliantResponses},
297 {"rdqueries", &rdQueries},
298 {"empty-queries", &emptyQueries},
299 {"cache-hits", &cacheHits},
300 {"cache-misses", &cacheMisses},
301 {"cpu-user-msec", getCPUTimeUser},
302 {"cpu-sys-msec", getCPUTimeSystem},
303 {"fd-usage", getOpenFileDescriptors},
304 {"dyn-blocked", &dynBlocked},
305 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
306 {"security-status", &securityStatus},
307 // Latency histogram
308 {"latency-sum", &latencySum},
309 {"latency-count", getLatencyCount},
310 };
311 };
312
// Metric types for Prometheus
enum class PrometheusMetricType: int {
  counter = 1,
  gauge = 2
};

// Keeps additional information about metrics
struct MetricDefinition {
  MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
  }

  MetricDefinition() = default;

  // Metric description
  std::string description;
  // Metric type for Prometheus.
  // Value-initialized (0, which matches no enumerator) so that a
  // default-constructed definition never holds an indeterminate value;
  // getPrometheusStringMetricType() maps such a value to "".
  PrometheusMetricType prometheusType{};
};

// Static table describing the exported metrics (Prometheus type and help text).
struct MetricDefinitionStorage {
  // Look a metric up by name.
  // Returns true and copies the definition into 'metric' when the name is
  // known; returns false and leaves 'metric' untouched otherwise.
  bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) const {
    const auto metricDetailsIter = metrics.find(metricName);

    if (metricDetailsIter == metrics.end()) {
      return false;
    }

    metric = metricDetailsIter->second;
    return true;
  }

  // Return string representation of Prometheus metric type,
  // or an empty string for a value that is neither counter nor gauge.
  std::string getPrometheusStringMetricType(PrometheusMetricType metricType) const {
    switch (metricType) {
    case PrometheusMetricType::counter:
      return "counter";
    case PrometheusMetricType::gauge:
      return "gauge";
    default:
      return "";
    }
  }

  std::map<std::string, MetricDefinition> metrics = {
    { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
    { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
    { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
    { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
    { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
    { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
    { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
    { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
    { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
    { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
    { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers returned because of a rule")},
    { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
    { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
    { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
    { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
    { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
    { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
    { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
    { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
    { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
    { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
    { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
    { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
    { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
    { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
    { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
    { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
    { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
    { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
    { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
    { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
    { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
    { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
    { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was not found in the cache")},
    { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
    { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
    { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
    { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
    { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
    { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
  };
};
403
404 extern MetricDefinitionStorage g_metricDefinitions;
405 extern struct DNSDistStats g_stats;
406 void doLatencyStats(double udiff);
407
408
409 struct StopWatch
410 {
411 StopWatch(bool realTime=false): d_needRealTime(realTime)
412 {
413 }
414 struct timespec d_start{0,0};
415 bool d_needRealTime{false};
416
417 void start() {
418 if(gettime(&d_start, d_needRealTime) < 0)
419 unixDie("Getting timestamp");
420
421 }
422
423 void set(const struct timespec& from) {
424 d_start = from;
425 }
426
427 double udiff() const {
428 struct timespec now;
429 if(gettime(&now, d_needRealTime) < 0)
430 unixDie("Getting timestamp");
431
432 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
433 }
434
435 double udiffAndSet() {
436 struct timespec now;
437 if(gettime(&now, d_needRealTime) < 0)
438 unixDie("Getting timestamp");
439
440 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
441 d_start = now;
442 return ret;
443 }
444
445 };
446
447 class BasicQPSLimiter
448 {
449 public:
450 BasicQPSLimiter()
451 {
452 }
453
454 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
455 {
456 d_prev.start();
457 }
458
459 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
460 {
461 auto delta = d_prev.udiffAndSet();
462
463 if(delta > 0.0) // time, frequently, does go backwards..
464 d_tokens += 1.0 * rate * (delta/1000000.0);
465
466 if(d_tokens > burst) {
467 d_tokens = burst;
468 }
469
470 bool ret=false;
471 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
472 ret=true;
473 --d_tokens;
474 }
475
476 return ret;
477 }
478
479 bool seenSince(const struct timespec& cutOff) const
480 {
481 return cutOff < d_prev.d_start;
482 }
483
484 protected:
485 mutable StopWatch d_prev;
486 mutable double d_tokens;
487 };
488
489 class QPSLimiter : public BasicQPSLimiter
490 {
491 public:
492 QPSLimiter(): BasicQPSLimiter()
493 {
494 }
495
496 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
497 {
498 d_prev.start();
499 }
500
501 unsigned int getRate() const
502 {
503 return d_passthrough ? 0 : d_rate;
504 }
505
506 int getPassed() const
507 {
508 return d_passed;
509 }
510
511 int getBlocked() const
512 {
513 return d_blocked;
514 }
515
516 bool check() const // this is not quite fair
517 {
518 if (d_passthrough) {
519 return true;
520 }
521
522 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
523 if (ret) {
524 d_passed++;
525 }
526 else {
527 d_blocked++;
528 }
529
530 return ret;
531 }
532 private:
533 mutable unsigned int d_passed{0};
534 mutable unsigned int d_blocked{0};
535 unsigned int d_rate;
536 unsigned int d_burst;
537 bool d_passthrough{true};
538 };
539
540 struct ClientState;
541
542 struct IDState
543 {
544 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
545 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
546 {
547 usageIndicator.store(orig.usageIndicator.load());
548 origFD = orig.origFD;
549 origID = orig.origID;
550 delayMsec = orig.delayMsec;
551 tempFailureTTL = orig.tempFailureTTL;
552 }
553
554 static const int64_t unusedIndicator = -1;
555
556 static bool isInUse(int64_t usageIndicator)
557 {
558 return usageIndicator != unusedIndicator;
559 }
560
561 bool isInUse() const
562 {
563 return usageIndicator != unusedIndicator;
564 }
565
566 /* return true if the value has been successfully replaced meaning that
567 no-one updated the usage indicator in the meantime */
568 bool tryMarkUnused(int64_t expectedUsageIndicator)
569 {
570 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
571 }
572
573 /* mark as unused no matter what, return true if the state was in use before */
574 bool markAsUsed()
575 {
576 auto currentGeneration = generation++;
577 return markAsUsed(currentGeneration);
578 }
579
580 /* mark as unused no matter what, return true if the state was in use before */
581 bool markAsUsed(int64_t currentGeneration)
582 {
583 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
584 return oldUsage != unusedIndicator;
585 }
586
587 /* We use this value to detect whether this state is in use.
588 For performance reasons we don't want to use a lock here, but that means
589 we need to be very careful when modifying this value. Modifications happen
590 from:
591 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
592 then picking one of the states associated to this backend (via the idOffset).
593 Most of the time this state should not be in use and usageIndicator is -1, but we
594 might not yet have received a response for the query previously associated to this
595 state, meaning that we will 'reuse' this state and erase the existing state.
596 If we ever receive a response for this state, it will be discarded. This is
597 mostly fine for UDP except that we still need to be careful in order to miss
598 the 'outstanding' counters, which should only be increased when we are picking
599 an empty state, and not when reusing ;
600 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
601 be freed, as well as internal objects internals to libh2o.
602 - one of the UDP receiver threads receiving a response from a backend, picking
603 the corresponding state and sending the response to the client ;
604 - the 'healthcheck' thread scanning the states to actively discover timeouts,
605 mostly to keep some counters like the 'outstanding' one sane.
606 We previously based that logic on the origFD (FD on which the query was received,
607 and therefore from where the response should be sent) but this suffered from an
608 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
609 same value since we only have so much incoming sockets:
610 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
611 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
612 qtype and qclass match
613 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
614 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
615 5, except it's not the same 5 anymore and it overrides a fresh state.
616 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
617 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
618 when the state is unused and the value of our counter otherwise.
619 */
620 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
621 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
622 ComboAddress origRemote; // 28
623 ComboAddress origDest; // 28
624 StopWatch sentTime; // 16
625 DNSName qname; // 80
626 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
627 #ifdef HAVE_PROTOBUF
628 boost::optional<boost::uuids::uuid> uniqueId;
629 #endif
630 boost::optional<Netmask> subnet{boost::none};
631 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
632 std::shared_ptr<QTag> qTag{nullptr};
633 const ClientState* cs{nullptr};
634 DOHUnit* du{nullptr};
635 uint32_t cacheKey; // 4
636 uint32_t cacheKeyNoECS; // 4
637 uint16_t age; // 4
638 uint16_t qtype; // 2
639 uint16_t qclass; // 2
640 uint16_t origID; // 2
641 uint16_t origFlags; // 2
642 int origFD{-1};
643 int delayMsec;
644 boost::optional<uint32_t> tempFailureTTL;
645 bool ednsAdded{false};
646 bool ecsAdded{false};
647 bool skipCache{false};
648 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
649 bool dnssecOK{false};
650 bool useZeroScope;
651 };
652
653 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
654 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
655 struct QueryCount {
656 QueryCount()
657 {
658 pthread_rwlock_init(&queryLock, nullptr);
659 }
660 ~QueryCount()
661 {
662 pthread_rwlock_destroy(&queryLock);
663 }
664 QueryCountRecords records;
665 QueryCountFilter filter;
666 pthread_rwlock_t queryLock;
667 bool enabled{false};
668 };
669
670 extern QueryCount g_qcount;
671
672 struct ClientState
673 {
674 ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
675 {
676 }
677
678 std::set<int> cpus;
679 ComboAddress local;
680 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
681 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
682 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
683 std::string interface;
684 std::atomic<uint64_t> queries{0};
685 mutable std::atomic<uint64_t> responses{0};
686 std::atomic<uint64_t> tcpDiedReadingQuery{0};
687 std::atomic<uint64_t> tcpDiedSendingResponse{0};
688 std::atomic<uint64_t> tcpGaveUp{0};
689 std::atomic<uint64_t> tcpClientTimeouts{0};
690 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
691 std::atomic<uint64_t> tcpCurrentConnections{0};
692 std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
693 std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
694 std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
695 std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
696 std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0
697 std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1
698 std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2
699 std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3
700 std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
701 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
702 /* in ms */
703 std::atomic<double> tcpAvgConnectionDuration{0.0};
704 int udpFD{-1};
705 int tcpFD{-1};
706 int fastOpenQueueSize{0};
707 bool muted{false};
708 bool tcp;
709 bool reuseport;
710 bool ready{false};
711
712 int getSocket() const
713 {
714 return udpFD != -1 ? udpFD : tcpFD;
715 }
716
717 bool isUDP() const
718 {
719 return udpFD != -1;
720 }
721
722 bool isTCP() const
723 {
724 return udpFD == -1;
725 }
726
727 bool hasTLS() const
728 {
729 return tlsFrontend != nullptr || dohFrontend != nullptr;
730 }
731
732 std::string getType() const
733 {
734 std::string result = udpFD != -1 ? "UDP" : "TCP";
735
736 if (dohFrontend) {
737 result += " (DNS over HTTPS)";
738 }
739 else if (tlsFrontend) {
740 result += " (DNS over TLS)";
741 }
742 else if (dnscryptCtx) {
743 result += " (DNSCrypt)";
744 }
745
746 return result;
747 }
748
749 #ifdef HAVE_EBPF
750 shared_ptr<BPFFilter> d_filter;
751
752 void detachFilter()
753 {
754 if (d_filter) {
755 d_filter->removeSocket(getSocket());
756 d_filter = nullptr;
757 }
758 }
759
760 void attachFilter(shared_ptr<BPFFilter> bpf)
761 {
762 detachFilter();
763
764 bpf->addSocket(getSocket());
765 d_filter = bpf;
766 }
767 #endif /* HAVE_EBPF */
768
769 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
770 {
771 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
772 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
773 }
774 };
775
776 class TCPClientCollection {
777 std::vector<int> d_tcpclientthreads;
778 std::atomic<uint64_t> d_numthreads{0};
779 std::atomic<uint64_t> d_pos{0};
780 std::atomic<uint64_t> d_queued{0};
781 const uint64_t d_maxthreads{0};
782 std::mutex d_mutex;
783 int d_singlePipe[2];
784 const bool d_useSinglePipe;
785 public:
786
787 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
788
789 {
790 d_tcpclientthreads.reserve(maxThreads);
791
792 if (d_useSinglePipe) {
793 if (pipe(d_singlePipe) < 0) {
794 int err = errno;
795 throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
796 }
797
798 if (!setNonBlocking(d_singlePipe[0])) {
799 int err = errno;
800 close(d_singlePipe[0]);
801 close(d_singlePipe[1]);
802 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
803 }
804
805 if (!setNonBlocking(d_singlePipe[1])) {
806 int err = errno;
807 close(d_singlePipe[0]);
808 close(d_singlePipe[1]);
809 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
810 }
811 }
812 }
813 int getThread()
814 {
815 uint64_t pos = d_pos++;
816 ++d_queued;
817 return d_tcpclientthreads[pos % d_numthreads];
818 }
819 bool hasReachedMaxThreads() const
820 {
821 return d_numthreads >= d_maxthreads;
822 }
823 uint64_t getThreadsCount() const
824 {
825 return d_numthreads;
826 }
827 uint64_t getQueuedCount() const
828 {
829 return d_queued;
830 }
831 void decrementQueuedCount()
832 {
833 --d_queued;
834 }
835 void addTCPClientThread();
836 };
837
838 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
839
840 struct DownstreamState
841 {
842 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
843
844 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
845 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
846 ~DownstreamState()
847 {
848 for (auto& fd : sockets) {
849 if (fd >= 0) {
850 close(fd);
851 fd = -1;
852 }
853 }
854 pthread_rwlock_destroy(&d_lock);
855 }
856 boost::uuids::uuid id;
857 std::set<unsigned int> hashes;
858 mutable pthread_rwlock_t d_lock;
859 std::vector<int> sockets;
860 const std::string sourceItfName;
861 std::mutex socketsLock;
862 std::mutex connectLock;
863 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
864 std::thread tid;
865 const ComboAddress remote;
866 QPSLimiter qps;
867 vector<IDState> idStates;
868 const ComboAddress sourceAddr;
869 checkfunc_t checkFunction;
870 DNSName checkName{"a.root-servers.net."};
871 QType checkType{QType::A};
872 uint16_t checkClass{QClass::IN};
873 std::atomic<uint64_t> idOffset{0};
874 std::atomic<uint64_t> sendErrors{0};
875 std::atomic<uint64_t> outstanding{0};
876 std::atomic<uint64_t> reuseds{0};
877 std::atomic<uint64_t> queries{0};
878 std::atomic<uint64_t> responses{0};
879 struct {
880 std::atomic<uint64_t> sendErrors{0};
881 std::atomic<uint64_t> reuseds{0};
882 std::atomic<uint64_t> queries{0};
883 } prev;
884 std::atomic<uint64_t> tcpDiedSendingQuery{0};
885 std::atomic<uint64_t> tcpDiedReadingResponse{0};
886 std::atomic<uint64_t> tcpGaveUp{0};
887 std::atomic<uint64_t> tcpReadTimeouts{0};
888 std::atomic<uint64_t> tcpWriteTimeouts{0};
889 std::atomic<uint64_t> tcpCurrentConnections{0};
890 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
891 /* in ms */
892 std::atomic<double> tcpAvgConnectionDuration{0.0};
893 string name;
894 size_t socketsOffset{0};
895 double queryLoad{0.0};
896 double dropRate{0.0};
897 double latencyUsec{0.0};
898 int order{1};
899 int weight{1};
900 int tcpConnectTimeout{5};
901 int tcpRecvTimeout{30};
902 int tcpSendTimeout{30};
903 unsigned int checkInterval{1};
904 unsigned int lastCheck{0};
905 const unsigned int sourceItf{0};
906 uint16_t retries{5};
907 uint16_t xpfRRCode{0};
908 uint16_t checkTimeout{1000}; /* in milliseconds */
909 uint8_t currentCheckFailures{0};
910 uint8_t consecutiveSuccessfulChecks{0};
911 uint8_t maxCheckFailures{1};
912 uint8_t minRiseSuccesses{1};
913 StopWatch sw;
914 set<string> pools;
915 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
916 bool mustResolve{false};
917 bool upStatus{false};
918 bool useECS{false};
919 bool setCD{false};
920 bool disableZeroScope{false};
921 std::atomic<bool> connected{false};
922 std::atomic_flag threadStarted;
923 bool tcpFastOpen{false};
924 bool ipBindAddrNoPort{true};
925
926 bool isUp() const
927 {
928 if(availability == Availability::Down)
929 return false;
930 if(availability == Availability::Up)
931 return true;
932 return upStatus;
933 }
934 void setUp() { availability = Availability::Up; }
935 void setDown() { availability = Availability::Down; }
936 void setAuto() { availability = Availability::Auto; }
937 string getName() const {
938 if (name.empty()) {
939 return remote.toStringWithPort();
940 }
941 return name;
942 }
943 string getNameWithAddr() const {
944 if (name.empty()) {
945 return remote.toStringWithPort();
946 }
947 return name + " (" + remote.toStringWithPort()+ ")";
948 }
949 string getStatus() const
950 {
951 string status;
952 if(availability == DownstreamState::Availability::Up)
953 status = "UP";
954 else if(availability == DownstreamState::Availability::Down)
955 status = "DOWN";
956 else
957 status = (upStatus ? "up" : "down");
958 return status;
959 }
960 bool reconnect();
961 void hash();
962 void setId(const boost::uuids::uuid& newId);
963 void setWeight(int newWeight);
964
965 void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
966 {
967 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
968 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
969 }
970 };
/* List of shared DownstreamState objects; this is the payload type of g_dstates. */
971 using servers_t =vector<std::shared_ptr<DownstreamState>>;
972
/* A vector whose elements are paired with an unsigned number. ServerPool keeps
   those numbers 1-based and contiguous for the benefit of Lua policies. */
973 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
974
975 void responderThread(std::shared_ptr<DownstreamState> state);
/* Lua state for the process and its mutex
   (NOTE(review): presumably every use of g_lua must hold g_luamutex - confirm). */
976 extern std::mutex g_luamutex;
977 extern LuaContext g_lua;
978 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
979
980 class DNSRule
981 {
982 public:
983 virtual ~DNSRule ()
984 {
985 }
986 virtual bool matches(const DNSQuestion* dq) const =0;
987 virtual string toString() const = 0;
988 mutable std::atomic<uint64_t> d_matches{0};
989 };
990
/* (number, server) pairs, the form in which candidates are handed to a policy. */
991 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
/* Signature of a server selection policy: receives the candidates and the
   question, returns a shared pointer to a DownstreamState. */
992 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
993
994 struct ServerPolicy
995 {
996 string name;
997 policyfunc_t policy;
998 bool isLua;
999 std::string toString() const {
1000 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
1001 }
1002 };
1003
1004 struct ServerPool
1005 {
1006 ServerPool()
1007 {
1008 pthread_rwlock_init(&d_lock, nullptr);
1009 }
1010 ~ServerPool()
1011 {
1012 pthread_rwlock_destroy(&d_lock);
1013 }
1014
1015 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1016
1017 bool getECS() const
1018 {
1019 return d_useECS;
1020 }
1021
1022 void setECS(bool useECS)
1023 {
1024 d_useECS = useECS;
1025 }
1026
1027 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1028 std::shared_ptr<ServerPolicy> policy{nullptr};
1029
1030 size_t countServers(bool upOnly)
1031 {
1032 size_t count = 0;
1033 ReadLock rl(&d_lock);
1034 for (const auto& server : d_servers) {
1035 if (!upOnly || std::get<1>(server)->isUp() ) {
1036 count++;
1037 }
1038 }
1039 return count;
1040 }
1041
1042 NumberedVector<shared_ptr<DownstreamState>> getServers()
1043 {
1044 NumberedVector<shared_ptr<DownstreamState>> result;
1045 {
1046 ReadLock rl(&d_lock);
1047 result = d_servers;
1048 }
1049 return result;
1050 }
1051
1052 void addServer(shared_ptr<DownstreamState>& server)
1053 {
1054 WriteLock wl(&d_lock);
1055 unsigned int count = (unsigned int) d_servers.size();
1056 d_servers.push_back(make_pair(++count, server));
1057 /* we need to reorder based on the server 'order' */
1058 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1059 return a.second->order < b.second->order;
1060 });
1061 /* and now we need to renumber for Lua (custom policies) */
1062 size_t idx = 1;
1063 for (auto& serv : d_servers) {
1064 serv.first = idx++;
1065 }
1066 }
1067
1068 void removeServer(shared_ptr<DownstreamState>& server)
1069 {
1070 WriteLock wl(&d_lock);
1071 size_t idx = 1;
1072 bool found = false;
1073 for (auto it = d_servers.begin(); it != d_servers.end();) {
1074 if (found) {
1075 /* we need to renumber the servers placed
1076 after the removed one, for Lua (custom policies) */
1077 it->first = idx++;
1078 it++;
1079 }
1080 else if (it->second == server) {
1081 it = d_servers.erase(it);
1082 found = true;
1083 } else {
1084 idx++;
1085 it++;
1086 }
1087 }
1088 }
1089
1090 private:
1091 NumberedVector<shared_ptr<DownstreamState>> d_servers;
1092 pthread_rwlock_t d_lock;
1093 bool d_useECS{false};
1094 };
/* Pools indexed by name. */
1095 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Helpers operating on a pools_t by pool name; their bodies are not in this header. */
1096 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1097 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1098 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1099
/* Settings for one Carbon target; g_carbon holds a vector of these. */
1100 struct CarbonConfig
1101 {
1102 ComboAddress server;
/* three name components (NOTE(review): presumably combined into the metric path - confirm) */
1103 std::string namespace_name;
1104 std::string ourname;
1105 std::string instance_name;
/* NOTE(review): unit is not visible here, presumably seconds - confirm */
1106 unsigned int interval;
1107 };
1108
/* Values for the EDNS header flags. */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0x0000,
  /* DO ("DNSSEC OK") flag: the top bit of a 16-bit value */
  EDNS_HEADER_FLAG_DO = 0x8000
};
1113
/* A rule bundled with a DNSAction, plus a UUID and a creation-order number.
   Vectors of these are held by g_rulactions. */
1114 struct DNSDistRuleAction
1115 {
1116 std::shared_ptr<DNSRule> d_rule;
1117 std::shared_ptr<DNSAction> d_action;
1118 boost::uuids::uuid d_id;
1119 uint64_t d_creationOrder;
1120 };
1121
/* Same layout as DNSDistRuleAction, but the action is a DNSResponseAction.
   Vectors of these are held by g_resprulactions, g_cachehitresprulactions
   and g_selfansweredresprulactions. */
1122 struct DNSDistResponseRuleAction
1123 {
1124 std::shared_ptr<DNSRule> d_rule;
1125 std::shared_ptr<DNSResponseAction> d_action;
1126 boost::uuids::uuid d_id;
1127 uint64_t d_creationOrder;
1128 };
1129
/* Everything below is only declared here (extern); the definitions live in
   other translation units. */

/* dynamic block state */
1130 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1131 extern DNSAction::Action g_dynBlockAction;
1132
/* state wrapped in GlobalStateHolder; LocalHolders takes a local view of
   several of these through getLocal() */
1133 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1134 extern GlobalStateHolder<ServerPolicy> g_policy;
1135 extern GlobalStateHolder<servers_t> g_dstates;
1136 extern GlobalStateHolder<pools_t> g_pools;
1137 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1138 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1139 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1140 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1141 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1142
1143 extern ComboAddress g_serverControl; // not changed during runtime
1144
/* listening endpoints */
1145 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1146 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1147 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1148 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
/* plain (non-holder) settings; only the std::atomic ones are atomic */
1149 extern bool g_truncateTC;
1150 extern bool g_fixupCase;
1151 extern int g_tcpRecvTimeout;
1152 extern int g_tcpSendTimeout;
1153 extern int g_udpTimeout;
1154 extern uint16_t g_maxOutstanding;
1155 extern std::atomic<bool> g_configurationDone;
1156 extern uint64_t g_maxTCPClientThreads;
1157 extern uint64_t g_maxTCPQueuedConnections;
1158 extern size_t g_maxTCPQueriesPerConn;
1159 extern size_t g_maxTCPConnectionDuration;
1160 extern size_t g_maxTCPConnectionsPerClient;
1161 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1162 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1163 extern uint32_t g_staleCacheEntriesTTL;
1164 extern bool g_apiReadWrite;
1165 extern std::string g_apiConfigDirectory;
1166 extern bool g_servFailOnNoPolicy;
1167 extern uint32_t g_hashperturb;
1168 extern bool g_useTCPSinglePipe;
1169 extern uint16_t g_downstreamTCPCleanupInterval;
1170 extern size_t g_udpVectorSize;
1171 extern bool g_preserveTrailingData;
1172 extern bool g_allowEmptyResponse;
1173 extern bool g_roundrobinFailOnNoServer;
1174 extern double g_consistentHashBalancingFactor;
1175
/* only declared when the build defines HAVE_EBPF */
1176 #ifdef HAVE_EBPF
1177 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1178 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1179 #endif /* HAVE_EBPF */
1180
1181 struct LocalHolders
1182 {
1183 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1184 {
1185 }
1186
1187 LocalStateHolder<NetmaskGroup> acl;
1188 LocalStateHolder<ServerPolicy> policy;
1189 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1190 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1191 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1192 LocalStateHolder<servers_t> servers;
1193 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1194 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1195 LocalStateHolder<pools_t> pools;
1196 };
1197
1198 struct dnsheader;
1199
1200 void controlThread(int fd, ComboAddress local);
/* pool lookup helpers, by pool name */
1201 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1202 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1203 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1204
/* Server selection functions; each one has the parameter list and return
   type described by policyfunc_t. */
1205 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1206
1207 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1208 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1209 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1210 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1211 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1212
/* Settings of the built-in web server: password, API key and optional
   extra headers, together with a mutex.
   NOTE(review): presumably the mutex guards the three fields above it - confirm. */
1213 struct WebserverConfig
1214 {
1215 std::string password;
1216 std::string apiKey;
1217 boost::optional<std::map<std::string, std::string> > customHeaders;
1218 std::mutex lock;
1219 };
1220
/* setters for the web server settings (see WebserverConfig) */
1221 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1222 void setWebserverPassword(const std::string& password);
1223 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1224
/* thread entry points */
1225 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1226 void tcpAcceptorThread(void* p);
1227 #ifdef HAVE_DNS_OVER_HTTPS
1228 void dohThread(ClientState* cs);
1229 #endif /* HAVE_DNS_OVER_HTTPS */
1230
/* tracking of Lua side effects */
1231 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1232 void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1233 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1234 void resetLuaSideEffect(); // reset to indeterminate state
1235
/* response and rule processing */
1236 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1237 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1238 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1239
1240 bool checkQueryHeaders(const struct dnsheader* dh);
1241
/* DNSCrypt */
1242 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1243 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1244 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1245
1246 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1247
1248 uint16_t getRandomDNSID();
1249
1250 #include "dnsdist-snmp.hh"
1251
/* SNMP-related globals, defined elsewhere */
1252 extern bool g_snmpEnabled;
1253 extern bool g_snmpTrapsEnabled;
1254 extern DNSDistSNMPAgent* g_snmpAgent;
1255 extern bool g_addEDNSToSelfGeneratedResponses;
1256
1257 extern std::set<std::string> g_capabilitiesToRetain;
/* size limits; being static const in a header, every including translation
   unit gets its own copy */
1258 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
1259 static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
1260
/* the three outcomes processQuery() can report */
1261 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
1262 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1263
/* conversions between IDState and DNSQuestion / DNSResponse */
1264 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1265 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1266
/* sending to a backend */
1267 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1268 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);
1268 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);