]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/dnsdist.hh
Merge pull request #8223 from PowerDNS/omoerbeek-patch-1
[thirdparty/pdns.git] / pdns / dnsdist.hh
CommitLineData
12471842
PL
1/*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
df111b53 22#pragma once
11e1e08b 23#include "config.h"
df111b53 24#include "ext/luawrapper/include/LuaContext.hpp"
cbf4e13a 25
df111b53 26#include <atomic>
df111b53 27#include <mutex>
cbf4e13a 28#include <string>
df111b53 29#include <thread>
cbf4e13a 30#include <time.h>
bffca8b9 31#include <unistd.h>
cbf4e13a
RG
32#include <unordered_map>
33
cbf4e13a
RG
34#include <boost/variant.hpp>
35
36#include "bpf-filter.hh"
f12666f2 37#include "capabilities.hh"
9f6a31ff 38#include "circular_buffer.hh"
11e1e08b 39#include "dnscrypt.hh"
886e2cf2 40#include "dnsdist-cache.hh"
87b515ed 41#include "dnsdist-dynbpf.hh"
cbf4e13a 42#include "dnsname.hh"
fbf14b03 43#include "doh.hh"
cbf4e13a
RG
44#include "ednsoptions.hh"
45#include "gettime.hh"
46#include "iputils.hh"
47#include "misc.hh"
48#include "mplexer.hh"
49#include "sholder.hh"
a227f47d 50#include "tcpiohandler.hh"
d61aa945 51#include "uuid-utils.hh"
d8c19b98 52
9b73b71c 53void carbonDumpThread();
61d1b966 54uint64_t uptimeOfProcess(const std::string& str);
bd1c631b 55
7b925432
RG
56extern uint16_t g_ECSSourcePrefixV4;
57extern uint16_t g_ECSSourcePrefixV6;
58extern bool g_ECSOverride;
26a6373d 59
15fac047
CH
60typedef std::unordered_map<string, string> QTag;
61
7b925432
RG
62struct DNSQuestion
63{
e7c732b8 64 DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
4ab01344
RG
65 qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
66 const uint16_t* flags = getFlagsFromDNSHeader(dh);
67 origFlags = *flags;
68 }
dd1a3034
RG
69 DNSQuestion(const DNSQuestion&) = delete;
70 DNSQuestion& operator=(const DNSQuestion&) = delete;
71 DNSQuestion(DNSQuestion&&) = default;
7b925432
RG
72
73#ifdef HAVE_PROTOBUF
ec48a28d 74 boost::optional<boost::uuids::uuid> uniqueId;
7b925432 75#endif
bd14f087 76 Netmask ecs;
4ab01344 77 boost::optional<Netmask> subnet;
046bac5c 78 std::string sni; /* Server Name Indication, if any (DoT or DoH) */
2a28db86 79 std::string poolname;
4ab01344
RG
80 const DNSName* qname{nullptr};
81 const ComboAddress* local{nullptr};
82 const ComboAddress* remote{nullptr};
15fac047 83 std::shared_ptr<QTag> qTag{nullptr};
cbf4e13a 84 std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
4ab01344
RG
85 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
86 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
87 struct dnsheader* dh{nullptr};
88 const struct timespec* queryTime{nullptr};
fbf14b03 89 struct DOHUnit* du{nullptr};
7b925432 90 size_t size;
e7c732b8 91 unsigned int consumed{0};
4ab01344
RG
92 int delayMsec{0};
93 boost::optional<uint32_t> tempFailureTTL;
94 uint32_t cacheKeyNoECS;
95 uint32_t cacheKey;
96 const uint16_t qtype;
97 const uint16_t qclass;
7b925432
RG
98 uint16_t len;
99 uint16_t ecsPrefixLength;
4ab01344 100 uint16_t origFlags;
1ecbd15e 101 uint8_t ednsRCode{0};
7b925432
RG
102 const bool tcp;
103 bool skipCache{false};
104 bool ecsOverride;
5b8255ba 105 bool useECS{true};
5cc8371b 106 bool addXPF{true};
bd14f087 107 bool ecsSet{false};
4ab01344
RG
108 bool ecsAdded{false};
109 bool ednsAdded{false};
110 bool useZeroScope{false};
111 bool dnssecOK{false};
7b925432
RG
112};
113
114struct DNSResponse : DNSQuestion
115{
e7c732b8
RG
116 DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
117 DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
dd1a3034
RG
118 DNSResponse(const DNSResponse&) = delete;
119 DNSResponse& operator=(const DNSResponse&) = delete;
120 DNSResponse(DNSResponse&&) = default;
7b925432
RG
121};
122
5c30ec69
LM
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header: (servfail|refused|notimp), set TC=1,
   send to pool */
131
132class DNSAction
133{
134public:
3d60b39a 135 enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
b718792f
RG
136 static std::string typeToString(const Action& action)
137 {
138 switch(action) {
139 case Action::Drop:
140 return "Drop";
141 case Action::Nxdomain:
142 return "Send NXDomain";
143 case Action::Refused:
144 return "Send Refused";
145 case Action::Spoof:
146 return "Spoof an answer";
147 case Action::Allow:
148 return "Allow";
149 case Action::HeaderModify:
150 return "Modify the header";
151 case Action::Pool:
152 return "Route to a pool";
153 case Action::Delay:
154 return "Delay";
155 case Action::Truncate:
156 return "Truncate over UDP";
157 case Action::ServFail:
158 return "Send ServFail";
159 case Action::None:
477c86a0 160 case Action::NoOp:
b718792f 161 return "Do nothing";
3d60b39a 162 case Action::NoRecurse:
163 return "Set rd=0";
b718792f
RG
164 }
165
166 return "Unknown";
167 }
168
7b925432 169 virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
205f2081
RG
170 virtual ~DNSAction()
171 {
172 }
7b925432 173 virtual string toString() const = 0;
b8019cf7 174 virtual std::map<string, double> getStats() const
7b925432
RG
175 {
176 return {{}};
177 }
178};
179
180class DNSResponseAction
181{
182public:
5f23eb98 183 enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
7b925432 184 virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
205f2081
RG
185 virtual ~DNSResponseAction()
186 {
187 }
7b925432
RG
188 virtual string toString() const = 0;
189};
190
78ffa782 191struct DynBlock
192{
1d3ba133 193 DynBlock(): action(DNSAction::Action::None), warning(false)
5708a729
RG
194 {
195 }
196
1d3ba133 197 DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
5708a729
RG
198 {
199 }
200
1d3ba133 201 DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
5708a729
RG
202 {
203 blocks.store(rhs.blocks);
204 }
205
78ffa782 206 DynBlock& operator=(const DynBlock& rhs)
207 {
208 reason=rhs.reason;
209 until=rhs.until;
71c94675 210 domain=rhs.domain;
7b925432 211 action=rhs.action;
78ffa782 212 blocks.store(rhs.blocks);
1d3ba133 213 warning=rhs.warning;
78ffa782 214 return *this;
215 }
71c94675 216
78ffa782 217 string reason;
218 struct timespec until;
71c94675 219 DNSName domain;
7b925432 220 DNSAction::Action action;
78ffa782 221 mutable std::atomic<unsigned int> blocks;
1d3ba133 222 bool warning;
78ffa782 223};
224
225extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
f758857a 226
227extern vector<pair<struct timeval, std::string> > g_confDelta;
228
eb0335ff
MC
229extern uint64_t getLatencyCount(const std::string&);
230
e48090d1 231struct DNSDistStats
232{
6ad8b29a 233 using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
e48090d1 234 stat_t responses{0};
235 stat_t servfailResponses{0};
236 stat_t queries{0};
61d10a4d
MH
237 stat_t frontendNXDomain{0};
238 stat_t frontendServFail{0};
239 stat_t frontendNoError{0};
e73ec7d3 240 stat_t nonCompliantQueries{0};
d08b1cdf 241 stat_t nonCompliantResponses{0};
643a182a 242 stat_t rdQueries{0};
2efd427d 243 stat_t emptyQueries{0};
e48090d1 244 stat_t aclDrops{0};
bd1c631b 245 stat_t dynBlocked{0};
e48090d1 246 stat_t ruleDrop{0};
247 stat_t ruleNXDomain{0};
dd46e5e3 248 stat_t ruleRefused{0};
5f23eb98 249 stat_t ruleServFail{0};
e48090d1 250 stat_t selfAnswered{0};
251 stat_t downstreamTimeouts{0};
252 stat_t downstreamSendErrors{0};
6ad8b29a 253 stat_t truncFail{0};
b8bc7e61 254 stat_t noPolicy{0};
886e2cf2
RG
255 stat_t cacheHits{0};
256 stat_t cacheMisses{0};
eb0335ff 257 stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
f29758cc 258 stat_t securityStatus{0};
5c30ec69 259
e16fd59c 260 double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
a1a787dc 261 typedef std::function<uint64_t(const std::string&)> statfunction_t;
72f58a53 262 typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
e16fd59c 263 std::vector<std::pair<std::string, entry_t>> entries{
dd46e5e3
RG
264 {"responses", &responses},
265 {"servfail-responses", &servfailResponses},
266 {"queries", &queries},
61d10a4d
MH
267 {"frontend-nxdomain", &frontendNXDomain},
268 {"frontend-servfail", &frontendServFail},
269 {"frontend-noerror", &frontendNoError},
dd46e5e3 270 {"acl-drops", &aclDrops},
dd46e5e3
RG
271 {"rule-drop", &ruleDrop},
272 {"rule-nxdomain", &ruleNXDomain},
273 {"rule-refused", &ruleRefused},
5f23eb98 274 {"rule-servfail", &ruleServFail},
dd46e5e3
RG
275 {"self-answered", &selfAnswered},
276 {"downstream-timeouts", &downstreamTimeouts},
5c30ec69 277 {"downstream-send-errors", &downstreamSendErrors},
dd46e5e3
RG
278 {"trunc-failures", &truncFail},
279 {"no-policy", &noPolicy},
280 {"latency0-1", &latency0_1},
281 {"latency1-10", &latency1_10},
282 {"latency10-50", &latency10_50},
283 {"latency50-100", &latency50_100},
284 {"latency100-1000", &latency100_1000},
285 {"latency-slow", &latencySlow},
286 {"latency-avg100", &latencyAvg100},
287 {"latency-avg1000", &latencyAvg1000},
288 {"latency-avg10000", &latencyAvg10000},
289 {"latency-avg1000000", &latencyAvg1000000},
61d1b966 290 {"uptime", uptimeOfProcess},
a9b6db56 291 {"real-memory-usage", getRealMemoryUsage},
330dcb5c 292 {"special-memory-usage", getSpecialMemoryUsage},
a2aa00ed 293 {"noncompliant-queries", &nonCompliantQueries},
d08b1cdf 294 {"noncompliant-responses", &nonCompliantResponses},
643a182a 295 {"rdqueries", &rdQueries},
2efd427d 296 {"empty-queries", &emptyQueries},
886e2cf2
RG
297 {"cache-hits", &cacheHits},
298 {"cache-misses", &cacheMisses},
4f99f3d3
RG
299 {"cpu-user-msec", getCPUTimeUser},
300 {"cpu-sys-msec", getCPUTimeSystem},
dd46e5e3 301 {"fd-usage", getOpenFileDescriptors},
5c30ec69 302 {"dyn-blocked", &dynBlocked},
f29758cc 303 {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
eb0335ff
MC
304 {"security-status", &securityStatus},
305 // Latency histogram
306 {"latency-sum", &latencySum},
307 {"latency-count", getLatencyCount},
42fae326 308 };
e48090d1 309};
310
2e567c94
PO
311// Metric types for Prometheus
312enum class PrometheusMetricType: int {
313 counter = 1,
314 gauge = 2
315};
316
37a5c2d5
PO
317// Keeps additional information about metrics
318struct MetricDefinition {
3cbe2773 319 MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
37a5c2d5
PO
320 }
321
322 MetricDefinition() = default;
323
324 // Metric description
325 std::string description;
326 // Metric type for Prometheus
2e567c94 327 PrometheusMetricType prometheusType;
37a5c2d5
PO
328};
329
330struct MetricDefinitionStorage {
331 // Return metric definition by name
332 bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
333 auto metricDetailsIter = metrics.find(metricName);
334
335 if (metricDetailsIter == metrics.end()) {
336 return false;
337 }
338
339 metric = metricDetailsIter->second;
340 return true;
341 };
342
2e567c94
PO
343 // Return string representation of Prometheus metric type
344 std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
345 switch (metricType) {
346 case PrometheusMetricType::counter:
347 return "counter";
348 break;
349 case PrometheusMetricType::gauge:
350 return "gauge";
351 break;
352 default:
353 return "";
354 break;
355 }
356 };
357
37a5c2d5 358 std::map<std::string, MetricDefinition> metrics = {
2e567c94
PO
359 { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
360 { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
361 { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
61d10a4d
MH
362 { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
363 { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
364 { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
2e567c94
PO
365 { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
366 { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
367 { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
368 { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
369 { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
370 { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
371 { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
372 { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
373 { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
374 { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
375 { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
376 { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
377 { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
378 { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
379 { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
380 { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
381 { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
382 { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
383 { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
384 { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
385 { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
386 { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
387 { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
388 { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
389 { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
390 { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
391 { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
392 { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
393 { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
394 { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
395 { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
1200be3e 396 { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
2e567c94 397 { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
f29758cc 398 { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
eb0335ff
MC
399 // Latency histogram
400 { "latency-sum", MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")},
401 { "latency-count", MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")},
37a5c2d5
PO
402 };
403};
e16fd59c 404
37a5c2d5 405extern MetricDefinitionStorage g_metricDefinitions;
e48090d1 406extern struct DNSDistStats g_stats;
f653b8df 407void doLatencyStats(double udiff);
e48090d1 408
638184e9 409
df111b53 410struct StopWatch
411{
58307a85
RG
412 StopWatch(bool realTime=false): d_needRealTime(realTime)
413 {
414 }
df111b53 415 struct timespec d_start{0,0};
58307a85
RG
416 bool d_needRealTime{false};
417
5c30ec69 418 void start() {
58307a85 419 if(gettime(&d_start, d_needRealTime) < 0)
df111b53 420 unixDie("Getting timestamp");
5c30ec69 421
df111b53 422 }
cf48b0ce
RG
423
424 void set(const struct timespec& from) {
425 d_start = from;
426 }
5c30ec69 427
df111b53 428 double udiff() const {
429 struct timespec now;
58307a85 430 if(gettime(&now, d_needRealTime) < 0)
df111b53 431 unixDie("Getting timestamp");
5c30ec69 432
df111b53 433 return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
434 }
435
436 double udiffAndSet() {
437 struct timespec now;
58307a85 438 if(gettime(&now, d_needRealTime) < 0)
df111b53 439 unixDie("Getting timestamp");
5c30ec69 440
df111b53 441 auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
442 d_start = now;
443 return ret;
444 }
445
446};
447
67ce0bdd 448class BasicQPSLimiter
df111b53 449{
450public:
67ce0bdd 451 BasicQPSLimiter()
df111b53 452 {
453 }
454
2d29e6b7 455 BasicQPSLimiter(unsigned int burst): d_tokens(burst)
67ce0bdd
RG
456 {
457 d_prev.start();
458 }
459
460 bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
461 {
462 auto delta = d_prev.udiffAndSet();
463
1a1787b6 464 if(delta > 0.0) // time, frequently, does go backwards..
465 d_tokens += 1.0 * rate * (delta/1000000.0);
67ce0bdd
RG
466
467 if(d_tokens > burst) {
468 d_tokens = burst;
469 }
470
471 bool ret=false;
472 if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
473 ret=true;
474 --d_tokens;
475 }
476
477 return ret;
478 }
479
480 bool seenSince(const struct timespec& cutOff) const
481 {
482 return cutOff < d_prev.d_start;
483 }
484
485protected:
486 mutable StopWatch d_prev;
487 mutable double d_tokens;
488};
489
490class QPSLimiter : public BasicQPSLimiter
491{
492public:
493 QPSLimiter(): BasicQPSLimiter()
494 {
495 }
496
2d29e6b7 497 QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
df111b53 498 {
df111b53 499 d_prev.start();
500 }
501
502 unsigned int getRate() const
503 {
67ce0bdd 504 return d_passthrough ? 0 : d_rate;
df111b53 505 }
506
507 int getPassed() const
508 {
509 return d_passed;
510 }
67ce0bdd 511
df111b53 512 int getBlocked() const
513 {
514 return d_blocked;
515 }
516
ecbe9133 517 bool check() const // this is not quite fair
df111b53 518 {
67ce0bdd 519 if (d_passthrough) {
df111b53 520 return true;
67ce0bdd 521 }
df111b53 522
67ce0bdd
RG
523 bool ret = BasicQPSLimiter::check(d_rate, d_burst);
524 if (ret) {
df111b53 525 d_passed++;
526 }
67ce0bdd 527 else {
df111b53 528 d_blocked++;
67ce0bdd 529 }
df111b53 530
5c30ec69 531 return ret;
df111b53 532 }
533private:
ecbe9133 534 mutable unsigned int d_passed{0};
535 mutable unsigned int d_blocked{0};
67ce0bdd
RG
536 unsigned int d_rate;
537 unsigned int d_burst;
538 bool d_passthrough{true};
df111b53 539};
540
b5b93e0b
RG
541struct ClientState;
542
df111b53 543struct IDState
544{
a9489723 545 IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
71b86bd8 546 IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
df111b53 547 {
a9489723
RG
548 usageIndicator.store(orig.usageIndicator.load());
549 origFD = orig.origFD;
df111b53 550 origID = orig.origID;
7b3865cd 551 delayMsec = orig.delayMsec;
acb8f5d5 552 tempFailureTTL = orig.tempFailureTTL;
df111b53 553 }
554
311f19d5
RG
555 static const int64_t unusedIndicator = -1;
556
557 static bool isInUse(int64_t usageIndicator)
558 {
559 return usageIndicator != unusedIndicator;
560 }
561
562 bool isInUse() const
563 {
564 return usageIndicator != unusedIndicator;
565 }
566
567 /* return true if the value has been successfully replaced meaning that
568 no-one updated the usage indicator in the meantime */
569 bool tryMarkUnused(int64_t expectedUsageIndicator)
570 {
571 return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
572 }
573
574 /* mark as unused no matter what, return true if the state was in use before */
575 bool markAsUsed()
576 {
577 auto currentGeneration = generation++;
578 return markAsUsed(currentGeneration);
579 }
580
581 /* mark as unused no matter what, return true if the state was in use before */
582 bool markAsUsed(int64_t currentGeneration)
583 {
584 int64_t oldUsage = usageIndicator.exchange(currentGeneration);
585 return oldUsage != unusedIndicator;
586 }
587
a9489723 588 /* We use this value to detect whether this state is in use.
9bd1a882
RG
589 For performance reasons we don't want to use a lock here, but that means
590 we need to be very careful when modifying this value. Modifications happen
591 from:
592 - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
593 then picking one of the states associated to this backend (via the idOffset).
a9489723 594 Most of the time this state should not be in use and usageIndicator is -1, but we
9bd1a882
RG
595 might not yet have received a response for the query previously associated to this
596 state, meaning that we will 'reuse' this state and erase the existing state.
597 If we ever receive a response for this state, it will be discarded. This is
598 mostly fine for UDP except that we still need to be careful in order to miss
599 the 'outstanding' counters, which should only be increased when we are picking
600 an empty state, and not when reusing ;
601 For DoH, though, we have dynamically allocated a DOHUnit object that needs to
602 be freed, as well as internal objects internals to libh2o.
603 - one of the UDP receiver threads receiving a response from a backend, picking
604 the corresponding state and sending the response to the client ;
605 - the 'healthcheck' thread scanning the states to actively discover timeouts,
606 mostly to keep some counters like the 'outstanding' one sane.
a9489723
RG
607 We previously based that logic on the origFD (FD on which the query was received,
608 and therefore from where the response should be sent) but this suffered from an
609 ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
610 same value since we only have so much incoming sockets:
611 - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
612 - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
613 qtype and qclass match
614 - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
615 - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
616 5, except it's not the same 5 anymore and it overrides a fresh state.
617 We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
618 wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
619 when the state is unused and the value of our counter otherwise.
9bd1a882 620 */
311f19d5
RG
621 std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8
622 std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4
2bf26975 623 ComboAddress origRemote; // 28
549d63c9 624 ComboAddress origDest; // 28
2bf26975 625 StopWatch sentTime; // 16
626 DNSName qname; // 80
43234e76 627 std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
d8c19b98 628#ifdef HAVE_PROTOBUF
ec48a28d 629 boost::optional<boost::uuids::uuid> uniqueId;
11e1e08b 630#endif
78e3ac9e 631 boost::optional<Netmask> subnet{boost::none};
886e2cf2 632 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
a76b0d63 633 std::shared_ptr<QTag> qTag{nullptr};
b5b93e0b 634 const ClientState* cs{nullptr};
fbf14b03 635 DOHUnit* du{nullptr};
9837850d 636 uint32_t cacheKey; // 4
637 uint32_t cacheKeyNoECS; // 4
71b86bd8 638 uint16_t age; // 4
2bf26975 639 uint16_t qtype; // 2
886e2cf2 640 uint16_t qclass; // 2
2bf26975 641 uint16_t origID; // 2
aeb36780 642 uint16_t origFlags; // 2
a9489723 643 int origFD{-1};
7b3865cd 644 int delayMsec;
acb8f5d5 645 boost::optional<uint32_t> tempFailureTTL;
ca404e94 646 bool ednsAdded{false};
ff73f02b 647 bool ecsAdded{false};
886e2cf2 648 bool skipCache{false};
7cea4e39 649 bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
d7728daf 650 bool dnssecOK{false};
389d903a 651 bool useZeroScope;
df111b53 652};
653
786e4d8c 654typedef std::unordered_map<string, unsigned int> QueryCountRecords;
dd1a3034 655typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
786e4d8c
RS
656struct QueryCount {
657 QueryCount()
658 {
43234e76 659 pthread_rwlock_init(&queryLock, nullptr);
786e4d8c
RS
660 }
661 QueryCountRecords records;
662 QueryCountFilter filter;
663 pthread_rwlock_t queryLock;
664 bool enabled{false};
665};
666
667extern QueryCount g_qcount;
668
8a5d5053 669struct ClientState
670{
6e9fd124
RG
671 ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort)
672 {
673 }
674
f0e4dcba 675 std::set<int> cpus;
8a5d5053 676 ComboAddress local;
43234e76 677 std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
6e9fd124 678 std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
fbf14b03 679 std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
6e9fd124 680 std::string interface;
963bef8d 681 std::atomic<uint64_t> queries{0};
a6e9e107
RG
682 std::atomic<uint64_t> tcpDiedReadingQuery{0};
683 std::atomic<uint64_t> tcpDiedSendingResponse{0};
684 std::atomic<uint64_t> tcpGaveUp{0};
685 std::atomic<uint64_t> tcpClientTimeouts{0};
686 std::atomic<uint64_t> tcpDownstreamTimeouts{0};
cff9aa03
RG
687 std::atomic<uint64_t> tcpCurrentConnections{0};
688 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
689 /* in ms */
690 std::atomic<double> tcpAvgConnectionDuration{0.0};
a36ce055
RG
691 int udpFD{-1};
692 int tcpFD{-1};
6e9fd124 693 int fastOpenQueueSize{0};
b5b93e0b 694 bool muted{false};
6e9fd124
RG
695 bool tcp;
696 bool reuseport;
697 bool ready{false};
8429ad04
RG
698
699 int getSocket() const
700 {
701 return udpFD != -1 ? udpFD : tcpFD;
702 }
703
ba7ec340
RG
704 std::string getType() const
705 {
706 std::string result = udpFD != -1 ? "UDP" : "TCP";
707
fbf14b03
RG
708 if (dohFrontend) {
709 result += " (DNS over HTTPS)";
710 }
711 else if (tlsFrontend) {
ba7ec340
RG
712 result += " (DNS over TLS)";
713 }
714 else if (dnscryptCtx) {
715 result += " (DNSCrypt)";
716 }
717
718 return result;
719 }
720
8429ad04
RG
721#ifdef HAVE_EBPF
722 shared_ptr<BPFFilter> d_filter;
723
724 void detachFilter()
725 {
726 if (d_filter) {
727 d_filter->removeSocket(getSocket());
728 d_filter = nullptr;
729 }
730 }
731
732 void attachFilter(shared_ptr<BPFFilter> bpf)
733 {
734 detachFilter();
735
736 bpf->addSocket(getSocket());
737 d_filter = bpf;
738 }
739#endif /* HAVE_EBPF */
cff9aa03
RG
740
741 void updateTCPMetrics(size_t queries, uint64_t durationMs)
742 {
743 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
744 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
745 }
8a5d5053 746};
747
748class TCPClientCollection {
749 std::vector<int> d_tcpclientthreads;
ded1985a 750 std::atomic<uint64_t> d_numthreads{0};
a9bf3ec4 751 std::atomic<uint64_t> d_pos{0};
ded1985a 752 std::atomic<uint64_t> d_queued{0};
73402775 753 const uint64_t d_maxthreads{0};
ded1985a 754 std::mutex d_mutex;
edbda1ad 755 int d_singlePipe[2];
73402775 756 const bool d_useSinglePipe;
ded1985a 757public:
8a5d5053 758
b79e4996
RG
759 TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
760
8a5d5053 761 {
a9bf3ec4 762 d_tcpclientthreads.reserve(maxThreads);
edbda1ad
RG
763
764 if (d_useSinglePipe) {
765 if (pipe(d_singlePipe) < 0) {
766 throw std::runtime_error("Error creating the TCP single communication pipe: " + string(strerror(errno)));
767 }
3b07fd1b
RG
768
769 if (!setNonBlocking(d_singlePipe[0])) {
770 int err = errno;
771 close(d_singlePipe[0]);
772 close(d_singlePipe[1]);
773 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
774 }
775
edbda1ad
RG
776 if (!setNonBlocking(d_singlePipe[1])) {
777 int err = errno;
778 close(d_singlePipe[0]);
779 close(d_singlePipe[1]);
780 throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err)));
781 }
782 }
8a5d5053 783 }
a9bf3ec4 784 int getThread()
8a5d5053 785 {
6c1ca990 786 uint64_t pos = d_pos++;
8a5d5053 787 ++d_queued;
788 return d_tcpclientthreads[pos % d_numthreads];
789 }
ded1985a
RG
790 bool hasReachedMaxThreads() const
791 {
792 return d_numthreads >= d_maxthreads;
793 }
794 uint64_t getThreadsCount() const
795 {
796 return d_numthreads;
797 }
798 uint64_t getQueuedCount() const
799 {
800 return d_queued;
801 }
802 void decrementQueuedCount()
803 {
804 --d_queued;
805 }
8a5d5053 806 void addTCPClientThread();
807};
808
/* Global TCPClientCollection instance; only declared here, the definition lives in another translation unit. */
1f7646c2 809extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
8a5d5053 810
df111b53 811struct DownstreamState
812{
1720247e 813 typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
98650fde 814
150105a2
RG
815 DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
816 DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
6a62c0e3
RG
817 ~DownstreamState()
818 {
5bdbb83d 819 for (auto& fd : sockets) {
150105a2
RG
820 if (fd >= 0) {
821 close(fd);
822 fd = -1;
823 }
824 }
6a62c0e3 825 }
1720247e
CHB
826 boost::uuids::uuid id;
827 std::set<unsigned int> hashes;
d58e616a 828 mutable pthread_rwlock_t d_lock;
5bdbb83d
RG
829 std::vector<int> sockets;
830 std::mutex socketsLock;
5d7e6765 831 std::mutex connectLock;
5bdbb83d 832 std::unique_ptr<FDMultiplexer> mplexer{nullptr};
df111b53 833 std::thread tid;
a2353842 834 const ComboAddress remote;
df111b53 835 QPSLimiter qps;
836 vector<IDState> idStates;
73402775 837 const ComboAddress sourceAddr;
98650fde 838 checkfunc_t checkFunction;
fbe2a2e0
RG
839 DNSName checkName{"a.root-servers.net."};
840 QType checkType{QType::A};
de9f7157 841 uint16_t checkClass{QClass::IN};
df111b53 842 std::atomic<uint64_t> idOffset{0};
843 std::atomic<uint64_t> sendErrors{0};
844 std::atomic<uint64_t> outstanding{0};
845 std::atomic<uint64_t> reuseds{0};
846 std::atomic<uint64_t> queries{0};
847 struct {
848 std::atomic<uint64_t> sendErrors{0};
849 std::atomic<uint64_t> reuseds{0};
850 std::atomic<uint64_t> queries{0};
851 } prev;
a6e9e107
RG
852 std::atomic<uint64_t> tcpDiedSendingQuery{0};
853 std::atomic<uint64_t> tcpDiedReadingResponse{0};
854 std::atomic<uint64_t> tcpGaveUp{0};
855 std::atomic<uint64_t> tcpReadTimeouts{0};
856 std::atomic<uint64_t> tcpWriteTimeouts{0};
cff9aa03
RG
857 std::atomic<uint64_t> tcpCurrentConnections{0};
858 std::atomic<double> tcpAvgQueriesPerConnection{0.0};
859 /* in ms */
860 std::atomic<double> tcpAvgConnectionDuration{0.0};
18eeccc9 861 string name;
5bdbb83d 862 size_t socketsOffset{0};
df111b53 863 double queryLoad{0.0};
864 double dropRate{0.0};
865 double latencyUsec{0.0};
866 int order{1};
867 int weight{1};
b40cffe7 868 int tcpConnectTimeout{5};
3f6d07a4
RG
869 int tcpRecvTimeout{30};
870 int tcpSendTimeout{30};
7c9bf18d 871 unsigned int checkInterval{1};
872 unsigned int lastCheck{0};
73402775 873 const unsigned int sourceItf{0};
3f6d07a4 874 uint16_t retries{5};
c85f69a8 875 uint16_t xpfRRCode{0};
b7e6f4a1 876 uint16_t checkTimeout{1000}; /* in milliseconds */
9e87dcb8 877 uint8_t currentCheckFailures{0};
853faf61 878 uint8_t consecutiveSuccessfulChecks{0};
9e87dcb8 879 uint8_t maxCheckFailures{1};
1b633bec 880 uint8_t minRiseSuccesses{1};
df111b53 881 StopWatch sw;
882 set<string> pools;
883 enum class Availability { Up, Down, Auto} availability{Availability::Auto};
fbe2a2e0 884 bool mustResolve{false};
df111b53 885 bool upStatus{false};
ca404e94 886 bool useECS{false};
21830638 887 bool setCD{false};
49c33a6c 888 bool disableZeroScope{false};
7565f4e6 889 std::atomic<bool> connected{false};
5d7e6765 890 std::atomic_flag threadStarted;
284d460c 891 bool tcpFastOpen{false};
5602f131 892 bool ipBindAddrNoPort{true};
5d7e6765 893
df111b53 894 bool isUp() const
895 {
896 if(availability == Availability::Down)
897 return false;
898 if(availability == Availability::Up)
899 return true;
900 return upStatus;
901 }
902 void setUp() { availability = Availability::Up; }
903 void setDown() { availability = Availability::Down; }
904 void setAuto() { availability = Availability::Auto; }
18eeccc9
RG
905 string getName() const {
906 if (name.empty()) {
907 return remote.toStringWithPort();
908 }
909 return name;
910 }
a7940c06 911 string getNameWithAddr() const {
912 if (name.empty()) {
913 return remote.toStringWithPort();
914 }
915 return name + " (" + remote.toStringWithPort()+ ")";
916 }
9f4eb5cc
RG
917 string getStatus() const
918 {
919 string status;
920 if(availability == DownstreamState::Availability::Up)
921 status = "UP";
922 else if(availability == DownstreamState::Availability::Down)
923 status = "DOWN";
924 else
925 status = (upStatus ? "up" : "down");
926 return status;
927 }
5d7e6765 928 bool reconnect();
f2caf657
CHB
929 void hash();
930 void setId(const boost::uuids::uuid& newId);
931 void setWeight(int newWeight);
cff9aa03
RG
932
933 void updateTCPMetrics(size_t queries, uint64_t durationMs)
934 {
935 tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
936 tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
937 }
df111b53 938};
/* List of shared pointers to downstream servers. */
939using servers_t =vector<std::shared_ptr<DownstreamState>>;
df111b53 940
/* Vector of (number, value) pairs. */
da4e7813 941template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
942
/* Declared here, defined elsewhere; takes shared ownership of the downstream state it is given. */
9b73b71c 943void responderThread(std::shared_ptr<DownstreamState> state);
/* Global Lua context and the mutex declared next to it; both are defined elsewhere. */
da4e7813 944extern std::mutex g_luamutex;
945extern LuaContext g_lua;
946extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
947
0940e4eb 948class DNSRule
949{
950public:
205f2081
RG
951 virtual ~DNSRule ()
952 {
953 }
497a6e3a 954 virtual bool matches(const DNSQuestion* dq) const =0;
0940e4eb 955 virtual string toString() const = 0;
956 mutable std::atomic<uint64_t> d_matches{0};
957};
958
da4e7813 959using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
497a6e3a 960typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
df111b53 961
962struct ServerPolicy
963{
964 string name;
70a57b05 965 policyfunc_t policy;
a1b1a29d 966 bool isLua;
a4fd2d2f
CH
967 std::string toString() const {
968 return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
969 }
df111b53 970};
971
886e2cf2
RG
972struct ServerPool
973{
a1b1a29d
RG
974 ServerPool()
975 {
976 pthread_rwlock_init(&d_lock, nullptr);
977 }
978
886e2cf2
RG
979 const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
980
7e687744
RG
981 bool getECS() const
982 {
983 return d_useECS;
984 }
985
986 void setECS(bool useECS)
987 {
988 d_useECS = useECS;
989 }
990
886e2cf2 991 std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
b9f8a6c8 992 std::shared_ptr<ServerPolicy> policy{nullptr};
5c30ec69 993
a1b1a29d
RG
994 size_t countServers(bool upOnly)
995 {
996 size_t count = 0;
997 ReadLock rl(&d_lock);
998 for (const auto& server : d_servers) {
999 if (!upOnly || std::get<1>(server)->isUp() ) {
1000 count++;
c1b81381
RG
1001 }
1002 }
a1b1a29d
RG
1003 return count;
1004 }
1005
1006 NumberedVector<shared_ptr<DownstreamState>> getServers()
1007 {
1008 NumberedVector<shared_ptr<DownstreamState>> result;
1009 {
1010 ReadLock rl(&d_lock);
1011 result = d_servers;
1012 }
1013 return result;
1014 }
1015
1016 void addServer(shared_ptr<DownstreamState>& server)
1017 {
1018 WriteLock wl(&d_lock);
1019 unsigned int count = (unsigned int) d_servers.size();
1020 d_servers.push_back(make_pair(++count, server));
1021 /* we need to reorder based on the server 'order' */
1022 std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1023 return a.second->order < b.second->order;
1024 });
1025 /* and now we need to renumber for Lua (custom policies) */
1026 size_t idx = 1;
1027 for (auto& serv : d_servers) {
1028 serv.first = idx++;
1029 }
1030 }
1031
1032 void removeServer(shared_ptr<DownstreamState>& server)
1033 {
1034 WriteLock wl(&d_lock);
1035 size_t idx = 1;
1036 bool found = false;
1037 for (auto it = d_servers.begin(); it != d_servers.end();) {
1038 if (found) {
1039 /* we need to renumber the servers placed
1040 after the removed one, for Lua (custom policies) */
1041 it->first = idx++;
1042 it++;
1043 }
1044 else if (it->second == server) {
1045 it = d_servers.erase(it);
1046 found = true;
1047 } else {
1048 idx++;
1049 it++;
1050 }
1051 }
1052 }
1053
1054private:
1055 NumberedVector<shared_ptr<DownstreamState>> d_servers;
1056 pthread_rwlock_t d_lock;
7e687744 1057 bool d_useECS{false};
886e2cf2
RG
1058};
/* Pools, indexed by a string key (the functions below take that key as 'poolName'). */
1059using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Helpers operating on the pool designated by 'poolName' in 'pools'; defined elsewhere. */
742c079a 1060void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
886e2cf2
RG
1061void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1062void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1063
/* Settings of one carbon export target. Plain data with no default values:
   'interval' is left uninitialized unless the creator sets it.
   NOTE(review): the unit of 'interval' is not visible here, presumably seconds; confirm against the code using it. */
42fae326 1064struct CarbonConfig
1065{
d617b22c 1066 ComboAddress server;
813b0ba9 1067 std::string namespace_name;
42fae326 1068 std::string ourname;
813b0ba9 1069 std::string instance_name;
d617b22c 1070 unsigned int interval;
42fae326 1071};
1072
ca404e94
RG
/* EDNS header flag values: EDNS_HEADER_FLAG_DO is bit 15 (0x8000, i.e. 32768). */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0,
  EDNS_HEADER_FLAG_DO = 0x8000
};
1077
4d5959e6
RG
/* Associates a rule with a DNSAction, together with a UUID and a creation order number. */
1078struct DNSDistRuleAction
1079{
1080 std::shared_ptr<DNSRule> d_rule;
1081 std::shared_ptr<DNSAction> d_action;
1082 boost::uuids::uuid d_id;
/* not initialized here, has to be set by whoever creates the entry */
f8a222ac 1083 uint64_t d_creationOrder;
4d5959e6
RG
1084};
1085
/* Same layout as DNSDistRuleAction, except that the action is a DNSResponseAction. */
1086struct DNSDistResponseRuleAction
1087{
1088 std::shared_ptr<DNSRule> d_rule;
1089 std::shared_ptr<DNSResponseAction> d_action;
1090 boost::uuids::uuid d_id;
/* not initialized here, has to be set by whoever creates the entry */
f8a222ac 1091 uint64_t d_creationOrder;
4d5959e6
RG
1092};
1093
/* Global state holders (dynamic blocks, carbon settings, policy, servers, pools, rules, ACL).
   Only declared here, the definitions live in other translation units. */
71c94675 1094extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
dd46e5e3 1095extern DNSAction::Action g_dynBlockAction;
71c94675 1096
d617b22c 1097extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
ecbe9133 1098extern GlobalStateHolder<ServerPolicy> g_policy;
1099extern GlobalStateHolder<servers_t> g_dstates;
886e2cf2 1100extern GlobalStateHolder<pools_t> g_pools;
4d5959e6
RG
1101extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1102extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1103extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
2d4783a8 1104extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
638184e9 1105extern GlobalStateHolder<NetmaskGroup> g_ACL;
2e72cc0e 1106
/* Global settings and frontend lists. Only declared here, the definitions (and the default values)
   live in other translation units. Apart from the std::atomic ones, these are plain variables. */
ecbe9133 1107extern ComboAddress g_serverControl; // not changed during runtime
1108
f0e4dcba 1109extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
a227f47d 1110extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
fbf14b03 1111extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
6e9fd124 1112extern std::vector<std::unique_ptr<ClientState>> g_frontends;
6ad8b29a 1113extern bool g_truncateTC;
b29edbee 1114extern bool g_fixupCase;
3f6d07a4
RG
1115extern int g_tcpRecvTimeout;
1116extern int g_tcpSendTimeout;
e0b5e49d 1117extern int g_udpTimeout;
e41f8165
RG
1118extern uint16_t g_maxOutstanding;
1119extern std::atomic<bool> g_configurationDone;
6c1ca990
RG
1120extern uint64_t g_maxTCPClientThreads;
1121extern uint64_t g_maxTCPQueuedConnections;
9396d955
RG
1122extern size_t g_maxTCPQueriesPerConn;
1123extern size_t g_maxTCPConnectionDuration;
1124extern size_t g_maxTCPConnectionsPerClient;
886e2cf2 1125extern std::atomic<uint16_t> g_cacheCleaningDelay;
f65ea0c2 1126extern std::atomic<uint16_t> g_cacheCleaningPercentage;
9e87dcb8 1127extern bool g_verboseHealthChecks;
1ea747c0 1128extern uint32_t g_staleCacheEntriesTTL;
56d68fad
RG
1129extern bool g_apiReadWrite;
1130extern std::string g_apiConfigDirectory;
26a3cdb7 1131extern bool g_servFailOnNoPolicy;
36e763fa 1132extern uint32_t g_hashperturb;
edbda1ad 1133extern bool g_useTCPSinglePipe;
cff9aa03 1134extern uint16_t g_downstreamTCPCleanupInterval;
0beaa5c8 1135extern size_t g_udpVectorSize;
53c57da7 1136extern bool g_preserveTrailingData;
0dffe9e3 1137extern bool g_allowEmptyResponse;
32b86928 1138extern bool g_roundrobinFailOnNoServer;
ca404e94 1139
87b515ed
RG
/* eBPF related globals, only declared when HAVE_EBPF is defined. */
1140#ifdef HAVE_EBPF
1141extern shared_ptr<BPFFilter> g_defaultBPFFilter;
8429ad04 1142extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
87b515ed
RG
1143#endif /* HAVE_EBPF */
1144
0beaa5c8
RG
1145struct LocalHolders
1146{
2d4783a8 1147 LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
0beaa5c8
RG
1148 {
1149 }
1150
1151 LocalStateHolder<NetmaskGroup> acl;
1152 LocalStateHolder<ServerPolicy> policy;
4d5959e6
RG
1153 LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1154 LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
2d4783a8 1155 LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
0beaa5c8
RG
1156 LocalStateHolder<servers_t> servers;
1157 LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1158 LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1159 LocalStateHolder<pools_t> pools;
1160};
1161
/* forward declaration, the structure is defined elsewhere */
ecbe9133 1162struct dnsheader;
1163
/* The functions below are only declared here. */
1164void controlThread(int fd, ComboAddress local);
839f3021 1165vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
886e2cf2
RG
/* Pool lookups by name; only createPoolIfNotExists() takes a non-const reference to the pools. */
1166std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1167std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
a1b1a29d 1168NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
da4e7813 1169
/* Server selection functions, defined elsewhere. They all take the numbered list of servers and the
   query, and return a shared pointer to a downstream server, which is the shape of policyfunc_t. */
497a6e3a 1170std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
ecbe9133 1171
497a6e3a
RG
1172std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1173std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1174std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1720247e 1175std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
497a6e3a 1176std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
e7c732b8 1177
80dbd7d2
CHB
/* Webserver settings: password, API key and optional custom headers.
   NOTE(review): 'lock' presumably protects the other fields; confirm against the code using this structure. */
1178struct WebserverConfig
1179{
1180 std::string password;
1181 std::string apiKey;
1182 boost::optional<std::map<std::string, std::string> > customHeaders;
1183 std::mutex lock;
1184};
1185
32c97b56
CHB
/* Setters for the webserver settings; the API key and the custom headers are optional values. Defined elsewhere. */
1186void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1187void setWebserverPassword(const std::string& password);
1188void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1189
/* Thread entry points, defined elsewhere; dohThread() is only declared when HAVE_DNS_OVER_HTTPS is defined. */
80dbd7d2 1190void dnsdistWebserverThread(int sock, const ComboAddress& local);
9b73b71c 1191void tcpAcceptorThread(void* p);
fbf14b03
RG
1192#ifdef HAVE_DNS_OVER_HTTPS
1193void dohThread(ClientState* cs);
1194#endif /* HAVE_DNS_OVER_HTTPS */
80a216c9 1195
/* Tracking of the side effects declared by Lua code, see the comment on each function. Defined elsewhere. */
f758857a 1196void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1197void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls
1198bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1199void resetLuaSideEffect(); // reset to indeterminate state
11e1e08b 1200
/* Query and response processing helpers, only declared here. */
e7c732b8 1201bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
3e425868 1202bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
2a28db86 1203bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
4ab01344 1204
0beaa5c8 1205bool checkQueryHeaders(const struct dnsheader* dh);
fcffc585 1206
/* DNSCrypt contexts and the related query handling functions, only declared here. */
6e9fd124 1207extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
43234e76 1208int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
4ab01344 1209boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
9f4eb5cc 1210
18f707fa 1211bool addXPF(DNSQuestion& dq, uint16_t optionCode);
5cc8371b 1212
555970c9
RG
1213uint16_t getRandomDNSID();
1214
9f4eb5cc
RG
1215#include "dnsdist-snmp.hh"
1216
/* SNMP related globals, only declared here. */
1217extern bool g_snmpEnabled;
1218extern bool g_snmpTrapsEnabled;
1219extern DNSDistSNMPAgent* g_snmpAgent;
e7c732b8
RG
1220extern bool g_addEDNSToSelfGeneratedResponses;
1221
/* 'static' at namespace scope in a header: every translation unit including it gets its own copy of the constant.
   NOTE(review): presumably a size in bytes; confirm against the code using it. */
1222static const size_t s_udpIncomingBufferSize{1500};
4ab01344 1223
3e425868
RG
/* Possible outcomes of processQuery(). */
1224enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* 'selectedBackend' is passed by non-const reference.
   NOTE(review): presumably set to the chosen backend when the result is PassToBackend; confirm in the definition. */
1225ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
4ab01344 1226
d0ae6360
RG
/* Helpers converting between IDState and DNSQuestion / DNSResponse objects, only declared here. */
1227DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1228void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
fbf14b03
RG
1229
/* Backend socket selection and sending of a request over UDP, only declared here; 'healthCheck' defaults to false. */
1230int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1231ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);