]>
Commit | Line | Data |
---|---|---|
12471842 PL |
1 | /* |
2 | * This file is part of PowerDNS or dnsdist. | |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
df111b53 | 22 | #pragma once |
11e1e08b | 23 | #include "config.h" |
df111b53 | 24 | #include "ext/luawrapper/include/LuaContext.hpp" |
cbf4e13a | 25 | |
df111b53 | 26 | #include <atomic> |
df111b53 | 27 | #include <mutex> |
cbf4e13a | 28 | #include <string> |
df111b53 | 29 | #include <thread> |
cbf4e13a | 30 | #include <time.h> |
bffca8b9 | 31 | #include <unistd.h> |
cbf4e13a RG |
32 | #include <unordered_map> |
33 | ||
cbf4e13a RG |
34 | #include <boost/variant.hpp> |
35 | ||
36 | #include "bpf-filter.hh" | |
f12666f2 | 37 | #include "capabilities.hh" |
9f6a31ff | 38 | #include "circular_buffer.hh" |
11e1e08b | 39 | #include "dnscrypt.hh" |
886e2cf2 | 40 | #include "dnsdist-cache.hh" |
87b515ed | 41 | #include "dnsdist-dynbpf.hh" |
cbf4e13a | 42 | #include "dnsname.hh" |
fbf14b03 | 43 | #include "doh.hh" |
cbf4e13a RG |
44 | #include "ednsoptions.hh" |
45 | #include "gettime.hh" | |
46 | #include "iputils.hh" | |
47 | #include "misc.hh" | |
48 | #include "mplexer.hh" | |
49 | #include "sholder.hh" | |
a227f47d | 50 | #include "tcpiohandler.hh" |
d61aa945 | 51 | #include "uuid-utils.hh" |
d8c19b98 | 52 | |
9b73b71c | 53 | void carbonDumpThread(); |
61d1b966 | 54 | uint64_t uptimeOfProcess(const std::string& str); |
bd1c631b | 55 | |
7b925432 RG |
56 | extern uint16_t g_ECSSourcePrefixV4; |
57 | extern uint16_t g_ECSSourcePrefixV6; | |
58 | extern bool g_ECSOverride; | |
26a6373d | 59 | |
15fac047 CH |
60 | typedef std::unordered_map<string, string> QTag; |
61 | ||
7b925432 RG |
62 | struct DNSQuestion |
63 | { | |
e7c732b8 | 64 | DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_): |
4ab01344 RG |
65 | qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) { |
66 | const uint16_t* flags = getFlagsFromDNSHeader(dh); | |
67 | origFlags = *flags; | |
68 | } | |
dd1a3034 RG |
69 | DNSQuestion(const DNSQuestion&) = delete; |
70 | DNSQuestion& operator=(const DNSQuestion&) = delete; | |
71 | DNSQuestion(DNSQuestion&&) = default; | |
7b925432 RG |
72 | |
73 | #ifdef HAVE_PROTOBUF | |
ec48a28d | 74 | boost::optional<boost::uuids::uuid> uniqueId; |
7b925432 | 75 | #endif |
bd14f087 | 76 | Netmask ecs; |
4ab01344 | 77 | boost::optional<Netmask> subnet; |
046bac5c | 78 | std::string sni; /* Server Name Indication, if any (DoT or DoH) */ |
2a28db86 | 79 | std::string poolname; |
4ab01344 RG |
80 | const DNSName* qname{nullptr}; |
81 | const ComboAddress* local{nullptr}; | |
82 | const ComboAddress* remote{nullptr}; | |
15fac047 | 83 | std::shared_ptr<QTag> qTag{nullptr}; |
cbf4e13a | 84 | std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions; |
4ab01344 RG |
85 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
86 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; | |
87 | struct dnsheader* dh{nullptr}; | |
88 | const struct timespec* queryTime{nullptr}; | |
fbf14b03 | 89 | struct DOHUnit* du{nullptr}; |
7b925432 | 90 | size_t size; |
e7c732b8 | 91 | unsigned int consumed{0}; |
4ab01344 RG |
92 | int delayMsec{0}; |
93 | boost::optional<uint32_t> tempFailureTTL; | |
94 | uint32_t cacheKeyNoECS; | |
95 | uint32_t cacheKey; | |
96 | const uint16_t qtype; | |
97 | const uint16_t qclass; | |
7b925432 RG |
98 | uint16_t len; |
99 | uint16_t ecsPrefixLength; | |
4ab01344 | 100 | uint16_t origFlags; |
1ecbd15e | 101 | uint8_t ednsRCode{0}; |
7b925432 RG |
102 | const bool tcp; |
103 | bool skipCache{false}; | |
104 | bool ecsOverride; | |
5b8255ba | 105 | bool useECS{true}; |
5cc8371b | 106 | bool addXPF{true}; |
bd14f087 | 107 | bool ecsSet{false}; |
4ab01344 RG |
108 | bool ecsAdded{false}; |
109 | bool ednsAdded{false}; | |
110 | bool useZeroScope{false}; | |
111 | bool dnssecOK{false}; | |
7b925432 RG |
112 | }; |
113 | ||
114 | struct DNSResponse : DNSQuestion | |
115 | { | |
e7c732b8 RG |
116 | DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_): |
117 | DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { } | |
dd1a3034 RG |
118 | DNSResponse(const DNSResponse&) = delete; |
119 | DNSResponse& operator=(const DNSResponse&) = delete; | |
120 | DNSResponse(DNSResponse&&) = default; | |
7b925432 RG |
121 | }; |
122 | ||
5c30ec69 LM |
123 | /* so what could you do: |
124 | drop, | |
125 | fake up nxdomain, | |
126 | provide actual answer, | |
127 | allow and stop processing, | |
128 | continue processing, | |
7b925432 RG |
129 | modify header: (servfail|refused|notimp), set TC=1, |
130 | send to pool */ | |
131 | ||
132 | class DNSAction | |
133 | { | |
134 | public: | |
3d60b39a | 135 | enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse }; |
b718792f RG |
136 | static std::string typeToString(const Action& action) |
137 | { | |
138 | switch(action) { | |
139 | case Action::Drop: | |
140 | return "Drop"; | |
141 | case Action::Nxdomain: | |
142 | return "Send NXDomain"; | |
143 | case Action::Refused: | |
144 | return "Send Refused"; | |
145 | case Action::Spoof: | |
146 | return "Spoof an answer"; | |
147 | case Action::Allow: | |
148 | return "Allow"; | |
149 | case Action::HeaderModify: | |
150 | return "Modify the header"; | |
151 | case Action::Pool: | |
152 | return "Route to a pool"; | |
153 | case Action::Delay: | |
154 | return "Delay"; | |
155 | case Action::Truncate: | |
156 | return "Truncate over UDP"; | |
157 | case Action::ServFail: | |
158 | return "Send ServFail"; | |
159 | case Action::None: | |
477c86a0 | 160 | case Action::NoOp: |
b718792f | 161 | return "Do nothing"; |
3d60b39a | 162 | case Action::NoRecurse: |
163 | return "Set rd=0"; | |
b718792f RG |
164 | } |
165 | ||
166 | return "Unknown"; | |
167 | } | |
168 | ||
7b925432 | 169 | virtual Action operator()(DNSQuestion*, string* ruleresult) const =0; |
205f2081 RG |
170 | virtual ~DNSAction() |
171 | { | |
172 | } | |
7b925432 | 173 | virtual string toString() const = 0; |
b8019cf7 | 174 | virtual std::map<string, double> getStats() const |
7b925432 RG |
175 | { |
176 | return {{}}; | |
177 | } | |
178 | }; | |
179 | ||
180 | class DNSResponseAction | |
181 | { | |
182 | public: | |
5f23eb98 | 183 | enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None }; |
7b925432 | 184 | virtual Action operator()(DNSResponse*, string* ruleresult) const =0; |
205f2081 RG |
185 | virtual ~DNSResponseAction() |
186 | { | |
187 | } | |
7b925432 RG |
188 | virtual string toString() const = 0; |
189 | }; | |
190 | ||
78ffa782 | 191 | struct DynBlock |
192 | { | |
1d3ba133 | 193 | DynBlock(): action(DNSAction::Action::None), warning(false) |
5708a729 RG |
194 | { |
195 | } | |
196 | ||
1d3ba133 | 197 | DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false) |
5708a729 RG |
198 | { |
199 | } | |
200 | ||
1d3ba133 | 201 | DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning) |
5708a729 RG |
202 | { |
203 | blocks.store(rhs.blocks); | |
204 | } | |
205 | ||
78ffa782 | 206 | DynBlock& operator=(const DynBlock& rhs) |
207 | { | |
208 | reason=rhs.reason; | |
209 | until=rhs.until; | |
71c94675 | 210 | domain=rhs.domain; |
7b925432 | 211 | action=rhs.action; |
78ffa782 | 212 | blocks.store(rhs.blocks); |
1d3ba133 | 213 | warning=rhs.warning; |
78ffa782 | 214 | return *this; |
215 | } | |
71c94675 | 216 | |
78ffa782 | 217 | string reason; |
218 | struct timespec until; | |
71c94675 | 219 | DNSName domain; |
7b925432 | 220 | DNSAction::Action action; |
78ffa782 | 221 | mutable std::atomic<unsigned int> blocks; |
1d3ba133 | 222 | bool warning; |
78ffa782 | 223 | }; |
224 | ||
225 | extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; | |
f758857a | 226 | |
227 | extern vector<pair<struct timeval, std::string> > g_confDelta; | |
228 | ||
eb0335ff MC |
229 | extern uint64_t getLatencyCount(const std::string&); |
230 | ||
e48090d1 | 231 | struct DNSDistStats |
232 | { | |
6ad8b29a | 233 | using stat_t=std::atomic<uint64_t>; // aww yiss ;-) |
e48090d1 | 234 | stat_t responses{0}; |
235 | stat_t servfailResponses{0}; | |
236 | stat_t queries{0}; | |
61d10a4d MH |
237 | stat_t frontendNXDomain{0}; |
238 | stat_t frontendServFail{0}; | |
239 | stat_t frontendNoError{0}; | |
e73ec7d3 | 240 | stat_t nonCompliantQueries{0}; |
d08b1cdf | 241 | stat_t nonCompliantResponses{0}; |
643a182a | 242 | stat_t rdQueries{0}; |
2efd427d | 243 | stat_t emptyQueries{0}; |
e48090d1 | 244 | stat_t aclDrops{0}; |
bd1c631b | 245 | stat_t dynBlocked{0}; |
e48090d1 | 246 | stat_t ruleDrop{0}; |
247 | stat_t ruleNXDomain{0}; | |
dd46e5e3 | 248 | stat_t ruleRefused{0}; |
5f23eb98 | 249 | stat_t ruleServFail{0}; |
e48090d1 | 250 | stat_t selfAnswered{0}; |
251 | stat_t downstreamTimeouts{0}; | |
252 | stat_t downstreamSendErrors{0}; | |
6ad8b29a | 253 | stat_t truncFail{0}; |
b8bc7e61 | 254 | stat_t noPolicy{0}; |
886e2cf2 RG |
255 | stat_t cacheHits{0}; |
256 | stat_t cacheMisses{0}; | |
eb0335ff | 257 | stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0}; |
f29758cc | 258 | stat_t securityStatus{0}; |
5c30ec69 | 259 | |
e16fd59c | 260 | double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0}; |
a1a787dc | 261 | typedef std::function<uint64_t(const std::string&)> statfunction_t; |
72f58a53 | 262 | typedef boost::variant<stat_t*, double*, statfunction_t> entry_t; |
e16fd59c | 263 | std::vector<std::pair<std::string, entry_t>> entries{ |
dd46e5e3 RG |
264 | {"responses", &responses}, |
265 | {"servfail-responses", &servfailResponses}, | |
266 | {"queries", &queries}, | |
61d10a4d MH |
267 | {"frontend-nxdomain", &frontendNXDomain}, |
268 | {"frontend-servfail", &frontendServFail}, | |
269 | {"frontend-noerror", &frontendNoError}, | |
dd46e5e3 | 270 | {"acl-drops", &aclDrops}, |
dd46e5e3 RG |
271 | {"rule-drop", &ruleDrop}, |
272 | {"rule-nxdomain", &ruleNXDomain}, | |
273 | {"rule-refused", &ruleRefused}, | |
5f23eb98 | 274 | {"rule-servfail", &ruleServFail}, |
dd46e5e3 RG |
275 | {"self-answered", &selfAnswered}, |
276 | {"downstream-timeouts", &downstreamTimeouts}, | |
5c30ec69 | 277 | {"downstream-send-errors", &downstreamSendErrors}, |
dd46e5e3 RG |
278 | {"trunc-failures", &truncFail}, |
279 | {"no-policy", &noPolicy}, | |
280 | {"latency0-1", &latency0_1}, | |
281 | {"latency1-10", &latency1_10}, | |
282 | {"latency10-50", &latency10_50}, | |
283 | {"latency50-100", &latency50_100}, | |
284 | {"latency100-1000", &latency100_1000}, | |
285 | {"latency-slow", &latencySlow}, | |
286 | {"latency-avg100", &latencyAvg100}, | |
287 | {"latency-avg1000", &latencyAvg1000}, | |
288 | {"latency-avg10000", &latencyAvg10000}, | |
289 | {"latency-avg1000000", &latencyAvg1000000}, | |
61d1b966 | 290 | {"uptime", uptimeOfProcess}, |
a9b6db56 | 291 | {"real-memory-usage", getRealMemoryUsage}, |
330dcb5c | 292 | {"special-memory-usage", getSpecialMemoryUsage}, |
a2aa00ed | 293 | {"noncompliant-queries", &nonCompliantQueries}, |
d08b1cdf | 294 | {"noncompliant-responses", &nonCompliantResponses}, |
643a182a | 295 | {"rdqueries", &rdQueries}, |
2efd427d | 296 | {"empty-queries", &emptyQueries}, |
886e2cf2 RG |
297 | {"cache-hits", &cacheHits}, |
298 | {"cache-misses", &cacheMisses}, | |
4f99f3d3 RG |
299 | {"cpu-user-msec", getCPUTimeUser}, |
300 | {"cpu-sys-msec", getCPUTimeSystem}, | |
dd46e5e3 | 301 | {"fd-usage", getOpenFileDescriptors}, |
5c30ec69 | 302 | {"dyn-blocked", &dynBlocked}, |
f29758cc | 303 | {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }}, |
eb0335ff MC |
304 | {"security-status", &securityStatus}, |
305 | // Latency histogram | |
306 | {"latency-sum", &latencySum}, | |
307 | {"latency-count", getLatencyCount}, | |
42fae326 | 308 | }; |
e48090d1 | 309 | }; |
310 | ||
2e567c94 PO |
311 | // Metric types for Prometheus |
enum class PrometheusMetricType: int
{
  counter = 1, // rendered as "counter"
  gauge = 2    // rendered as "gauge"
};
316 | ||
37a5c2d5 PO |
317 | // Keeps additional information about metrics |
318 | struct MetricDefinition { | |
3cbe2773 | 319 | MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) { |
37a5c2d5 PO |
320 | } |
321 | ||
322 | MetricDefinition() = default; | |
323 | ||
324 | // Metric description | |
325 | std::string description; | |
326 | // Metric type for Prometheus | |
2e567c94 | 327 | PrometheusMetricType prometheusType; |
37a5c2d5 PO |
328 | }; |
329 | ||
330 | struct MetricDefinitionStorage { | |
331 | // Return metric definition by name | |
332 | bool getMetricDetails(std::string metricName, MetricDefinition& metric) { | |
333 | auto metricDetailsIter = metrics.find(metricName); | |
334 | ||
335 | if (metricDetailsIter == metrics.end()) { | |
336 | return false; | |
337 | } | |
338 | ||
339 | metric = metricDetailsIter->second; | |
340 | return true; | |
341 | }; | |
342 | ||
2e567c94 PO |
343 | // Return string representation of Prometheus metric type |
344 | std::string getPrometheusStringMetricType(PrometheusMetricType metricType) { | |
345 | switch (metricType) { | |
346 | case PrometheusMetricType::counter: | |
347 | return "counter"; | |
348 | break; | |
349 | case PrometheusMetricType::gauge: | |
350 | return "gauge"; | |
351 | break; | |
352 | default: | |
353 | return ""; | |
354 | break; | |
355 | } | |
356 | }; | |
357 | ||
37a5c2d5 | 358 | std::map<std::string, MetricDefinition> metrics = { |
2e567c94 PO |
359 | { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") }, |
360 | { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") }, | |
361 | { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")}, | |
61d10a4d MH |
362 | { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")}, |
363 | { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")}, | |
364 | { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")}, | |
2e567c94 PO |
365 | { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")}, |
366 | { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")}, | |
367 | { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")}, | |
368 | { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")}, | |
369 | { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")}, | |
370 | { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")}, | |
371 | { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")}, | |
372 | { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")}, | |
373 | { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")}, | |
374 | { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")}, | |
375 | { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")}, | |
376 | { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")}, | |
377 | { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")}, | |
378 | { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")}, | |
379 | { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")}, | |
380 | { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")}, | |
381 | { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")}, | |
382 | { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")}, | |
383 | { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")}, | |
384 | { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")}, | |
385 | { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")}, | |
386 | { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")}, | |
387 | { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")}, | |
388 | { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")}, | |
389 | { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")}, | |
390 | { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")}, | |
391 | { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")}, | |
392 | { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")}, | |
393 | { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")}, | |
394 | { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")}, | |
395 | { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")}, | |
1200be3e | 396 | { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")}, |
2e567c94 | 397 | { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") }, |
f29758cc | 398 | { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") }, |
eb0335ff MC |
399 | // Latency histogram |
400 | { "latency-sum", MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")}, | |
401 | { "latency-count", MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")}, | |
37a5c2d5 PO |
402 | }; |
403 | }; | |
e16fd59c | 404 | |
37a5c2d5 | 405 | extern MetricDefinitionStorage g_metricDefinitions; |
e48090d1 | 406 | extern struct DNSDistStats g_stats; |
f653b8df | 407 | void doLatencyStats(double udiff); |
e48090d1 | 408 | |
638184e9 | 409 | |
df111b53 | 410 | struct StopWatch |
411 | { | |
58307a85 RG |
412 | StopWatch(bool realTime=false): d_needRealTime(realTime) |
413 | { | |
414 | } | |
df111b53 | 415 | struct timespec d_start{0,0}; |
58307a85 RG |
416 | bool d_needRealTime{false}; |
417 | ||
5c30ec69 | 418 | void start() { |
58307a85 | 419 | if(gettime(&d_start, d_needRealTime) < 0) |
df111b53 | 420 | unixDie("Getting timestamp"); |
5c30ec69 | 421 | |
df111b53 | 422 | } |
cf48b0ce RG |
423 | |
424 | void set(const struct timespec& from) { | |
425 | d_start = from; | |
426 | } | |
5c30ec69 | 427 | |
df111b53 | 428 | double udiff() const { |
429 | struct timespec now; | |
58307a85 | 430 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 431 | unixDie("Getting timestamp"); |
5c30ec69 | 432 | |
df111b53 | 433 | return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
434 | } | |
435 | ||
436 | double udiffAndSet() { | |
437 | struct timespec now; | |
58307a85 | 438 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 439 | unixDie("Getting timestamp"); |
5c30ec69 | 440 | |
df111b53 | 441 | auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
442 | d_start = now; | |
443 | return ret; | |
444 | } | |
445 | ||
446 | }; | |
447 | ||
67ce0bdd | 448 | class BasicQPSLimiter |
df111b53 | 449 | { |
450 | public: | |
67ce0bdd | 451 | BasicQPSLimiter() |
df111b53 | 452 | { |
453 | } | |
454 | ||
2d29e6b7 | 455 | BasicQPSLimiter(unsigned int burst): d_tokens(burst) |
67ce0bdd RG |
456 | { |
457 | d_prev.start(); | |
458 | } | |
459 | ||
460 | bool check(unsigned int rate, unsigned int burst) const // this is not quite fair | |
461 | { | |
462 | auto delta = d_prev.udiffAndSet(); | |
463 | ||
1a1787b6 | 464 | if(delta > 0.0) // time, frequently, does go backwards.. |
465 | d_tokens += 1.0 * rate * (delta/1000000.0); | |
67ce0bdd RG |
466 | |
467 | if(d_tokens > burst) { | |
468 | d_tokens = burst; | |
469 | } | |
470 | ||
471 | bool ret=false; | |
472 | if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise | |
473 | ret=true; | |
474 | --d_tokens; | |
475 | } | |
476 | ||
477 | return ret; | |
478 | } | |
479 | ||
480 | bool seenSince(const struct timespec& cutOff) const | |
481 | { | |
482 | return cutOff < d_prev.d_start; | |
483 | } | |
484 | ||
485 | protected: | |
486 | mutable StopWatch d_prev; | |
487 | mutable double d_tokens; | |
488 | }; | |
489 | ||
490 | class QPSLimiter : public BasicQPSLimiter | |
491 | { | |
492 | public: | |
493 | QPSLimiter(): BasicQPSLimiter() | |
494 | { | |
495 | } | |
496 | ||
2d29e6b7 | 497 | QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false) |
df111b53 | 498 | { |
df111b53 | 499 | d_prev.start(); |
500 | } | |
501 | ||
502 | unsigned int getRate() const | |
503 | { | |
67ce0bdd | 504 | return d_passthrough ? 0 : d_rate; |
df111b53 | 505 | } |
506 | ||
507 | int getPassed() const | |
508 | { | |
509 | return d_passed; | |
510 | } | |
67ce0bdd | 511 | |
df111b53 | 512 | int getBlocked() const |
513 | { | |
514 | return d_blocked; | |
515 | } | |
516 | ||
ecbe9133 | 517 | bool check() const // this is not quite fair |
df111b53 | 518 | { |
67ce0bdd | 519 | if (d_passthrough) { |
df111b53 | 520 | return true; |
67ce0bdd | 521 | } |
df111b53 | 522 | |
67ce0bdd RG |
523 | bool ret = BasicQPSLimiter::check(d_rate, d_burst); |
524 | if (ret) { | |
df111b53 | 525 | d_passed++; |
526 | } | |
67ce0bdd | 527 | else { |
df111b53 | 528 | d_blocked++; |
67ce0bdd | 529 | } |
df111b53 | 530 | |
5c30ec69 | 531 | return ret; |
df111b53 | 532 | } |
533 | private: | |
ecbe9133 | 534 | mutable unsigned int d_passed{0}; |
535 | mutable unsigned int d_blocked{0}; | |
67ce0bdd RG |
536 | unsigned int d_rate; |
537 | unsigned int d_burst; | |
538 | bool d_passthrough{true}; | |
df111b53 | 539 | }; |
540 | ||
b5b93e0b RG |
541 | struct ClientState; |
542 | ||
df111b53 | 543 | struct IDState |
544 | { | |
a9489723 | 545 | IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;} |
71b86bd8 | 546 | IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age) |
df111b53 | 547 | { |
a9489723 RG |
548 | usageIndicator.store(orig.usageIndicator.load()); |
549 | origFD = orig.origFD; | |
df111b53 | 550 | origID = orig.origID; |
7b3865cd | 551 | delayMsec = orig.delayMsec; |
acb8f5d5 | 552 | tempFailureTTL = orig.tempFailureTTL; |
df111b53 | 553 | } |
554 | ||
311f19d5 RG |
555 | static const int64_t unusedIndicator = -1; |
556 | ||
557 | static bool isInUse(int64_t usageIndicator) | |
558 | { | |
559 | return usageIndicator != unusedIndicator; | |
560 | } | |
561 | ||
562 | bool isInUse() const | |
563 | { | |
564 | return usageIndicator != unusedIndicator; | |
565 | } | |
566 | ||
567 | /* return true if the value has been successfully replaced meaning that | |
568 | no-one updated the usage indicator in the meantime */ | |
569 | bool tryMarkUnused(int64_t expectedUsageIndicator) | |
570 | { | |
571 | return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator); | |
572 | } | |
573 | ||
574 | /* mark as unused no matter what, return true if the state was in use before */ | |
575 | bool markAsUsed() | |
576 | { | |
577 | auto currentGeneration = generation++; | |
578 | return markAsUsed(currentGeneration); | |
579 | } | |
580 | ||
581 | /* mark as unused no matter what, return true if the state was in use before */ | |
582 | bool markAsUsed(int64_t currentGeneration) | |
583 | { | |
584 | int64_t oldUsage = usageIndicator.exchange(currentGeneration); | |
585 | return oldUsage != unusedIndicator; | |
586 | } | |
587 | ||
a9489723 | 588 | /* We use this value to detect whether this state is in use. |
9bd1a882 RG |
589 | For performance reasons we don't want to use a lock here, but that means |
590 | we need to be very careful when modifying this value. Modifications happen | |
591 | from: | |
592 | - one of the UDP or DoH 'client' threads receiving a query, selecting a backend | |
593 | then picking one of the states associated to this backend (via the idOffset). | |
a9489723 | 594 | Most of the time this state should not be in use and usageIndicator is -1, but we |
9bd1a882 RG |
595 | might not yet have received a response for the query previously associated to this |
596 | state, meaning that we will 'reuse' this state and erase the existing state. | |
597 | If we ever receive a response for this state, it will be discarded. This is | |
598 | mostly fine for UDP except that we still need to be careful in order to miss | |
599 | the 'outstanding' counters, which should only be increased when we are picking | |
600 | an empty state, and not when reusing ; | |
601 | For DoH, though, we have dynamically allocated a DOHUnit object that needs to | |
602 | be freed, as well as internal objects internals to libh2o. | |
603 | - one of the UDP receiver threads receiving a response from a backend, picking | |
604 | the corresponding state and sending the response to the client ; | |
605 | - the 'healthcheck' thread scanning the states to actively discover timeouts, | |
606 | mostly to keep some counters like the 'outstanding' one sane. | |
a9489723 RG |
607 | We previously based that logic on the origFD (FD on which the query was received, |
608 | and therefore from where the response should be sent) but this suffered from an | |
609 | ABA problem since it was quite likely that a UDP 'client thread' would reset it to the | |
610 | same value since we only have so much incoming sockets: | |
611 | - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ; | |
612 | - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname, | |
613 | qtype and qclass match | |
614 | - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ; | |
615 | - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still | |
616 | 5, except it's not the same 5 anymore and it overrides a fresh state. | |
617 | We now use a 32-bit unsigned counter instead, which is incremented every time the state is set, | |
618 | wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1 | |
619 | when the state is unused and the value of our counter otherwise. | |
9bd1a882 | 620 | */ |
311f19d5 RG |
621 | std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8 |
622 | std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4 | |
2bf26975 | 623 | ComboAddress origRemote; // 28 |
549d63c9 | 624 | ComboAddress origDest; // 28 |
2bf26975 | 625 | StopWatch sentTime; // 16 |
626 | DNSName qname; // 80 | |
43234e76 | 627 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
d8c19b98 | 628 | #ifdef HAVE_PROTOBUF |
ec48a28d | 629 | boost::optional<boost::uuids::uuid> uniqueId; |
11e1e08b | 630 | #endif |
78e3ac9e | 631 | boost::optional<Netmask> subnet{boost::none}; |
886e2cf2 | 632 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
a76b0d63 | 633 | std::shared_ptr<QTag> qTag{nullptr}; |
b5b93e0b | 634 | const ClientState* cs{nullptr}; |
fbf14b03 | 635 | DOHUnit* du{nullptr}; |
9837850d | 636 | uint32_t cacheKey; // 4 |
637 | uint32_t cacheKeyNoECS; // 4 | |
71b86bd8 | 638 | uint16_t age; // 4 |
2bf26975 | 639 | uint16_t qtype; // 2 |
886e2cf2 | 640 | uint16_t qclass; // 2 |
2bf26975 | 641 | uint16_t origID; // 2 |
aeb36780 | 642 | uint16_t origFlags; // 2 |
a9489723 | 643 | int origFD{-1}; |
7b3865cd | 644 | int delayMsec; |
acb8f5d5 | 645 | boost::optional<uint32_t> tempFailureTTL; |
ca404e94 | 646 | bool ednsAdded{false}; |
ff73f02b | 647 | bool ecsAdded{false}; |
886e2cf2 | 648 | bool skipCache{false}; |
7cea4e39 | 649 | bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr |
d7728daf | 650 | bool dnssecOK{false}; |
389d903a | 651 | bool useZeroScope; |
df111b53 | 652 | }; |
653 | ||
786e4d8c | 654 | typedef std::unordered_map<string, unsigned int> QueryCountRecords; |
dd1a3034 | 655 | typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter; |
786e4d8c RS |
656 | struct QueryCount { |
657 | QueryCount() | |
658 | { | |
43234e76 | 659 | pthread_rwlock_init(&queryLock, nullptr); |
786e4d8c RS |
660 | } |
661 | QueryCountRecords records; | |
662 | QueryCountFilter filter; | |
663 | pthread_rwlock_t queryLock; | |
664 | bool enabled{false}; | |
665 | }; | |
666 | ||
667 | extern QueryCount g_qcount; | |
668 | ||
8a5d5053 | 669 | struct ClientState |
670 | { | |
6e9fd124 RG |
671 | ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort) |
672 | { | |
673 | } | |
674 | ||
f0e4dcba | 675 | std::set<int> cpus; |
8a5d5053 | 676 | ComboAddress local; |
43234e76 | 677 | std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr}; |
6e9fd124 | 678 | std::shared_ptr<TLSFrontend> tlsFrontend{nullptr}; |
fbf14b03 | 679 | std::shared_ptr<DOHFrontend> dohFrontend{nullptr}; |
6e9fd124 | 680 | std::string interface; |
963bef8d | 681 | std::atomic<uint64_t> queries{0}; |
a6e9e107 RG |
682 | std::atomic<uint64_t> tcpDiedReadingQuery{0}; |
683 | std::atomic<uint64_t> tcpDiedSendingResponse{0}; | |
684 | std::atomic<uint64_t> tcpGaveUp{0}; | |
685 | std::atomic<uint64_t> tcpClientTimeouts{0}; | |
686 | std::atomic<uint64_t> tcpDownstreamTimeouts{0}; | |
cff9aa03 RG |
687 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
688 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; | |
689 | /* in ms */ | |
690 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
a36ce055 RG |
691 | int udpFD{-1}; |
692 | int tcpFD{-1}; | |
6e9fd124 | 693 | int fastOpenQueueSize{0}; |
b5b93e0b | 694 | bool muted{false}; |
6e9fd124 RG |
695 | bool tcp; |
696 | bool reuseport; | |
697 | bool ready{false}; | |
8429ad04 RG |
698 | |
699 | int getSocket() const | |
700 | { | |
701 | return udpFD != -1 ? udpFD : tcpFD; | |
702 | } | |
703 | ||
ba7ec340 RG |
704 | std::string getType() const |
705 | { | |
706 | std::string result = udpFD != -1 ? "UDP" : "TCP"; | |
707 | ||
fbf14b03 RG |
708 | if (dohFrontend) { |
709 | result += " (DNS over HTTPS)"; | |
710 | } | |
711 | else if (tlsFrontend) { | |
ba7ec340 RG |
712 | result += " (DNS over TLS)"; |
713 | } | |
714 | else if (dnscryptCtx) { | |
715 | result += " (DNSCrypt)"; | |
716 | } | |
717 | ||
718 | return result; | |
719 | } | |
720 | ||
8429ad04 RG |
721 | #ifdef HAVE_EBPF |
722 | shared_ptr<BPFFilter> d_filter; | |
723 | ||
724 | void detachFilter() | |
725 | { | |
726 | if (d_filter) { | |
727 | d_filter->removeSocket(getSocket()); | |
728 | d_filter = nullptr; | |
729 | } | |
730 | } | |
731 | ||
732 | void attachFilter(shared_ptr<BPFFilter> bpf) | |
733 | { | |
734 | detachFilter(); | |
735 | ||
736 | bpf->addSocket(getSocket()); | |
737 | d_filter = bpf; | |
738 | } | |
739 | #endif /* HAVE_EBPF */ | |
cff9aa03 RG |
740 | |
741 | void updateTCPMetrics(size_t queries, uint64_t durationMs) | |
742 | { | |
743 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0); | |
744 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); | |
745 | } | |
8a5d5053 | 746 | }; |
747 | ||
748 | class TCPClientCollection { | |
749 | std::vector<int> d_tcpclientthreads; | |
ded1985a | 750 | std::atomic<uint64_t> d_numthreads{0}; |
a9bf3ec4 | 751 | std::atomic<uint64_t> d_pos{0}; |
ded1985a | 752 | std::atomic<uint64_t> d_queued{0}; |
73402775 | 753 | const uint64_t d_maxthreads{0}; |
ded1985a | 754 | std::mutex d_mutex; |
edbda1ad | 755 | int d_singlePipe[2]; |
73402775 | 756 | const bool d_useSinglePipe; |
ded1985a | 757 | public: |
8a5d5053 | 758 | |
b79e4996 RG |
759 | TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe) |
760 | ||
8a5d5053 | 761 | { |
a9bf3ec4 | 762 | d_tcpclientthreads.reserve(maxThreads); |
edbda1ad RG |
763 | |
764 | if (d_useSinglePipe) { | |
765 | if (pipe(d_singlePipe) < 0) { | |
766 | throw std::runtime_error("Error creating the TCP single communication pipe: " + string(strerror(errno))); | |
767 | } | |
3b07fd1b RG |
768 | |
769 | if (!setNonBlocking(d_singlePipe[0])) { | |
770 | int err = errno; | |
771 | close(d_singlePipe[0]); | |
772 | close(d_singlePipe[1]); | |
773 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err))); | |
774 | } | |
775 | ||
edbda1ad RG |
776 | if (!setNonBlocking(d_singlePipe[1])) { |
777 | int err = errno; | |
778 | close(d_singlePipe[0]); | |
779 | close(d_singlePipe[1]); | |
780 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + string(strerror(err))); | |
781 | } | |
782 | } | |
8a5d5053 | 783 | } |
a9bf3ec4 | 784 | int getThread() |
8a5d5053 | 785 | { |
6c1ca990 | 786 | uint64_t pos = d_pos++; |
8a5d5053 | 787 | ++d_queued; |
788 | return d_tcpclientthreads[pos % d_numthreads]; | |
789 | } | |
ded1985a RG |
790 | bool hasReachedMaxThreads() const |
791 | { | |
792 | return d_numthreads >= d_maxthreads; | |
793 | } | |
794 | uint64_t getThreadsCount() const | |
795 | { | |
796 | return d_numthreads; | |
797 | } | |
798 | uint64_t getQueuedCount() const | |
799 | { | |
800 | return d_queued; | |
801 | } | |
802 | void decrementQueuedCount() | |
803 | { | |
804 | --d_queued; | |
805 | } | |
8a5d5053 | 806 | void addTCPClientThread(); |
807 | }; | |
808 | ||
1f7646c2 | 809 | extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads; |
8a5d5053 | 810 | |
df111b53 | 811 | struct DownstreamState |
812 | { | |
1720247e | 813 | typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t; |
98650fde | 814 | |
150105a2 RG |
815 | DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets); |
816 | DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {} | |
6a62c0e3 RG |
817 | ~DownstreamState() |
818 | { | |
5bdbb83d | 819 | for (auto& fd : sockets) { |
150105a2 RG |
820 | if (fd >= 0) { |
821 | close(fd); | |
822 | fd = -1; | |
823 | } | |
824 | } | |
6a62c0e3 | 825 | } |
1720247e CHB |
826 | boost::uuids::uuid id; |
827 | std::set<unsigned int> hashes; | |
d58e616a | 828 | mutable pthread_rwlock_t d_lock; |
5bdbb83d RG |
829 | std::vector<int> sockets; |
830 | std::mutex socketsLock; | |
5d7e6765 | 831 | std::mutex connectLock; |
5bdbb83d | 832 | std::unique_ptr<FDMultiplexer> mplexer{nullptr}; |
df111b53 | 833 | std::thread tid; |
a2353842 | 834 | const ComboAddress remote; |
df111b53 | 835 | QPSLimiter qps; |
836 | vector<IDState> idStates; | |
73402775 | 837 | const ComboAddress sourceAddr; |
98650fde | 838 | checkfunc_t checkFunction; |
fbe2a2e0 RG |
839 | DNSName checkName{"a.root-servers.net."}; |
840 | QType checkType{QType::A}; | |
de9f7157 | 841 | uint16_t checkClass{QClass::IN}; |
df111b53 | 842 | std::atomic<uint64_t> idOffset{0}; |
843 | std::atomic<uint64_t> sendErrors{0}; | |
844 | std::atomic<uint64_t> outstanding{0}; | |
845 | std::atomic<uint64_t> reuseds{0}; | |
846 | std::atomic<uint64_t> queries{0}; | |
847 | struct { | |
848 | std::atomic<uint64_t> sendErrors{0}; | |
849 | std::atomic<uint64_t> reuseds{0}; | |
850 | std::atomic<uint64_t> queries{0}; | |
851 | } prev; | |
a6e9e107 RG |
852 | std::atomic<uint64_t> tcpDiedSendingQuery{0}; |
853 | std::atomic<uint64_t> tcpDiedReadingResponse{0}; | |
854 | std::atomic<uint64_t> tcpGaveUp{0}; | |
855 | std::atomic<uint64_t> tcpReadTimeouts{0}; | |
856 | std::atomic<uint64_t> tcpWriteTimeouts{0}; | |
cff9aa03 RG |
857 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
858 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; | |
859 | /* in ms */ | |
860 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
18eeccc9 | 861 | string name; |
5bdbb83d | 862 | size_t socketsOffset{0}; |
df111b53 | 863 | double queryLoad{0.0}; |
864 | double dropRate{0.0}; | |
865 | double latencyUsec{0.0}; | |
866 | int order{1}; | |
867 | int weight{1}; | |
b40cffe7 | 868 | int tcpConnectTimeout{5}; |
3f6d07a4 RG |
869 | int tcpRecvTimeout{30}; |
870 | int tcpSendTimeout{30}; | |
7c9bf18d | 871 | unsigned int checkInterval{1}; |
872 | unsigned int lastCheck{0}; | |
73402775 | 873 | const unsigned int sourceItf{0}; |
3f6d07a4 | 874 | uint16_t retries{5}; |
c85f69a8 | 875 | uint16_t xpfRRCode{0}; |
b7e6f4a1 | 876 | uint16_t checkTimeout{1000}; /* in milliseconds */ |
9e87dcb8 | 877 | uint8_t currentCheckFailures{0}; |
853faf61 | 878 | uint8_t consecutiveSuccessfulChecks{0}; |
9e87dcb8 | 879 | uint8_t maxCheckFailures{1}; |
1b633bec | 880 | uint8_t minRiseSuccesses{1}; |
df111b53 | 881 | StopWatch sw; |
882 | set<string> pools; | |
883 | enum class Availability { Up, Down, Auto} availability{Availability::Auto}; | |
fbe2a2e0 | 884 | bool mustResolve{false}; |
df111b53 | 885 | bool upStatus{false}; |
ca404e94 | 886 | bool useECS{false}; |
21830638 | 887 | bool setCD{false}; |
49c33a6c | 888 | bool disableZeroScope{false}; |
7565f4e6 | 889 | std::atomic<bool> connected{false}; |
5d7e6765 | 890 | std::atomic_flag threadStarted; |
284d460c | 891 | bool tcpFastOpen{false}; |
5602f131 | 892 | bool ipBindAddrNoPort{true}; |
5d7e6765 | 893 | |
df111b53 | 894 | bool isUp() const |
895 | { | |
896 | if(availability == Availability::Down) | |
897 | return false; | |
898 | if(availability == Availability::Up) | |
899 | return true; | |
900 | return upStatus; | |
901 | } | |
902 | void setUp() { availability = Availability::Up; } | |
903 | void setDown() { availability = Availability::Down; } | |
904 | void setAuto() { availability = Availability::Auto; } | |
18eeccc9 RG |
905 | string getName() const { |
906 | if (name.empty()) { | |
907 | return remote.toStringWithPort(); | |
908 | } | |
909 | return name; | |
910 | } | |
a7940c06 | 911 | string getNameWithAddr() const { |
912 | if (name.empty()) { | |
913 | return remote.toStringWithPort(); | |
914 | } | |
915 | return name + " (" + remote.toStringWithPort()+ ")"; | |
916 | } | |
9f4eb5cc RG |
917 | string getStatus() const |
918 | { | |
919 | string status; | |
920 | if(availability == DownstreamState::Availability::Up) | |
921 | status = "UP"; | |
922 | else if(availability == DownstreamState::Availability::Down) | |
923 | status = "DOWN"; | |
924 | else | |
925 | status = (upStatus ? "up" : "down"); | |
926 | return status; | |
927 | } | |
5d7e6765 | 928 | bool reconnect(); |
f2caf657 CHB |
929 | void hash(); |
930 | void setId(const boost::uuids::uuid& newId); | |
931 | void setWeight(int newWeight); | |
cff9aa03 RG |
932 | |
933 | void updateTCPMetrics(size_t queries, uint64_t durationMs) | |
934 | { | |
935 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0); | |
936 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); | |
937 | } | |
df111b53 | 938 | }; |
939 | using servers_t =vector<std::shared_ptr<DownstreamState>>; | |
df111b53 | 940 | |
da4e7813 | 941 | template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >; |
942 | ||
9b73b71c | 943 | void responderThread(std::shared_ptr<DownstreamState> state); |
da4e7813 | 944 | extern std::mutex g_luamutex; |
945 | extern LuaContext g_lua; | |
946 | extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex | |
947 | ||
0940e4eb | 948 | class DNSRule |
949 | { | |
950 | public: | |
205f2081 RG |
951 | virtual ~DNSRule () |
952 | { | |
953 | } | |
497a6e3a | 954 | virtual bool matches(const DNSQuestion* dq) const =0; |
0940e4eb | 955 | virtual string toString() const = 0; |
956 | mutable std::atomic<uint64_t> d_matches{0}; | |
957 | }; | |
958 | ||
da4e7813 | 959 | using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>; |
497a6e3a | 960 | typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t; |
df111b53 | 961 | |
962 | struct ServerPolicy | |
963 | { | |
964 | string name; | |
70a57b05 | 965 | policyfunc_t policy; |
a1b1a29d | 966 | bool isLua; |
a4fd2d2f CH |
967 | std::string toString() const { |
968 | return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\""; | |
969 | } | |
df111b53 | 970 | }; |
971 | ||
886e2cf2 RG |
972 | struct ServerPool |
973 | { | |
a1b1a29d RG |
974 | ServerPool() |
975 | { | |
976 | pthread_rwlock_init(&d_lock, nullptr); | |
977 | } | |
978 | ||
886e2cf2 RG |
979 | const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; }; |
980 | ||
7e687744 RG |
981 | bool getECS() const |
982 | { | |
983 | return d_useECS; | |
984 | } | |
985 | ||
986 | void setECS(bool useECS) | |
987 | { | |
988 | d_useECS = useECS; | |
989 | } | |
990 | ||
886e2cf2 | 991 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
b9f8a6c8 | 992 | std::shared_ptr<ServerPolicy> policy{nullptr}; |
5c30ec69 | 993 | |
a1b1a29d RG |
994 | size_t countServers(bool upOnly) |
995 | { | |
996 | size_t count = 0; | |
997 | ReadLock rl(&d_lock); | |
998 | for (const auto& server : d_servers) { | |
999 | if (!upOnly || std::get<1>(server)->isUp() ) { | |
1000 | count++; | |
c1b81381 RG |
1001 | } |
1002 | } | |
a1b1a29d RG |
1003 | return count; |
1004 | } | |
1005 | ||
1006 | NumberedVector<shared_ptr<DownstreamState>> getServers() | |
1007 | { | |
1008 | NumberedVector<shared_ptr<DownstreamState>> result; | |
1009 | { | |
1010 | ReadLock rl(&d_lock); | |
1011 | result = d_servers; | |
1012 | } | |
1013 | return result; | |
1014 | } | |
1015 | ||
1016 | void addServer(shared_ptr<DownstreamState>& server) | |
1017 | { | |
1018 | WriteLock wl(&d_lock); | |
1019 | unsigned int count = (unsigned int) d_servers.size(); | |
1020 | d_servers.push_back(make_pair(++count, server)); | |
1021 | /* we need to reorder based on the server 'order' */ | |
1022 | std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) { | |
1023 | return a.second->order < b.second->order; | |
1024 | }); | |
1025 | /* and now we need to renumber for Lua (custom policies) */ | |
1026 | size_t idx = 1; | |
1027 | for (auto& serv : d_servers) { | |
1028 | serv.first = idx++; | |
1029 | } | |
1030 | } | |
1031 | ||
1032 | void removeServer(shared_ptr<DownstreamState>& server) | |
1033 | { | |
1034 | WriteLock wl(&d_lock); | |
1035 | size_t idx = 1; | |
1036 | bool found = false; | |
1037 | for (auto it = d_servers.begin(); it != d_servers.end();) { | |
1038 | if (found) { | |
1039 | /* we need to renumber the servers placed | |
1040 | after the removed one, for Lua (custom policies) */ | |
1041 | it->first = idx++; | |
1042 | it++; | |
1043 | } | |
1044 | else if (it->second == server) { | |
1045 | it = d_servers.erase(it); | |
1046 | found = true; | |
1047 | } else { | |
1048 | idx++; | |
1049 | it++; | |
1050 | } | |
1051 | } | |
1052 | } | |
1053 | ||
1054 | private: | |
1055 | NumberedVector<shared_ptr<DownstreamState>> d_servers; | |
1056 | pthread_rwlock_t d_lock; | |
7e687744 | 1057 | bool d_useECS{false}; |
886e2cf2 RG |
1058 | }; |
1059 | using pools_t=map<std::string,std::shared_ptr<ServerPool>>; | |
742c079a | 1060 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy); |
886e2cf2 RG |
1061 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); |
1062 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); | |
1063 | ||
42fae326 | 1064 | struct CarbonConfig |
1065 | { | |
d617b22c | 1066 | ComboAddress server; |
813b0ba9 | 1067 | std::string namespace_name; |
42fae326 | 1068 | std::string ourname; |
813b0ba9 | 1069 | std::string instance_name; |
d617b22c | 1070 | unsigned int interval; |
42fae326 | 1071 | }; |
1072 | ||
/* EDNS header flag values. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0,
  EDNS_HEADER_FLAG_DO = 0x8000 /* 32768, i.e. the top bit of a 16-bit field */
};
1077 | ||
4d5959e6 RG |
1078 | struct DNSDistRuleAction |
1079 | { | |
1080 | std::shared_ptr<DNSRule> d_rule; | |
1081 | std::shared_ptr<DNSAction> d_action; | |
1082 | boost::uuids::uuid d_id; | |
f8a222ac | 1083 | uint64_t d_creationOrder; |
4d5959e6 RG |
1084 | }; |
1085 | ||
1086 | struct DNSDistResponseRuleAction | |
1087 | { | |
1088 | std::shared_ptr<DNSRule> d_rule; | |
1089 | std::shared_ptr<DNSResponseAction> d_action; | |
1090 | boost::uuids::uuid d_id; | |
f8a222ac | 1091 | uint64_t d_creationOrder; |
4d5959e6 RG |
1092 | }; |
1093 | ||
71c94675 | 1094 | extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 1095 | extern DNSAction::Action g_dynBlockAction; |
71c94675 | 1096 | |
d617b22c | 1097 | extern GlobalStateHolder<vector<CarbonConfig> > g_carbon; |
ecbe9133 | 1098 | extern GlobalStateHolder<ServerPolicy> g_policy; |
1099 | extern GlobalStateHolder<servers_t> g_dstates; | |
886e2cf2 | 1100 | extern GlobalStateHolder<pools_t> g_pools; |
4d5959e6 RG |
1101 | extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
1102 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
1103 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 1104 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
638184e9 | 1105 | extern GlobalStateHolder<NetmaskGroup> g_ACL; |
2e72cc0e | 1106 | |
ecbe9133 | 1107 | extern ComboAddress g_serverControl; // not changed during runtime |
1108 | ||
f0e4dcba | 1109 | extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX) |
a227f47d | 1110 | extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 1111 | extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 1112 | extern std::vector<std::unique_ptr<ClientState>> g_frontends; |
6ad8b29a | 1113 | extern bool g_truncateTC; |
b29edbee | 1114 | extern bool g_fixupCase; |
3f6d07a4 RG |
1115 | extern int g_tcpRecvTimeout; |
1116 | extern int g_tcpSendTimeout; | |
e0b5e49d | 1117 | extern int g_udpTimeout; |
e41f8165 RG |
1118 | extern uint16_t g_maxOutstanding; |
1119 | extern std::atomic<bool> g_configurationDone; | |
6c1ca990 RG |
1120 | extern uint64_t g_maxTCPClientThreads; |
1121 | extern uint64_t g_maxTCPQueuedConnections; | |
9396d955 RG |
1122 | extern size_t g_maxTCPQueriesPerConn; |
1123 | extern size_t g_maxTCPConnectionDuration; | |
1124 | extern size_t g_maxTCPConnectionsPerClient; | |
886e2cf2 | 1125 | extern std::atomic<uint16_t> g_cacheCleaningDelay; |
f65ea0c2 | 1126 | extern std::atomic<uint16_t> g_cacheCleaningPercentage; |
9e87dcb8 | 1127 | extern bool g_verboseHealthChecks; |
1ea747c0 | 1128 | extern uint32_t g_staleCacheEntriesTTL; |
56d68fad RG |
1129 | extern bool g_apiReadWrite; |
1130 | extern std::string g_apiConfigDirectory; | |
26a3cdb7 | 1131 | extern bool g_servFailOnNoPolicy; |
36e763fa | 1132 | extern uint32_t g_hashperturb; |
edbda1ad | 1133 | extern bool g_useTCPSinglePipe; |
cff9aa03 | 1134 | extern uint16_t g_downstreamTCPCleanupInterval; |
0beaa5c8 | 1135 | extern size_t g_udpVectorSize; |
53c57da7 | 1136 | extern bool g_preserveTrailingData; |
0dffe9e3 | 1137 | extern bool g_allowEmptyResponse; |
32b86928 | 1138 | extern bool g_roundrobinFailOnNoServer; |
ca404e94 | 1139 | |
#ifdef HAVE_EBPF
/* BPF filter globals, only declared when HAVE_EBPF is defined. */
extern shared_ptr<BPFFilter> g_defaultBPFFilter;
extern std::vector<std::shared_ptr<DynBPFFilter>> g_dynBPFFilters;
#endif /* HAVE_EBPF */
1144 | ||
0beaa5c8 RG |
1145 | struct LocalHolders |
1146 | { | |
2d4783a8 | 1147 | LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal()) |
0beaa5c8 RG |
1148 | { |
1149 | } | |
1150 | ||
1151 | LocalStateHolder<NetmaskGroup> acl; | |
1152 | LocalStateHolder<ServerPolicy> policy; | |
4d5959e6 RG |
1153 | LocalStateHolder<vector<DNSDistRuleAction> > rulactions; |
1154 | LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions; | |
2d4783a8 | 1155 | LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions; |
0beaa5c8 RG |
1156 | LocalStateHolder<servers_t> servers; |
1157 | LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock; | |
1158 | LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock; | |
1159 | LocalStateHolder<pools_t> pools; | |
1160 | }; | |
1161 | ||
ecbe9133 | 1162 | struct dnsheader; |
1163 | ||
1164 | void controlThread(int fd, ComboAddress local); | |
839f3021 | 1165 | vector<std::function<void(void)>> setupLua(bool client, const std::string& config); |
886e2cf2 RG |
1166 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName); |
1167 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName); | |
a1b1a29d | 1168 | NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName); |
da4e7813 | 1169 | |
497a6e3a | 1170 | std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq); |
ecbe9133 | 1171 | |
497a6e3a RG |
1172 | std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq); |
1173 | std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq); | |
1174 | std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq); | |
1720247e | 1175 | std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq); |
497a6e3a | 1176 | std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq); |
e7c732b8 | 1177 | |
80dbd7d2 CHB |
1178 | struct WebserverConfig |
1179 | { | |
1180 | std::string password; | |
1181 | std::string apiKey; | |
1182 | boost::optional<std::map<std::string, std::string> > customHeaders; | |
1183 | std::mutex lock; | |
1184 | }; | |
1185 | ||
32c97b56 CHB |
1186 | void setWebserverAPIKey(const boost::optional<std::string> apiKey); |
1187 | void setWebserverPassword(const std::string& password); | |
1188 | void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders); | |
1189 | ||
80dbd7d2 | 1190 | void dnsdistWebserverThread(int sock, const ComboAddress& local); |
9b73b71c | 1191 | void tcpAcceptorThread(void* p); |
fbf14b03 RG |
1192 | #ifdef HAVE_DNS_OVER_HTTPS |
1193 | void dohThread(ClientState* cs); | |
1194 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
80a216c9 | 1195 | |
f758857a | 1196 | void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects |
1197 | void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls | |
1198 | bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect | |
1199 | void resetLuaSideEffect(); // reset to indeterminate state | |
11e1e08b | 1200 | |
e7c732b8 | 1201 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed); |
3e425868 | 1202 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted); |
2a28db86 | 1203 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop); |
4ab01344 | 1204 | |
0beaa5c8 | 1205 | bool checkQueryHeaders(const struct dnsheader* dh); |
fcffc585 | 1206 | |
6e9fd124 | 1207 | extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
43234e76 | 1208 | int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response); |
4ab01344 | 1209 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp); |
9f4eb5cc | 1210 | |
18f707fa | 1211 | bool addXPF(DNSQuestion& dq, uint16_t optionCode); |
5cc8371b | 1212 | |
555970c9 RG |
1213 | uint16_t getRandomDNSID(); |
1214 | ||
9f4eb5cc RG |
1215 | #include "dnsdist-snmp.hh" |
1216 | ||
1217 | extern bool g_snmpEnabled; | |
1218 | extern bool g_snmpTrapsEnabled; | |
1219 | extern DNSDistSNMPAgent* g_snmpAgent; | |
e7c732b8 RG |
1220 | extern bool g_addEDNSToSelfGeneratedResponses; |
1221 | ||
1222 | static const size_t s_udpIncomingBufferSize{1500}; | |
4ab01344 | 1223 | |
3e425868 RG |
1224 | enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend }; |
1225 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend); | |
4ab01344 | 1226 | |
d0ae6360 RG |
1227 | DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP); |
1228 | void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname); | |
fbf14b03 RG |
1229 | |
1230 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state); | |
1231 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false); |