]>
Commit | Line | Data |
---|---|---|
12471842 PL |
1 | /* |
2 | * This file is part of PowerDNS or dnsdist. | |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
df111b53 | 22 | #pragma once |
11e1e08b | 23 | #include "config.h" |
df111b53 | 24 | #include "ext/luawrapper/include/LuaContext.hpp" |
cbf4e13a | 25 | |
df111b53 | 26 | #include <atomic> |
df111b53 | 27 | #include <mutex> |
cbf4e13a | 28 | #include <string> |
df111b53 | 29 | #include <thread> |
cbf4e13a | 30 | #include <time.h> |
bffca8b9 | 31 | #include <unistd.h> |
cbf4e13a RG |
32 | #include <unordered_map> |
33 | ||
cbf4e13a RG |
34 | #include <boost/variant.hpp> |
35 | ||
36 | #include "bpf-filter.hh" | |
f12666f2 | 37 | #include "capabilities.hh" |
9f6a31ff | 38 | #include "circular_buffer.hh" |
11e1e08b | 39 | #include "dnscrypt.hh" |
886e2cf2 | 40 | #include "dnsdist-cache.hh" |
87b515ed | 41 | #include "dnsdist-dynbpf.hh" |
cbf4e13a | 42 | #include "dnsname.hh" |
fbf14b03 | 43 | #include "doh.hh" |
cbf4e13a RG |
44 | #include "ednsoptions.hh" |
45 | #include "gettime.hh" | |
46 | #include "iputils.hh" | |
47 | #include "misc.hh" | |
48 | #include "mplexer.hh" | |
49 | #include "sholder.hh" | |
a227f47d | 50 | #include "tcpiohandler.hh" |
d61aa945 | 51 | #include "uuid-utils.hh" |
d8c19b98 | 52 | |
9b73b71c | 53 | void carbonDumpThread(); |
61d1b966 | 54 | uint64_t uptimeOfProcess(const std::string& str); |
bd1c631b | 55 | |
7b925432 RG |
56 | extern uint16_t g_ECSSourcePrefixV4; |
57 | extern uint16_t g_ECSSourcePrefixV6; | |
58 | extern bool g_ECSOverride; | |
26a6373d | 59 | |
15fac047 CH |
60 | typedef std::unordered_map<string, string> QTag; |
61 | ||
7b925432 RG |
62 | struct DNSQuestion |
63 | { | |
e7c732b8 | 64 | DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_): |
4ab01344 RG |
65 | qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) { |
66 | const uint16_t* flags = getFlagsFromDNSHeader(dh); | |
67 | origFlags = *flags; | |
68 | } | |
dd1a3034 RG |
69 | DNSQuestion(const DNSQuestion&) = delete; |
70 | DNSQuestion& operator=(const DNSQuestion&) = delete; | |
71 | DNSQuestion(DNSQuestion&&) = default; | |
7b925432 RG |
72 | |
73 | #ifdef HAVE_PROTOBUF | |
ec48a28d | 74 | boost::optional<boost::uuids::uuid> uniqueId; |
7b925432 | 75 | #endif |
bd14f087 | 76 | Netmask ecs; |
4ab01344 | 77 | boost::optional<Netmask> subnet; |
046bac5c | 78 | std::string sni; /* Server Name Indication, if any (DoT or DoH) */ |
2a28db86 | 79 | std::string poolname; |
4ab01344 RG |
80 | const DNSName* qname{nullptr}; |
81 | const ComboAddress* local{nullptr}; | |
82 | const ComboAddress* remote{nullptr}; | |
15fac047 | 83 | std::shared_ptr<QTag> qTag{nullptr}; |
cbf4e13a | 84 | std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions; |
4ab01344 RG |
85 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
86 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; | |
87 | struct dnsheader* dh{nullptr}; | |
88 | const struct timespec* queryTime{nullptr}; | |
fbf14b03 | 89 | struct DOHUnit* du{nullptr}; |
7b925432 | 90 | size_t size; |
e7c732b8 | 91 | unsigned int consumed{0}; |
4ab01344 RG |
92 | int delayMsec{0}; |
93 | boost::optional<uint32_t> tempFailureTTL; | |
94 | uint32_t cacheKeyNoECS; | |
95 | uint32_t cacheKey; | |
96 | const uint16_t qtype; | |
97 | const uint16_t qclass; | |
7b925432 RG |
98 | uint16_t len; |
99 | uint16_t ecsPrefixLength; | |
4ab01344 | 100 | uint16_t origFlags; |
1ecbd15e | 101 | uint8_t ednsRCode{0}; |
7b925432 RG |
102 | const bool tcp; |
103 | bool skipCache{false}; | |
104 | bool ecsOverride; | |
5b8255ba | 105 | bool useECS{true}; |
5cc8371b | 106 | bool addXPF{true}; |
bd14f087 | 107 | bool ecsSet{false}; |
4ab01344 RG |
108 | bool ecsAdded{false}; |
109 | bool ednsAdded{false}; | |
110 | bool useZeroScope{false}; | |
111 | bool dnssecOK{false}; | |
7b925432 RG |
112 | }; |
113 | ||
114 | struct DNSResponse : DNSQuestion | |
115 | { | |
dd026b9c RG |
116 | DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_): |
117 | DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { } | |
dd1a3034 RG |
118 | DNSResponse(const DNSResponse&) = delete; |
119 | DNSResponse& operator=(const DNSResponse&) = delete; | |
120 | DNSResponse(DNSResponse&&) = default; | |
7b925432 RG |
121 | }; |
122 | ||
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header: (servfail|refused|notimp), set TC=1,
   send to pool */
131 | ||
132 | class DNSAction | |
133 | { | |
134 | public: | |
3d60b39a | 135 | enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse }; |
b718792f RG |
136 | static std::string typeToString(const Action& action) |
137 | { | |
138 | switch(action) { | |
139 | case Action::Drop: | |
140 | return "Drop"; | |
141 | case Action::Nxdomain: | |
142 | return "Send NXDomain"; | |
143 | case Action::Refused: | |
144 | return "Send Refused"; | |
145 | case Action::Spoof: | |
146 | return "Spoof an answer"; | |
147 | case Action::Allow: | |
148 | return "Allow"; | |
149 | case Action::HeaderModify: | |
150 | return "Modify the header"; | |
151 | case Action::Pool: | |
152 | return "Route to a pool"; | |
153 | case Action::Delay: | |
154 | return "Delay"; | |
155 | case Action::Truncate: | |
156 | return "Truncate over UDP"; | |
157 | case Action::ServFail: | |
158 | return "Send ServFail"; | |
159 | case Action::None: | |
477c86a0 | 160 | case Action::NoOp: |
b718792f | 161 | return "Do nothing"; |
3d60b39a | 162 | case Action::NoRecurse: |
163 | return "Set rd=0"; | |
b718792f RG |
164 | } |
165 | ||
166 | return "Unknown"; | |
167 | } | |
168 | ||
7b925432 | 169 | virtual Action operator()(DNSQuestion*, string* ruleresult) const =0; |
205f2081 RG |
170 | virtual ~DNSAction() |
171 | { | |
172 | } | |
7b925432 | 173 | virtual string toString() const = 0; |
b8019cf7 | 174 | virtual std::map<string, double> getStats() const |
7b925432 RG |
175 | { |
176 | return {{}}; | |
177 | } | |
178 | }; | |
179 | ||
180 | class DNSResponseAction | |
181 | { | |
182 | public: | |
5f23eb98 | 183 | enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None }; |
7b925432 | 184 | virtual Action operator()(DNSResponse*, string* ruleresult) const =0; |
205f2081 RG |
185 | virtual ~DNSResponseAction() |
186 | { | |
187 | } | |
7b925432 RG |
188 | virtual string toString() const = 0; |
189 | }; | |
190 | ||
78ffa782 | 191 | struct DynBlock |
192 | { | |
1d3ba133 | 193 | DynBlock(): action(DNSAction::Action::None), warning(false) |
5708a729 RG |
194 | { |
195 | } | |
196 | ||
1d3ba133 | 197 | DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false) |
5708a729 RG |
198 | { |
199 | } | |
200 | ||
1d3ba133 | 201 | DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning) |
5708a729 RG |
202 | { |
203 | blocks.store(rhs.blocks); | |
204 | } | |
205 | ||
78ffa782 | 206 | DynBlock& operator=(const DynBlock& rhs) |
207 | { | |
208 | reason=rhs.reason; | |
209 | until=rhs.until; | |
71c94675 | 210 | domain=rhs.domain; |
7b925432 | 211 | action=rhs.action; |
78ffa782 | 212 | blocks.store(rhs.blocks); |
1d3ba133 | 213 | warning=rhs.warning; |
78ffa782 | 214 | return *this; |
215 | } | |
71c94675 | 216 | |
78ffa782 | 217 | string reason; |
218 | struct timespec until; | |
71c94675 | 219 | DNSName domain; |
7b925432 | 220 | DNSAction::Action action; |
78ffa782 | 221 | mutable std::atomic<unsigned int> blocks; |
1d3ba133 | 222 | bool warning; |
78ffa782 | 223 | }; |
224 | ||
225 | extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; | |
f758857a | 226 | |
227 | extern vector<pair<struct timeval, std::string> > g_confDelta; | |
228 | ||
eb0335ff MC |
229 | extern uint64_t getLatencyCount(const std::string&); |
230 | ||
e48090d1 | 231 | struct DNSDistStats |
232 | { | |
6ad8b29a | 233 | using stat_t=std::atomic<uint64_t>; // aww yiss ;-) |
e48090d1 | 234 | stat_t responses{0}; |
235 | stat_t servfailResponses{0}; | |
236 | stat_t queries{0}; | |
61d10a4d MH |
237 | stat_t frontendNXDomain{0}; |
238 | stat_t frontendServFail{0}; | |
239 | stat_t frontendNoError{0}; | |
e73ec7d3 | 240 | stat_t nonCompliantQueries{0}; |
d08b1cdf | 241 | stat_t nonCompliantResponses{0}; |
643a182a | 242 | stat_t rdQueries{0}; |
2efd427d | 243 | stat_t emptyQueries{0}; |
e48090d1 | 244 | stat_t aclDrops{0}; |
bd1c631b | 245 | stat_t dynBlocked{0}; |
e48090d1 | 246 | stat_t ruleDrop{0}; |
247 | stat_t ruleNXDomain{0}; | |
dd46e5e3 | 248 | stat_t ruleRefused{0}; |
5f23eb98 | 249 | stat_t ruleServFail{0}; |
e48090d1 | 250 | stat_t selfAnswered{0}; |
251 | stat_t downstreamTimeouts{0}; | |
252 | stat_t downstreamSendErrors{0}; | |
6ad8b29a | 253 | stat_t truncFail{0}; |
b8bc7e61 | 254 | stat_t noPolicy{0}; |
886e2cf2 RG |
255 | stat_t cacheHits{0}; |
256 | stat_t cacheMisses{0}; | |
eb0335ff | 257 | stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0}; |
f29758cc | 258 | stat_t securityStatus{0}; |
5c30ec69 | 259 | |
e16fd59c | 260 | double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0}; |
a1a787dc | 261 | typedef std::function<uint64_t(const std::string&)> statfunction_t; |
72f58a53 | 262 | typedef boost::variant<stat_t*, double*, statfunction_t> entry_t; |
e16fd59c | 263 | std::vector<std::pair<std::string, entry_t>> entries{ |
dd46e5e3 RG |
264 | {"responses", &responses}, |
265 | {"servfail-responses", &servfailResponses}, | |
266 | {"queries", &queries}, | |
61d10a4d MH |
267 | {"frontend-nxdomain", &frontendNXDomain}, |
268 | {"frontend-servfail", &frontendServFail}, | |
269 | {"frontend-noerror", &frontendNoError}, | |
dd46e5e3 | 270 | {"acl-drops", &aclDrops}, |
dd46e5e3 RG |
271 | {"rule-drop", &ruleDrop}, |
272 | {"rule-nxdomain", &ruleNXDomain}, | |
273 | {"rule-refused", &ruleRefused}, | |
5f23eb98 | 274 | {"rule-servfail", &ruleServFail}, |
dd46e5e3 RG |
275 | {"self-answered", &selfAnswered}, |
276 | {"downstream-timeouts", &downstreamTimeouts}, | |
5c30ec69 | 277 | {"downstream-send-errors", &downstreamSendErrors}, |
dd46e5e3 RG |
278 | {"trunc-failures", &truncFail}, |
279 | {"no-policy", &noPolicy}, | |
280 | {"latency0-1", &latency0_1}, | |
281 | {"latency1-10", &latency1_10}, | |
282 | {"latency10-50", &latency10_50}, | |
283 | {"latency50-100", &latency50_100}, | |
284 | {"latency100-1000", &latency100_1000}, | |
285 | {"latency-slow", &latencySlow}, | |
286 | {"latency-avg100", &latencyAvg100}, | |
287 | {"latency-avg1000", &latencyAvg1000}, | |
288 | {"latency-avg10000", &latencyAvg10000}, | |
289 | {"latency-avg1000000", &latencyAvg1000000}, | |
61d1b966 | 290 | {"uptime", uptimeOfProcess}, |
a9b6db56 | 291 | {"real-memory-usage", getRealMemoryUsage}, |
330dcb5c | 292 | {"special-memory-usage", getSpecialMemoryUsage}, |
0d394f35 RG |
293 | {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")}, |
294 | {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")}, | |
295 | {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")}, | |
296 | {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")}, | |
a2aa00ed | 297 | {"noncompliant-queries", &nonCompliantQueries}, |
d08b1cdf | 298 | {"noncompliant-responses", &nonCompliantResponses}, |
643a182a | 299 | {"rdqueries", &rdQueries}, |
2efd427d | 300 | {"empty-queries", &emptyQueries}, |
886e2cf2 RG |
301 | {"cache-hits", &cacheHits}, |
302 | {"cache-misses", &cacheMisses}, | |
0d394f35 RG |
303 | {"cpu-iowait", getCPUIOWait}, |
304 | {"cpu-steal", getCPUSteal}, | |
4f99f3d3 | 305 | {"cpu-sys-msec", getCPUTimeSystem}, |
0d394f35 | 306 | {"cpu-user-msec", getCPUTimeUser}, |
dd46e5e3 | 307 | {"fd-usage", getOpenFileDescriptors}, |
5c30ec69 | 308 | {"dyn-blocked", &dynBlocked}, |
f29758cc | 309 | {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }}, |
eb0335ff MC |
310 | {"security-status", &securityStatus}, |
311 | // Latency histogram | |
312 | {"latency-sum", &latencySum}, | |
313 | {"latency-count", getLatencyCount}, | |
42fae326 | 314 | }; |
e48090d1 | 315 | }; |
316 | ||
317 | extern struct DNSDistStats g_stats; | |
f653b8df | 318 | void doLatencyStats(double udiff); |
e48090d1 | 319 | |
638184e9 | 320 | |
df111b53 | 321 | struct StopWatch |
322 | { | |
58307a85 RG |
323 | StopWatch(bool realTime=false): d_needRealTime(realTime) |
324 | { | |
325 | } | |
df111b53 | 326 | struct timespec d_start{0,0}; |
58307a85 RG |
327 | bool d_needRealTime{false}; |
328 | ||
5c30ec69 | 329 | void start() { |
58307a85 | 330 | if(gettime(&d_start, d_needRealTime) < 0) |
df111b53 | 331 | unixDie("Getting timestamp"); |
5c30ec69 | 332 | |
df111b53 | 333 | } |
cf48b0ce RG |
334 | |
335 | void set(const struct timespec& from) { | |
336 | d_start = from; | |
337 | } | |
5c30ec69 | 338 | |
df111b53 | 339 | double udiff() const { |
340 | struct timespec now; | |
58307a85 | 341 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 342 | unixDie("Getting timestamp"); |
5c30ec69 | 343 | |
df111b53 | 344 | return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
345 | } | |
346 | ||
347 | double udiffAndSet() { | |
348 | struct timespec now; | |
58307a85 | 349 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 350 | unixDie("Getting timestamp"); |
5c30ec69 | 351 | |
df111b53 | 352 | auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
353 | d_start = now; | |
354 | return ret; | |
355 | } | |
356 | ||
357 | }; | |
358 | ||
67ce0bdd | 359 | class BasicQPSLimiter |
df111b53 | 360 | { |
361 | public: | |
67ce0bdd | 362 | BasicQPSLimiter() |
df111b53 | 363 | { |
364 | } | |
365 | ||
2d29e6b7 | 366 | BasicQPSLimiter(unsigned int burst): d_tokens(burst) |
67ce0bdd RG |
367 | { |
368 | d_prev.start(); | |
369 | } | |
370 | ||
371 | bool check(unsigned int rate, unsigned int burst) const // this is not quite fair | |
372 | { | |
373 | auto delta = d_prev.udiffAndSet(); | |
374 | ||
1a1787b6 | 375 | if(delta > 0.0) // time, frequently, does go backwards.. |
376 | d_tokens += 1.0 * rate * (delta/1000000.0); | |
67ce0bdd RG |
377 | |
378 | if(d_tokens > burst) { | |
379 | d_tokens = burst; | |
380 | } | |
381 | ||
382 | bool ret=false; | |
383 | if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise | |
384 | ret=true; | |
385 | --d_tokens; | |
386 | } | |
387 | ||
388 | return ret; | |
389 | } | |
390 | ||
391 | bool seenSince(const struct timespec& cutOff) const | |
392 | { | |
393 | return cutOff < d_prev.d_start; | |
394 | } | |
395 | ||
396 | protected: | |
397 | mutable StopWatch d_prev; | |
398 | mutable double d_tokens; | |
399 | }; | |
400 | ||
401 | class QPSLimiter : public BasicQPSLimiter | |
402 | { | |
403 | public: | |
404 | QPSLimiter(): BasicQPSLimiter() | |
405 | { | |
406 | } | |
407 | ||
2d29e6b7 | 408 | QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false) |
df111b53 | 409 | { |
df111b53 | 410 | d_prev.start(); |
411 | } | |
412 | ||
413 | unsigned int getRate() const | |
414 | { | |
67ce0bdd | 415 | return d_passthrough ? 0 : d_rate; |
df111b53 | 416 | } |
417 | ||
418 | int getPassed() const | |
419 | { | |
420 | return d_passed; | |
421 | } | |
67ce0bdd | 422 | |
df111b53 | 423 | int getBlocked() const |
424 | { | |
425 | return d_blocked; | |
426 | } | |
427 | ||
ecbe9133 | 428 | bool check() const // this is not quite fair |
df111b53 | 429 | { |
67ce0bdd | 430 | if (d_passthrough) { |
df111b53 | 431 | return true; |
67ce0bdd | 432 | } |
df111b53 | 433 | |
67ce0bdd RG |
434 | bool ret = BasicQPSLimiter::check(d_rate, d_burst); |
435 | if (ret) { | |
df111b53 | 436 | d_passed++; |
437 | } | |
67ce0bdd | 438 | else { |
df111b53 | 439 | d_blocked++; |
67ce0bdd | 440 | } |
df111b53 | 441 | |
5c30ec69 | 442 | return ret; |
df111b53 | 443 | } |
444 | private: | |
ecbe9133 | 445 | mutable unsigned int d_passed{0}; |
446 | mutable unsigned int d_blocked{0}; | |
67ce0bdd RG |
447 | unsigned int d_rate; |
448 | unsigned int d_burst; | |
449 | bool d_passthrough{true}; | |
df111b53 | 450 | }; |
451 | ||
b5b93e0b RG |
452 | struct ClientState; |
453 | ||
df111b53 | 454 | struct IDState |
455 | { | |
a9489723 | 456 | IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;} |
71b86bd8 | 457 | IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age) |
df111b53 | 458 | { |
a9489723 RG |
459 | usageIndicator.store(orig.usageIndicator.load()); |
460 | origFD = orig.origFD; | |
df111b53 | 461 | origID = orig.origID; |
7b3865cd | 462 | delayMsec = orig.delayMsec; |
acb8f5d5 | 463 | tempFailureTTL = orig.tempFailureTTL; |
df111b53 | 464 | } |
465 | ||
311f19d5 | 466 | static const int64_t unusedIndicator = -1; |
2bf26975 | 467 | |
311f19d5 RG |
468 | static bool isInUse(int64_t usageIndicator) |
469 | { | |
470 | return usageIndicator != unusedIndicator; | |
471 | } | |
472 | ||
473 | bool isInUse() const | |
474 | { | |
475 | return usageIndicator != unusedIndicator; | |
476 | } | |
477 | ||
478 | /* return true if the value has been successfully replaced meaning that | |
479 | no-one updated the usage indicator in the meantime */ | |
480 | bool tryMarkUnused(int64_t expectedUsageIndicator) | |
481 | { | |
482 | return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator); | |
483 | } | |
484 | ||
485 | /* mark as unused no matter what, return true if the state was in use before */ | |
486 | bool markAsUsed() | |
487 | { | |
488 | auto currentGeneration = generation++; | |
489 | return markAsUsed(currentGeneration); | |
490 | } | |
491 | ||
492 | /* mark as unused no matter what, return true if the state was in use before */ | |
493 | bool markAsUsed(int64_t currentGeneration) | |
494 | { | |
495 | int64_t oldUsage = usageIndicator.exchange(currentGeneration); | |
496 | return oldUsage != unusedIndicator; | |
497 | } | |
498 | ||
a9489723 | 499 | /* We use this value to detect whether this state is in use. |
9bd1a882 RG |
500 | For performance reasons we don't want to use a lock here, but that means |
501 | we need to be very careful when modifying this value. Modifications happen | |
502 | from: | |
503 | - one of the UDP or DoH 'client' threads receiving a query, selecting a backend | |
504 | then picking one of the states associated to this backend (via the idOffset). | |
a9489723 | 505 | Most of the time this state should not be in use and usageIndicator is -1, but we |
9bd1a882 RG |
506 | might not yet have received a response for the query previously associated to this |
507 | state, meaning that we will 'reuse' this state and erase the existing state. | |
508 | If we ever receive a response for this state, it will be discarded. This is | |
509 | mostly fine for UDP except that we still need to be careful in order to miss | |
510 | the 'outstanding' counters, which should only be increased when we are picking | |
511 | an empty state, and not when reusing ; | |
512 | For DoH, though, we have dynamically allocated a DOHUnit object that needs to | |
513 | be freed, as well as internal objects internals to libh2o. | |
514 | - one of the UDP receiver threads receiving a response from a backend, picking | |
515 | the corresponding state and sending the response to the client ; | |
516 | - the 'healthcheck' thread scanning the states to actively discover timeouts, | |
517 | mostly to keep some counters like the 'outstanding' one sane. | |
a9489723 RG |
518 | We previously based that logic on the origFD (FD on which the query was received, |
519 | and therefore from where the response should be sent) but this suffered from an | |
520 | ABA problem since it was quite likely that a UDP 'client thread' would reset it to the | |
521 | same value since we only have so much incoming sockets: | |
522 | - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ; | |
523 | - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname, | |
524 | qtype and qclass match | |
525 | - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ; | |
526 | - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still | |
527 | 5, except it's not the same 5 anymore and it overrides a fresh state. | |
528 | We now use a 32-bit unsigned counter instead, which is incremented every time the state is set, | |
529 | wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1 | |
530 | when the state is unused and the value of our counter otherwise. | |
9bd1a882 | 531 | */ |
311f19d5 RG |
532 | std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8 |
533 | std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4 | |
2bf26975 | 534 | ComboAddress origRemote; // 28 |
549d63c9 | 535 | ComboAddress origDest; // 28 |
2bf26975 | 536 | StopWatch sentTime; // 16 |
537 | DNSName qname; // 80 | |
43234e76 | 538 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
d8c19b98 | 539 | #ifdef HAVE_PROTOBUF |
ec48a28d | 540 | boost::optional<boost::uuids::uuid> uniqueId; |
11e1e08b | 541 | #endif |
78e3ac9e | 542 | boost::optional<Netmask> subnet{boost::none}; |
886e2cf2 | 543 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
a76b0d63 | 544 | std::shared_ptr<QTag> qTag{nullptr}; |
b5b93e0b | 545 | const ClientState* cs{nullptr}; |
fbf14b03 | 546 | DOHUnit* du{nullptr}; |
9837850d | 547 | uint32_t cacheKey; // 4 |
548 | uint32_t cacheKeyNoECS; // 4 | |
71b86bd8 | 549 | uint16_t age; // 4 |
2bf26975 | 550 | uint16_t qtype; // 2 |
886e2cf2 | 551 | uint16_t qclass; // 2 |
2bf26975 | 552 | uint16_t origID; // 2 |
aeb36780 | 553 | uint16_t origFlags; // 2 |
a9489723 | 554 | int origFD{-1}; |
7b3865cd | 555 | int delayMsec; |
acb8f5d5 | 556 | boost::optional<uint32_t> tempFailureTTL; |
ca404e94 | 557 | bool ednsAdded{false}; |
ff73f02b | 558 | bool ecsAdded{false}; |
886e2cf2 | 559 | bool skipCache{false}; |
7cea4e39 | 560 | bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr |
d7728daf | 561 | bool dnssecOK{false}; |
389d903a | 562 | bool useZeroScope; |
df111b53 | 563 | }; |
564 | ||
786e4d8c | 565 | typedef std::unordered_map<string, unsigned int> QueryCountRecords; |
dd1a3034 | 566 | typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter; |
786e4d8c RS |
567 | struct QueryCount { |
568 | QueryCount() | |
569 | { | |
43234e76 | 570 | pthread_rwlock_init(&queryLock, nullptr); |
786e4d8c | 571 | } |
040793d4 OM |
572 | ~QueryCount() |
573 | { | |
574 | pthread_rwlock_destroy(&queryLock); | |
575 | } | |
786e4d8c RS |
576 | QueryCountRecords records; |
577 | QueryCountFilter filter; | |
578 | pthread_rwlock_t queryLock; | |
579 | bool enabled{false}; | |
580 | }; | |
581 | ||
582 | extern QueryCount g_qcount; | |
583 | ||
8a5d5053 | 584 | struct ClientState |
585 | { | |
8274967b | 586 | ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort) |
6e9fd124 RG |
587 | { |
588 | } | |
589 | ||
f0e4dcba | 590 | std::set<int> cpus; |
8a5d5053 | 591 | ComboAddress local; |
43234e76 | 592 | std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr}; |
6e9fd124 | 593 | std::shared_ptr<TLSFrontend> tlsFrontend{nullptr}; |
fbf14b03 | 594 | std::shared_ptr<DOHFrontend> dohFrontend{nullptr}; |
6e9fd124 | 595 | std::string interface; |
963bef8d | 596 | std::atomic<uint64_t> queries{0}; |
7fc95193 | 597 | mutable std::atomic<uint64_t> responses{0}; |
a6e9e107 RG |
598 | std::atomic<uint64_t> tcpDiedReadingQuery{0}; |
599 | std::atomic<uint64_t> tcpDiedSendingResponse{0}; | |
600 | std::atomic<uint64_t> tcpGaveUp{0}; | |
601 | std::atomic<uint64_t> tcpClientTimeouts{0}; | |
602 | std::atomic<uint64_t> tcpDownstreamTimeouts{0}; | |
cff9aa03 | 603 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
846b63bb RG |
604 | std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption |
605 | std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket | |
b608e6c6 RG |
606 | std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired) |
607 | std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one | |
bb3954f0 RG |
608 | std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0 |
609 | std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1 | |
610 | std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2 | |
611 | std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3 | |
612 | std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version | |
cff9aa03 RG |
613 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; |
614 | /* in ms */ | |
615 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
a36ce055 RG |
616 | int udpFD{-1}; |
617 | int tcpFD{-1}; | |
6e9fd124 | 618 | int fastOpenQueueSize{0}; |
b5b93e0b | 619 | bool muted{false}; |
6e9fd124 RG |
620 | bool tcp; |
621 | bool reuseport; | |
622 | bool ready{false}; | |
8429ad04 RG |
623 | |
624 | int getSocket() const | |
625 | { | |
626 | return udpFD != -1 ? udpFD : tcpFD; | |
627 | } | |
628 | ||
3a2ca389 RG |
629 | bool isUDP() const |
630 | { | |
631 | return udpFD != -1; | |
632 | } | |
633 | ||
634 | bool isTCP() const | |
635 | { | |
636 | return udpFD == -1; | |
637 | } | |
638 | ||
f34fdcc5 RG |
639 | bool hasTLS() const |
640 | { | |
641 | return tlsFrontend != nullptr || dohFrontend != nullptr; | |
642 | } | |
643 | ||
ba7ec340 RG |
644 | std::string getType() const |
645 | { | |
646 | std::string result = udpFD != -1 ? "UDP" : "TCP"; | |
647 | ||
fbf14b03 RG |
648 | if (dohFrontend) { |
649 | result += " (DNS over HTTPS)"; | |
650 | } | |
651 | else if (tlsFrontend) { | |
ba7ec340 RG |
652 | result += " (DNS over TLS)"; |
653 | } | |
654 | else if (dnscryptCtx) { | |
655 | result += " (DNSCrypt)"; | |
656 | } | |
657 | ||
658 | return result; | |
659 | } | |
660 | ||
8429ad04 RG |
661 | #ifdef HAVE_EBPF |
662 | shared_ptr<BPFFilter> d_filter; | |
663 | ||
664 | void detachFilter() | |
665 | { | |
666 | if (d_filter) { | |
667 | d_filter->removeSocket(getSocket()); | |
668 | d_filter = nullptr; | |
669 | } | |
670 | } | |
671 | ||
672 | void attachFilter(shared_ptr<BPFFilter> bpf) | |
673 | { | |
674 | detachFilter(); | |
675 | ||
676 | bpf->addSocket(getSocket()); | |
677 | d_filter = bpf; | |
678 | } | |
679 | #endif /* HAVE_EBPF */ | |
cff9aa03 | 680 | |
dd026b9c | 681 | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) |
cff9aa03 | 682 | { |
dd026b9c | 683 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
cff9aa03 RG |
684 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
685 | } | |
8a5d5053 | 686 | }; |
687 | ||
688 | class TCPClientCollection { | |
689 | std::vector<int> d_tcpclientthreads; | |
ded1985a | 690 | std::atomic<uint64_t> d_numthreads{0}; |
a9bf3ec4 | 691 | std::atomic<uint64_t> d_pos{0}; |
ded1985a | 692 | std::atomic<uint64_t> d_queued{0}; |
73402775 | 693 | const uint64_t d_maxthreads{0}; |
ded1985a | 694 | std::mutex d_mutex; |
edbda1ad | 695 | int d_singlePipe[2]; |
73402775 | 696 | const bool d_useSinglePipe; |
ded1985a | 697 | public: |
8a5d5053 | 698 | |
b79e4996 RG |
699 | TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe) |
700 | ||
8a5d5053 | 701 | { |
a9bf3ec4 | 702 | d_tcpclientthreads.reserve(maxThreads); |
edbda1ad RG |
703 | |
704 | if (d_useSinglePipe) { | |
705 | if (pipe(d_singlePipe) < 0) { | |
c52b8cb6 OM |
706 | int err = errno; |
707 | throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err)); | |
edbda1ad | 708 | } |
3b07fd1b RG |
709 | |
710 | if (!setNonBlocking(d_singlePipe[0])) { | |
711 | int err = errno; | |
712 | close(d_singlePipe[0]); | |
713 | close(d_singlePipe[1]); | |
c52b8cb6 | 714 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err)); |
3b07fd1b RG |
715 | } |
716 | ||
edbda1ad RG |
717 | if (!setNonBlocking(d_singlePipe[1])) { |
718 | int err = errno; | |
719 | close(d_singlePipe[0]); | |
720 | close(d_singlePipe[1]); | |
c52b8cb6 | 721 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err)); |
edbda1ad RG |
722 | } |
723 | } | |
8a5d5053 | 724 | } |
a9bf3ec4 | 725 | int getThread() |
8a5d5053 | 726 | { |
6c1ca990 | 727 | uint64_t pos = d_pos++; |
8a5d5053 | 728 | ++d_queued; |
729 | return d_tcpclientthreads[pos % d_numthreads]; | |
730 | } | |
ded1985a RG |
731 | bool hasReachedMaxThreads() const |
732 | { | |
733 | return d_numthreads >= d_maxthreads; | |
734 | } | |
735 | uint64_t getThreadsCount() const | |
736 | { | |
737 | return d_numthreads; | |
738 | } | |
739 | uint64_t getQueuedCount() const | |
740 | { | |
741 | return d_queued; | |
742 | } | |
743 | void decrementQueuedCount() | |
744 | { | |
745 | --d_queued; | |
746 | } | |
8a5d5053 | 747 | void addTCPClientThread(); |
748 | }; | |
749 | ||
1f7646c2 | 750 | extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads; |
8a5d5053 | 751 | |
df111b53 | 752 | struct DownstreamState |
753 | { | |
1720247e | 754 | typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t; |
98650fde | 755 | |
203b5348 RG |
756 | DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect); |
757 | DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {} | |
6a62c0e3 RG |
758 | ~DownstreamState() |
759 | { | |
5bdbb83d | 760 | for (auto& fd : sockets) { |
150105a2 RG |
761 | if (fd >= 0) { |
762 | close(fd); | |
763 | fd = -1; | |
764 | } | |
765 | } | |
040793d4 | 766 | pthread_rwlock_destroy(&d_lock); |
6a62c0e3 | 767 | } |
1720247e CHB |
768 | boost::uuids::uuid id; |
769 | std::set<unsigned int> hashes; | |
d58e616a | 770 | mutable pthread_rwlock_t d_lock; |
5bdbb83d | 771 | std::vector<int> sockets; |
70b0d0e2 | 772 | const std::string sourceItfName; |
5bdbb83d | 773 | std::mutex socketsLock; |
5d7e6765 | 774 | std::mutex connectLock; |
5bdbb83d | 775 | std::unique_ptr<FDMultiplexer> mplexer{nullptr}; |
df111b53 | 776 | std::thread tid; |
a2353842 | 777 | const ComboAddress remote; |
df111b53 | 778 | QPSLimiter qps; |
779 | vector<IDState> idStates; | |
73402775 | 780 | const ComboAddress sourceAddr; |
98650fde | 781 | checkfunc_t checkFunction; |
fbe2a2e0 RG |
782 | DNSName checkName{"a.root-servers.net."}; |
783 | QType checkType{QType::A}; | |
de9f7157 | 784 | uint16_t checkClass{QClass::IN}; |
df111b53 | 785 | std::atomic<uint64_t> idOffset{0}; |
786 | std::atomic<uint64_t> sendErrors{0}; | |
787 | std::atomic<uint64_t> outstanding{0}; | |
788 | std::atomic<uint64_t> reuseds{0}; | |
789 | std::atomic<uint64_t> queries{0}; | |
7fc95193 | 790 | std::atomic<uint64_t> responses{0}; |
df111b53 | 791 | struct { |
792 | std::atomic<uint64_t> sendErrors{0}; | |
793 | std::atomic<uint64_t> reuseds{0}; | |
794 | std::atomic<uint64_t> queries{0}; | |
795 | } prev; | |
a6e9e107 RG |
796 | std::atomic<uint64_t> tcpDiedSendingQuery{0}; |
797 | std::atomic<uint64_t> tcpDiedReadingResponse{0}; | |
798 | std::atomic<uint64_t> tcpGaveUp{0}; | |
799 | std::atomic<uint64_t> tcpReadTimeouts{0}; | |
800 | std::atomic<uint64_t> tcpWriteTimeouts{0}; | |
cff9aa03 RG |
801 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
802 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; | |
803 | /* in ms */ | |
804 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
18eeccc9 | 805 | string name; |
5bdbb83d | 806 | size_t socketsOffset{0}; |
df111b53 | 807 | double queryLoad{0.0}; |
808 | double dropRate{0.0}; | |
809 | double latencyUsec{0.0}; | |
810 | int order{1}; | |
811 | int weight{1}; | |
b40cffe7 | 812 | int tcpConnectTimeout{5}; |
3f6d07a4 RG |
813 | int tcpRecvTimeout{30}; |
814 | int tcpSendTimeout{30}; | |
7c9bf18d | 815 | unsigned int checkInterval{1}; |
816 | unsigned int lastCheck{0}; | |
73402775 | 817 | const unsigned int sourceItf{0}; |
3f6d07a4 | 818 | uint16_t retries{5}; |
c85f69a8 | 819 | uint16_t xpfRRCode{0}; |
b7e6f4a1 | 820 | uint16_t checkTimeout{1000}; /* in milliseconds */ |
9e87dcb8 | 821 | uint8_t currentCheckFailures{0}; |
853faf61 | 822 | uint8_t consecutiveSuccessfulChecks{0}; |
9e87dcb8 | 823 | uint8_t maxCheckFailures{1}; |
1b633bec | 824 | uint8_t minRiseSuccesses{1}; |
df111b53 | 825 | StopWatch sw; |
826 | set<string> pools; | |
827 | enum class Availability { Up, Down, Auto} availability{Availability::Auto}; | |
fbe2a2e0 | 828 | bool mustResolve{false}; |
df111b53 | 829 | bool upStatus{false}; |
ca404e94 | 830 | bool useECS{false}; |
21830638 | 831 | bool setCD{false}; |
49c33a6c | 832 | bool disableZeroScope{false}; |
7565f4e6 | 833 | std::atomic<bool> connected{false}; |
5d7e6765 | 834 | std::atomic_flag threadStarted; |
284d460c | 835 | bool tcpFastOpen{false}; |
5602f131 | 836 | bool ipBindAddrNoPort{true}; |
5d7e6765 | 837 | |
df111b53 | 838 | bool isUp() const |
839 | { | |
840 | if(availability == Availability::Down) | |
841 | return false; | |
842 | if(availability == Availability::Up) | |
843 | return true; | |
844 | return upStatus; | |
845 | } | |
846 | void setUp() { availability = Availability::Up; } | |
847 | void setDown() { availability = Availability::Down; } | |
848 | void setAuto() { availability = Availability::Auto; } | |
18eeccc9 RG |
849 | string getName() const { |
850 | if (name.empty()) { | |
851 | return remote.toStringWithPort(); | |
852 | } | |
853 | return name; | |
854 | } | |
a7940c06 | 855 | string getNameWithAddr() const { |
856 | if (name.empty()) { | |
857 | return remote.toStringWithPort(); | |
858 | } | |
859 | return name + " (" + remote.toStringWithPort()+ ")"; | |
860 | } | |
9f4eb5cc RG |
861 | string getStatus() const |
862 | { | |
863 | string status; | |
864 | if(availability == DownstreamState::Availability::Up) | |
865 | status = "UP"; | |
866 | else if(availability == DownstreamState::Availability::Down) | |
867 | status = "DOWN"; | |
868 | else | |
869 | status = (upStatus ? "up" : "down"); | |
870 | return status; | |
871 | } | |
5d7e6765 | 872 | bool reconnect(); |
f2caf657 CHB |
873 | void hash(); |
874 | void setId(const boost::uuids::uuid& newId); | |
875 | void setWeight(int newWeight); | |
cff9aa03 | 876 | |
dd026b9c | 877 | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) |
cff9aa03 | 878 | { |
dd026b9c | 879 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
cff9aa03 RG |
880 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
881 | } | |
df111b53 | 882 | }; |
883 | using servers_t =vector<std::shared_ptr<DownstreamState>>; | |
df111b53 | 884 | |
da4e7813 | 885 | template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >; |
886 | ||
9b73b71c | 887 | void responderThread(std::shared_ptr<DownstreamState> state); |
da4e7813 | 888 | extern std::mutex g_luamutex; |
889 | extern LuaContext g_lua; | |
890 | extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex | |
891 | ||
0940e4eb | 892 | class DNSRule |
893 | { | |
894 | public: | |
205f2081 RG |
895 | virtual ~DNSRule () |
896 | { | |
897 | } | |
497a6e3a | 898 | virtual bool matches(const DNSQuestion* dq) const =0; |
0940e4eb | 899 | virtual string toString() const = 0; |
900 | mutable std::atomic<uint64_t> d_matches{0}; | |
901 | }; | |
902 | ||
da4e7813 | 903 | using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>; |
497a6e3a | 904 | typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t; |
df111b53 | 905 | |
906 | struct ServerPolicy | |
907 | { | |
908 | string name; | |
70a57b05 | 909 | policyfunc_t policy; |
a1b1a29d | 910 | bool isLua; |
a4fd2d2f CH |
911 | std::string toString() const { |
912 | return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\""; | |
913 | } | |
df111b53 | 914 | }; |
915 | ||
886e2cf2 RG |
916 | struct ServerPool |
917 | { | |
a1b1a29d RG |
918 | ServerPool() |
919 | { | |
920 | pthread_rwlock_init(&d_lock, nullptr); | |
921 | } | |
040793d4 OM |
922 | ~ServerPool() |
923 | { | |
924 | pthread_rwlock_destroy(&d_lock); | |
925 | } | |
a1b1a29d | 926 | |
886e2cf2 RG |
927 | const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; }; |
928 | ||
7e687744 RG |
929 | bool getECS() const |
930 | { | |
931 | return d_useECS; | |
932 | } | |
933 | ||
934 | void setECS(bool useECS) | |
935 | { | |
936 | d_useECS = useECS; | |
937 | } | |
938 | ||
886e2cf2 | 939 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
b9f8a6c8 | 940 | std::shared_ptr<ServerPolicy> policy{nullptr}; |
5c30ec69 | 941 | |
a1b1a29d RG |
942 | size_t countServers(bool upOnly) |
943 | { | |
944 | size_t count = 0; | |
945 | ReadLock rl(&d_lock); | |
946 | for (const auto& server : d_servers) { | |
947 | if (!upOnly || std::get<1>(server)->isUp() ) { | |
948 | count++; | |
c1b81381 RG |
949 | } |
950 | } | |
a1b1a29d RG |
951 | return count; |
952 | } | |
953 | ||
954 | NumberedVector<shared_ptr<DownstreamState>> getServers() | |
955 | { | |
956 | NumberedVector<shared_ptr<DownstreamState>> result; | |
957 | { | |
958 | ReadLock rl(&d_lock); | |
959 | result = d_servers; | |
960 | } | |
961 | return result; | |
962 | } | |
963 | ||
964 | void addServer(shared_ptr<DownstreamState>& server) | |
965 | { | |
966 | WriteLock wl(&d_lock); | |
967 | unsigned int count = (unsigned int) d_servers.size(); | |
968 | d_servers.push_back(make_pair(++count, server)); | |
969 | /* we need to reorder based on the server 'order' */ | |
970 | std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) { | |
971 | return a.second->order < b.second->order; | |
972 | }); | |
973 | /* and now we need to renumber for Lua (custom policies) */ | |
974 | size_t idx = 1; | |
975 | for (auto& serv : d_servers) { | |
976 | serv.first = idx++; | |
977 | } | |
978 | } | |
979 | ||
980 | void removeServer(shared_ptr<DownstreamState>& server) | |
981 | { | |
982 | WriteLock wl(&d_lock); | |
983 | size_t idx = 1; | |
984 | bool found = false; | |
985 | for (auto it = d_servers.begin(); it != d_servers.end();) { | |
986 | if (found) { | |
987 | /* we need to renumber the servers placed | |
988 | after the removed one, for Lua (custom policies) */ | |
989 | it->first = idx++; | |
990 | it++; | |
991 | } | |
992 | else if (it->second == server) { | |
993 | it = d_servers.erase(it); | |
994 | found = true; | |
995 | } else { | |
996 | idx++; | |
997 | it++; | |
998 | } | |
999 | } | |
1000 | } | |
1001 | ||
1002 | private: | |
1003 | NumberedVector<shared_ptr<DownstreamState>> d_servers; | |
1004 | pthread_rwlock_t d_lock; | |
7e687744 | 1005 | bool d_useECS{false}; |
886e2cf2 RG |
1006 | }; |
1007 | using pools_t=map<std::string,std::shared_ptr<ServerPool>>; | |
742c079a | 1008 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy); |
886e2cf2 RG |
1009 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); |
1010 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); | |
1011 | ||
42fae326 | 1012 | struct CarbonConfig |
1013 | { | |
d617b22c | 1014 | ComboAddress server; |
813b0ba9 | 1015 | std::string namespace_name; |
42fae326 | 1016 | std::string ourname; |
813b0ba9 | 1017 | std::string instance_name; |
d617b22c | 1018 | unsigned int interval; |
42fae326 | 1019 | }; |
1020 | ||
/* Flag values for the EDNS header; DO is the most significant bit of a
   16-bit value. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
1025 | ||
4d5959e6 RG |
1026 | struct DNSDistRuleAction |
1027 | { | |
1028 | std::shared_ptr<DNSRule> d_rule; | |
1029 | std::shared_ptr<DNSAction> d_action; | |
1030 | boost::uuids::uuid d_id; | |
f8a222ac | 1031 | uint64_t d_creationOrder; |
4d5959e6 RG |
1032 | }; |
1033 | ||
1034 | struct DNSDistResponseRuleAction | |
1035 | { | |
1036 | std::shared_ptr<DNSRule> d_rule; | |
1037 | std::shared_ptr<DNSResponseAction> d_action; | |
1038 | boost::uuids::uuid d_id; | |
f8a222ac | 1039 | uint64_t d_creationOrder; |
4d5959e6 RG |
1040 | }; |
1041 | ||
71c94675 | 1042 | extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 1043 | extern DNSAction::Action g_dynBlockAction; |
71c94675 | 1044 | |
d617b22c | 1045 | extern GlobalStateHolder<vector<CarbonConfig> > g_carbon; |
ecbe9133 | 1046 | extern GlobalStateHolder<ServerPolicy> g_policy; |
1047 | extern GlobalStateHolder<servers_t> g_dstates; | |
886e2cf2 | 1048 | extern GlobalStateHolder<pools_t> g_pools; |
4d5959e6 RG |
1049 | extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
1050 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
1051 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 1052 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
638184e9 | 1053 | extern GlobalStateHolder<NetmaskGroup> g_ACL; |
2e72cc0e | 1054 | |
ecbe9133 | 1055 | extern ComboAddress g_serverControl; // not changed during runtime |
1056 | ||
f0e4dcba | 1057 | extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX) |
a227f47d | 1058 | extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 1059 | extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 1060 | extern std::vector<std::unique_ptr<ClientState>> g_frontends; |
6ad8b29a | 1061 | extern bool g_truncateTC; |
b29edbee | 1062 | extern bool g_fixupCase; |
3f6d07a4 RG |
1063 | extern int g_tcpRecvTimeout; |
1064 | extern int g_tcpSendTimeout; | |
e0b5e49d | 1065 | extern int g_udpTimeout; |
e41f8165 RG |
1066 | extern uint16_t g_maxOutstanding; |
1067 | extern std::atomic<bool> g_configurationDone; | |
6c1ca990 RG |
1068 | extern uint64_t g_maxTCPClientThreads; |
1069 | extern uint64_t g_maxTCPQueuedConnections; | |
9396d955 RG |
1070 | extern size_t g_maxTCPQueriesPerConn; |
1071 | extern size_t g_maxTCPConnectionDuration; | |
1072 | extern size_t g_maxTCPConnectionsPerClient; | |
886e2cf2 | 1073 | extern std::atomic<uint16_t> g_cacheCleaningDelay; |
f65ea0c2 | 1074 | extern std::atomic<uint16_t> g_cacheCleaningPercentage; |
1ea747c0 | 1075 | extern uint32_t g_staleCacheEntriesTTL; |
56d68fad RG |
1076 | extern bool g_apiReadWrite; |
1077 | extern std::string g_apiConfigDirectory; | |
26a3cdb7 | 1078 | extern bool g_servFailOnNoPolicy; |
36e763fa | 1079 | extern uint32_t g_hashperturb; |
edbda1ad | 1080 | extern bool g_useTCPSinglePipe; |
cff9aa03 | 1081 | extern uint16_t g_downstreamTCPCleanupInterval; |
0beaa5c8 | 1082 | extern size_t g_udpVectorSize; |
53c57da7 | 1083 | extern bool g_preserveTrailingData; |
0dffe9e3 | 1084 | extern bool g_allowEmptyResponse; |
32b86928 | 1085 | extern bool g_roundrobinFailOnNoServer; |
2b4287d4 | 1086 | extern double g_consistentHashBalancingFactor; |
ca404e94 | 1087 | |
87b515ed RG |
1088 | #ifdef HAVE_EBPF |
1089 | extern shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 1090 | extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed RG |
1091 | #endif /* HAVE_EBPF */ |
1092 | ||
0beaa5c8 RG |
1093 | struct LocalHolders |
1094 | { | |
2d4783a8 | 1095 | LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal()) |
0beaa5c8 RG |
1096 | { |
1097 | } | |
1098 | ||
1099 | LocalStateHolder<NetmaskGroup> acl; | |
1100 | LocalStateHolder<ServerPolicy> policy; | |
4d5959e6 RG |
1101 | LocalStateHolder<vector<DNSDistRuleAction> > rulactions; |
1102 | LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions; | |
2d4783a8 | 1103 | LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions; |
0beaa5c8 RG |
1104 | LocalStateHolder<servers_t> servers; |
1105 | LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock; | |
1106 | LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock; | |
1107 | LocalStateHolder<pools_t> pools; | |
1108 | }; | |
1109 | ||
ecbe9133 | 1110 | struct dnsheader; |
1111 | ||
1112 | void controlThread(int fd, ComboAddress local); | |
886e2cf2 RG |
1113 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName); |
1114 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName); | |
a1b1a29d | 1115 | NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName); |
da4e7813 | 1116 | |
497a6e3a | 1117 | std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq); |
ecbe9133 | 1118 | |
497a6e3a RG |
1119 | std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq); |
1120 | std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq); | |
1121 | std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq); | |
1720247e | 1122 | std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq); |
497a6e3a | 1123 | std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq); |
e7c732b8 | 1124 | |
80dbd7d2 CHB |
1125 | struct WebserverConfig |
1126 | { | |
1127 | std::string password; | |
1128 | std::string apiKey; | |
1129 | boost::optional<std::map<std::string, std::string> > customHeaders; | |
1130 | std::mutex lock; | |
1131 | }; | |
1132 | ||
32c97b56 CHB |
1133 | void setWebserverAPIKey(const boost::optional<std::string> apiKey); |
1134 | void setWebserverPassword(const std::string& password); | |
1135 | void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders); | |
1136 | ||
80dbd7d2 | 1137 | void dnsdistWebserverThread(int sock, const ComboAddress& local); |
9b73b71c | 1138 | void tcpAcceptorThread(void* p); |
fbf14b03 RG |
1139 | #ifdef HAVE_DNS_OVER_HTTPS |
1140 | void dohThread(ClientState* cs); | |
1141 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
80a216c9 | 1142 | |
f758857a | 1143 | void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects |
1144 | void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls | |
1145 | bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect | |
1146 | void resetLuaSideEffect(); // reset to indeterminate state | |
11e1e08b | 1147 | |
e7c732b8 | 1148 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed); |
3e425868 | 1149 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted); |
2a28db86 | 1150 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop); |
4ab01344 | 1151 | |
0beaa5c8 | 1152 | bool checkQueryHeaders(const struct dnsheader* dh); |
fcffc585 | 1153 | |
6e9fd124 | 1154 | extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
43234e76 | 1155 | int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response); |
4ab01344 | 1156 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp); |
9f4eb5cc | 1157 | |
18f707fa | 1158 | bool addXPF(DNSQuestion& dq, uint16_t optionCode); |
5cc8371b | 1159 | |
555970c9 RG |
1160 | uint16_t getRandomDNSID(); |
1161 | ||
9f4eb5cc RG |
1162 | #include "dnsdist-snmp.hh" |
1163 | ||
1164 | extern bool g_snmpEnabled; | |
1165 | extern bool g_snmpTrapsEnabled; | |
1166 | extern DNSDistSNMPAgent* g_snmpAgent; | |
e7c732b8 RG |
1167 | extern bool g_addEDNSToSelfGeneratedResponses; |
1168 | ||
83fe2c55 | 1169 | extern std::set<std::string> g_capabilitiesToRetain; |
/* don't accept UDP queries larger than this value */
static const uint16_t s_udpIncomingBufferSize = 1500;
/* don't cache responses larger than this value */
static const size_t s_maxPacketCacheEntrySize = 4096;
4ab01344 | 1172 | |
3e425868 RG |
/* Possible outcomes of processQuery(). */
enum class ProcessQueryResult {
  Drop,
  SendAnswer,
  PassToBackend
};
1174 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend); | |
4ab01344 | 1175 | |
d0ae6360 RG |
1176 | DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP); |
1177 | void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname); | |
fbf14b03 RG |
1178 | |
1179 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state); | |
1180 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false); |